ADD: added other eigen lib
@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_ADLOC_FORWARD
#define EIGEN_ADLOC_FORWARD
#ifndef EIGEN_ADLOC_FORWARD_MODULE_H
#define EIGEN_ADLOC_FORWARD_MODULE_H

//--------------------------------------------------------------------------------
//
@@ -156,4 +156,4 @@ protected:

}

#endif // EIGEN_ADLOC_FORWARD
#endif // EIGEN_ADLOC_FORWARD_MODULE_H

@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_ALIGNED_VECTOR3
#define EIGEN_ALIGNED_VECTOR3
#ifndef EIGEN_ALIGNED_VECTOR3_MODULE_H
#define EIGEN_ALIGNED_VECTOR3_MODULE_H

#include "../../Eigen/Geometry"

@@ -37,23 +37,23 @@ namespace Eigen {
*
*/
// TODO specialize Cwise
template<typename _Scalar> class AlignedVector3;
template<typename Scalar_> class AlignedVector3;

namespace internal {
template<typename _Scalar> struct traits<AlignedVector3<_Scalar> >
: traits<Matrix<_Scalar,3,1,0,4,1> >
template<typename Scalar_> struct traits<AlignedVector3<Scalar_> >
: traits<Matrix<Scalar_,3,1,0,4,1> >
{
};
}

template<typename _Scalar> class AlignedVector3
: public MatrixBase<AlignedVector3<_Scalar> >
template<typename Scalar_> class AlignedVector3
: public MatrixBase<AlignedVector3<Scalar_> >
{
typedef Matrix<_Scalar,4,1> CoeffType;
typedef Matrix<Scalar_,4,1> CoeffType;
CoeffType m_coeffs;
public:

typedef MatrixBase<AlignedVector3<_Scalar> > Base;
typedef MatrixBase<AlignedVector3<Scalar_> > Base;
EIGEN_DENSE_PUBLIC_INTERFACE(AlignedVector3)
using Base::operator*;

@@ -207,10 +207,10 @@ template<typename _Scalar> class AlignedVector3

namespace internal {

template<typename _Scalar>
struct eval<AlignedVector3<_Scalar>, Dense>
template<typename Scalar_>
struct eval<AlignedVector3<Scalar_>, Dense>
{
typedef const AlignedVector3<_Scalar>& type;
typedef const AlignedVector3<Scalar_>& type;
};

template<typename Scalar>
@@ -231,4 +231,4 @@ struct evaluator<AlignedVector3<Scalar> >

#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"

#endif // EIGEN_ALIGNED_VECTOR3
#endif // EIGEN_ALIGNED_VECTOR3_MODULE_H

@@ -7,8 +7,10 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_AUTODIFF_MODULE
#define EIGEN_AUTODIFF_MODULE
#ifndef EIGEN_AUTODIFF_MODULE_H
#define EIGEN_AUTODIFF_MODULE_H

#include "../../Eigen/Core"

namespace Eigen {

@@ -43,4 +45,4 @@ namespace Eigen {
//@}
}

#endif // EIGEN_AUTODIFF_MODULE
#endif // EIGEN_AUTODIFF_MODULE_H

@@ -12,6 +12,7 @@ set(Eigen_HEADERS
MatrixFunctions
MoreVectorization
MPRealSupport
NNLS
NonLinearOptimization
NumericalDiff
OpenGLSupport

@@ -8,13 +8,11 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

//#ifndef EIGEN_CXX11_TENSOR_MODULE
//#define EIGEN_CXX11_TENSOR_MODULE
//#ifndef EIGEN_CXX11_TENSOR_MODULE_H
#define EIGEN_CXX11_TENSOR_MODULE_H

#include "../../../Eigen/Core"

#if EIGEN_HAS_CXX11

#include "../SpecialFunctions"

#include "../../../Eigen/src/Core/util/DisableStupidWarnings.h"
@@ -38,6 +36,8 @@
#include <cmath>
#include <cstddef>
#include <cstring>
#include <iterator>
#include <numeric>
#include <random>
#include <thread>

@@ -76,6 +76,8 @@
#include "src/Tensor/TensorIntDiv.h"
#include "src/Tensor/TensorGlobalFunctions.h"

#include "src/Tensor/TensorIO.h"

#include "src/Tensor/TensorBase.h"
#include "src/Tensor/TensorBlock.h"

@@ -129,9 +131,8 @@
#include "src/Tensor/TensorMap.h"
#include "src/Tensor/TensorRef.h"

#include "src/Tensor/TensorIO.h"


#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"

#endif // EIGEN_HAS_CXX11
//#endif // EIGEN_CXX11_TENSOR_MODULE
//#endif // EIGEN_CXX11_TENSOR_MODULE_H

@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE
#define EIGEN_CXX11_TENSORSYMMETRY_MODULE
#ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE_H
#define EIGEN_CXX11_TENSORSYMMETRY_MODULE_H

#include "Tensor"

@@ -35,8 +35,4 @@

#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"

#endif // EIGEN_CXX11_TENSORSYMMETRY_MODULE

/*
* kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
*/
#endif // EIGEN_CXX11_TENSORSYMMETRY_MODULE_H

@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_THREADPOOL_MODULE
#define EIGEN_CXX11_THREADPOOL_MODULE
#ifndef EIGEN_CXX11_THREADPOOL_MODULE_H
#define EIGEN_CXX11_THREADPOOL_MODULE_H

#include "../../../Eigen/Core"

@@ -30,7 +30,6 @@

// The code depends on CXX11, so only include the module if the
// compiler supports it.
#if (EIGEN_COMP_CXXVER >= 11)
#include <cstddef>
#include <cstring>
#include <time.h>
@@ -67,8 +66,6 @@
#include "src/ThreadPool/Barrier.h"
#include "src/ThreadPool/NonBlockingThreadPool.h"

#endif

#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"

#endif // EIGEN_CXX11_THREADPOOL_MODULE
#endif // EIGEN_CXX11_THREADPOOL_MODULE_H

@@ -0,0 +1,3 @@
#ifndef EIGEN_CXX11_TENSOR_MODULE_H
#error "Please include unsupported/Eigen/CXX11/Tensor instead of including headers inside the src directory directly."
#endif
@@ -120,9 +120,7 @@ specified position. The value returned is of the datatype of the tensor.
## TensorLayout

The tensor library supports 2 layouts: `ColMajor` (the default) and
`RowMajor`. Only the default column major layout is currently fully
supported, and it is therefore not recommended to attempt to use the row major
layout at the moment.
`RowMajor`.

The layout of a tensor is optionally specified as part of its type. If not
specified explicitly column major is assumed.
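
For illustration, a minimal sketch of choosing the layout through the third template argument (when it is omitted, `ColMajor` is assumed):

    // Column-major (the default) 2-d tensor.
    Eigen::Tensor<float, 2> col_major(3, 4);

    // Explicitly row-major tensor of the same shape.
    Eigen::Tensor<float, 2, Eigen::RowMajor> row_major(3, 4);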
@@ -888,6 +886,23 @@ containing the natural logarithms of the original tensor.
Returns a tensor of the same type and dimensions as the original tensor
containing the absolute values of the original tensor.

### <Operation> arg()

Returns a tensor with the same dimensions as the original tensor
containing the complex argument (phase angle) of the values of the
original tensor.

### <Operation> real()

Returns a tensor with the same dimensions as the original tensor
containing the real part of the complex values of the original tensor.

### <Operation> imag()

Returns a tensor with the same dimensions as the original tensor
containing the imaginary part of the complex values of the original
tensor.

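As a brief illustrative sketch, assuming a complex-valued tensor and the `arg()`, `real()` and `imag()` operations documented above (the values are arbitrary):

    Eigen::Tensor<std::complex<float>, 1> z(3);
    z.setValues({std::complex<float>(1.0f, 1.0f),
                 std::complex<float>(0.0f, 2.0f),
                 std::complex<float>(-1.0f, 0.0f)});

    // Each operation yields a tensor expression with the same dimensions as z.
    Eigen::Tensor<float, 1> phase = z.arg();   // complex argument (phase angle)
    Eigen::Tensor<float, 1> re    = z.real();  // real parts
    Eigen::Tensor<float, 1> im    = z.imag();  // imaginary parts
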
### <Operation> pow(Scalar exponent)

Returns a tensor of the same type and dimensions as the original tensor
@@ -1466,9 +1481,9 @@ the input tensor.
Eigen::Tensor<int, 2> a(4, 3);
a.setValues({{0, 100, 200}, {300, 400, 500},
{600, 700, 800}, {900, 1000, 1100}});
Eigen::array<int, 2> offsets = {1, 0};
Eigen::array<int, 2> extents = {2, 2};
Eigen::Tensor<int, 1> slice = a.slice(offsets, extents);
Eigen::array<Eigen::Index, 2> offsets = {1, 0};
Eigen::array<Eigen::Index, 2> extents = {2, 2};
Eigen::Tensor<int, 2> slice = a.slice(offsets, extents);
cout << "a" << endl << a << endl;
=>
a
@@ -1794,6 +1809,45 @@ but you can easily cast the tensors to floats to do the division:

TODO

## Tensor Printing
Tensors can be printed into a stream object (e.g. `std::cout`) using different formatting options.

Eigen::Tensor<float, 3> tensor3d = {4, 3, 2};
tensor3d.setValues( {{{1, 2}, {3, 4}, {5, 6}}, {{7, 8}, {9, 10}, {11, 12}}, {{13, 14}, {15, 16}, {17, 18}}, {{19, 20}, {21, 22}, {23, 24}}} );
std::cout << tensor3d.format(Eigen::TensorIOFormat::Plain()) << std::endl;
==>
1 2
3 4
5 6

7 8
9 10
11 12

13 14
15 16
17 18

19 20
21 22
23 24

In the example, we used the predefined format `Eigen::TensorIOFormat::Plain`.
Here is the list of all predefined formats from which you can choose:
- `Eigen::TensorIOFormat::Plain()` for a plain output without braces. Different submatrices are separated by a blank line.
- `Eigen::TensorIOFormat::Numpy()` for NumPy-like output.
- `Eigen::TensorIOFormat::Native()` for C++-like output which can be copy-pasted directly into `setValues()`.
- `Eigen::TensorIOFormat::Legacy()` for backwards-compatible printing of tensors.

If you send the tensor directly to the stream, the default format `Eigen::TensorIOFormat::Plain()` is used (see the usage sketch after the next list).

You can define your own format by explicitly providing an `Eigen::TensorIOFormat` instance. Here, you can specify:
- The overall prefix and suffix with `std::string tenPrefix` and `std::string tenSuffix`
- The prefix, separator and suffix for each new element, row, matrix, 3d subtensor, ... with `std::vector<std::string> prefix`, `std::vector<std::string> separator` and `std::vector<std::string> suffix`. Note that the first entry in each of the vectors refers to the last dimension of the tensor, e.g. `separator[0]` will be printed between adjacent elements, `separator[1]` will be printed between adjacent matrices, ...
- `char fill`: the character used for padding when elements are aligned.
- `int precision`
- `int flags`: an OR-ed combination of flags; the default value is 0. The only currently available flag is `Eigen::DontAlignCols`, which disables column alignment and results in faster code.

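A minimal usage sketch of the predefined formats above (the tensor contents are illustrative only):

    Eigen::Tensor<float, 2> t(2, 3);
    t.setRandom();

    // Streaming the tensor directly uses the default (Plain) format.
    std::cout << t << std::endl;

    // Explicitly choose one of the predefined formats.
    std::cout << t.format(Eigen::TensorIOFormat::Numpy()) << std::endl;
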
## Representation of scalar values

@@ -1808,8 +1862,3 @@ product of 2 1d tensors (through contractions) returns a 0d tensor.
* The IndexList class requires a cxx11 compliant compiler. You can use an
array of indices instead if you don't have access to a modern compiler.
* On GPUs only floating point values are properly tested and optimized for.
* Complex and integer values are known to be broken on GPUs. If you try to use
them you'll most likely end up triggering a static assertion failure such as
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)

@@ -11,6 +11,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class Tensor
@@ -42,7 +44,8 @@ namespace Eigen {
* \endcode
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN.
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN,
* \c EIGEN_TENSORBASE_PLUGIN, and \c EIGEN_READONLY_TENSORBASE_PLUGIN.
*
* <i><b>Some notes:</b></i>
*
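
A hedged sketch of how the plugin mechanism described in the comment above is typically used; the header names are placeholders, not files shipped with Eigen:

    // Define the plugin macros before including the Tensor module; each macro
    // names a user-provided header that is injected into the corresponding class.
    #define EIGEN_TENSOR_PLUGIN "MyTensorPlugin.h"
    #define EIGEN_TENSORBASE_PLUGIN "MyTensorBasePlugin.h"
    #define EIGEN_READONLY_TENSORBASE_PLUGIN "MyReadOnlyTensorBasePlugin.h"
    #include <unsupported/Eigen/CXX11/Tensor>
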
@@ -73,27 +76,25 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
|
||||
enum {
|
||||
IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) & !(Options_&DontAlign),
|
||||
Layout = Options_ & RowMajor ? RowMajor : ColMajor,
|
||||
IsAligned = (EIGEN_MAX_ALIGN_BYTES>0) && !(Options_&DontAlign),
|
||||
CoordAccess = true,
|
||||
RawAccess = true
|
||||
};
|
||||
|
||||
static const int Options = Options_;
|
||||
static const int NumIndices = NumIndices_;
|
||||
static constexpr int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
|
||||
static constexpr int Options = Options_;
|
||||
static constexpr int NumIndices = NumIndices_;
|
||||
typedef DSizes<Index, NumIndices_> Dimensions;
|
||||
|
||||
protected:
|
||||
TensorStorage<Scalar, Dimensions, Options> m_storage;
|
||||
|
||||
#ifdef EIGEN_HAS_SFINAE
|
||||
template<typename CustomIndices>
|
||||
struct isOfNormalIndex{
|
||||
static const bool is_array = internal::is_base_of<array<Index, NumIndices>, CustomIndices>::value;
|
||||
static const bool is_int = NumTraits<CustomIndices>::IsInteger;
|
||||
static const bool value = is_array | is_int;
|
||||
};
|
||||
#endif
|
||||
|
||||
public:
|
||||
// Metadata
|
||||
@@ -110,7 +111,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
inline Self& base() { return *this; }
|
||||
inline const Self& base() const { return *this; }
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template<typename... IndexTypes>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
|
||||
{
|
||||
@@ -118,7 +118,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
return coeff(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
|
||||
}
|
||||
#endif
|
||||
|
||||
// normal indices
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const
|
||||
@@ -128,7 +127,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
}
|
||||
|
||||
// custom indices
|
||||
#ifdef EIGEN_HAS_SFINAE
|
||||
template<typename CustomIndices,
|
||||
EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) )
|
||||
>
|
||||
@@ -136,7 +134,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
{
|
||||
return coeff(internal::customIndices2Array<Index,NumIndices>(indices));
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff() const
|
||||
{
|
||||
@@ -150,7 +147,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
return m_storage.data()[index];
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template<typename... IndexTypes>
|
||||
inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
|
||||
{
|
||||
@@ -158,7 +154,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
return coeffRef(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
|
||||
}
|
||||
#endif
|
||||
|
||||
// normal indices
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices)
|
||||
@@ -168,7 +163,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
}
|
||||
|
||||
// custom indices
|
||||
#ifdef EIGEN_HAS_SFINAE
|
||||
template<typename CustomIndices,
|
||||
EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) )
|
||||
>
|
||||
@@ -176,7 +170,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
{
|
||||
return coeffRef(internal::customIndices2Array<Index,NumIndices>(indices));
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef()
|
||||
{
|
||||
@@ -190,7 +183,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
return m_storage.data()[index];
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template<typename... IndexTypes>
|
||||
inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
|
||||
{
|
||||
@@ -198,31 +190,8 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
return this->operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
|
||||
}
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
|
||||
{
|
||||
return coeff(array<Index, 2>(i0, i1));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
|
||||
{
|
||||
return coeff(array<Index, 3>(i0, i1, i2));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
|
||||
{
|
||||
return coeff(array<Index, 4>(i0, i1, i2, i3));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
|
||||
{
|
||||
return coeff(array<Index, 5>(i0, i1, i2, i3, i4));
|
||||
}
|
||||
#endif
|
||||
|
||||
// custom indices
|
||||
#ifdef EIGEN_HAS_SFINAE
|
||||
template<typename CustomIndices,
|
||||
EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) )
|
||||
>
|
||||
@@ -230,7 +199,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
{
|
||||
return coeff(internal::customIndices2Array<Index,NumIndices>(indices));
|
||||
}
|
||||
#endif
|
||||
|
||||
// normal indices
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
|
||||
@@ -257,7 +225,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
return coeff(index);
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template<typename... IndexTypes>
|
||||
inline Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
|
||||
{
|
||||
@@ -265,28 +232,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
return operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
|
||||
}
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
|
||||
{
|
||||
return coeffRef(array<Index, 2>(i0, i1));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
|
||||
{
|
||||
return coeffRef(array<Index, 3>(i0, i1, i2));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
|
||||
{
|
||||
return coeffRef(array<Index, 4>(i0, i1, i2, i3));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
|
||||
{
|
||||
return coeffRef(array<Index, 5>(i0, i1, i2, i3, i4));
|
||||
}
|
||||
#endif
|
||||
|
||||
// normal indices
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices)
|
||||
@@ -295,7 +240,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
}
|
||||
|
||||
// custom indices
|
||||
#ifdef EIGEN_HAS_SFINAE
|
||||
template<typename CustomIndices,
|
||||
EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) )
|
||||
>
|
||||
@@ -303,7 +247,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
{
|
||||
return coeffRef(internal::customIndices2Array<Index,NumIndices>(indices));
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index)
|
||||
{
|
||||
@@ -332,11 +275,10 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Tensor(const Self& other)
|
||||
: m_storage(other.m_storage)
|
||||
: Base(other), m_storage(other.m_storage)
|
||||
{
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template<typename... IndexTypes>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions)
|
||||
: m_storage(firstDimension, otherDimensions...)
|
||||
@@ -344,33 +286,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
// The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
|
||||
EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1)
|
||||
: m_storage(dim1, array<Index, 1>(dim1))
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2)
|
||||
: m_storage(dim1*dim2, array<Index, 2>(dim1, dim2))
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3)
|
||||
: m_storage(dim1*dim2*dim3, array<Index, 3>(dim1, dim2, dim3))
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4)
|
||||
: m_storage(dim1*dim2*dim3*dim4, array<Index, 4>(dim1, dim2, dim3, dim4))
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5)
|
||||
: m_storage(dim1*dim2*dim3*dim4*dim5, array<Index, 5>(dim1, dim2, dim3, dim4, dim5))
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Normal Dimension */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(const array<Index, NumIndices>& dimensions)
|
||||
@@ -399,7 +314,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Tensor(Self&& other)
|
||||
: m_storage(std::move(other.m_storage))
|
||||
@@ -411,7 +325,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
m_storage = std::move(other.m_storage);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Tensor& operator=(const Tensor& other)
|
||||
@@ -433,7 +346,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
return *this;
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
|
||||
void resize(Index firstDimension, IndexTypes... otherDimensions)
|
||||
{
|
||||
@@ -441,7 +353,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
resize(array<Index, NumIndices>{{firstDimension, otherDimensions...}});
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Normal Dimension */
|
||||
EIGEN_DEVICE_FUNC void resize(const array<Index, NumIndices>& dimensions)
|
||||
@@ -477,7 +388,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
// Nothing to do: rank 0 tensors have fixed size
|
||||
}
|
||||
|
||||
#ifdef EIGEN_HAS_INDEX_LIST
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(const Eigen::IndexList<FirstType, OtherTypes...>& dimensions) {
|
||||
@@ -487,10 +397,8 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
}
|
||||
resize(dims);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Custom Dimension */
|
||||
#ifdef EIGEN_HAS_SFINAE
|
||||
template<typename CustomDimension,
|
||||
EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomDimension>::value) )
|
||||
>
|
||||
@@ -498,7 +406,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
{
|
||||
resize(internal::customIndices2Array<Index,NumIndices>(dimensions));
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_EMULATE_CXX11_META_H
|
||||
template <typename std::ptrdiff_t... Indices>
|
||||
@@ -522,6 +429,10 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_TENSOR_PLUGIN
|
||||
#include EIGEN_TENSOR_PLUGIN
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
bool checkIndexRange(const array<Index, NumIndices>& indices) const
|
||||
|
||||
@@ -11,60 +11,62 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
/** \class TensorIndexTuple
|
||||
/** \class TensorIndexPair
|
||||
* \ingroup CXX11_Tensor_Module
|
||||
*
|
||||
* \brief Tensor + Index Tuple class.
|
||||
* \brief Tensor + Index Pair class.
|
||||
*
|
||||
*
|
||||
*/
|
||||
template<typename XprType>
|
||||
struct traits<TensorIndexTupleOp<XprType> > : public traits<XprType>
|
||||
struct traits<TensorIndexPairOp<XprType> > : public traits<XprType>
|
||||
{
|
||||
typedef traits<XprType> XprTraits;
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef Tuple<Index, typename XprTraits::Scalar> Scalar;
|
||||
typedef Pair<Index, typename XprTraits::Scalar> Scalar;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
};
|
||||
|
||||
template<typename XprType>
|
||||
struct eval<TensorIndexTupleOp<XprType>, Eigen::Dense>
|
||||
struct eval<TensorIndexPairOp<XprType>, Eigen::Dense>
|
||||
{
|
||||
typedef const TensorIndexTupleOp<XprType>EIGEN_DEVICE_REF type;
|
||||
typedef const TensorIndexPairOp<XprType>EIGEN_DEVICE_REF type;
|
||||
};
|
||||
|
||||
template<typename XprType>
|
||||
struct nested<TensorIndexTupleOp<XprType>, 1,
|
||||
typename eval<TensorIndexTupleOp<XprType> >::type>
|
||||
struct nested<TensorIndexPairOp<XprType>, 1,
|
||||
typename eval<TensorIndexPairOp<XprType> >::type>
|
||||
{
|
||||
typedef TensorIndexTupleOp<XprType> type;
|
||||
typedef TensorIndexPairOp<XprType> type;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
template<typename XprType>
|
||||
class TensorIndexTupleOp : public TensorBase<TensorIndexTupleOp<XprType>, ReadOnlyAccessors>
|
||||
class TensorIndexPairOp : public TensorBase<TensorIndexPairOp<XprType>, ReadOnlyAccessors>
|
||||
{
|
||||
public:
|
||||
typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Scalar Scalar;
|
||||
typedef typename Eigen::internal::traits<TensorIndexPairOp>::Scalar Scalar;
|
||||
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
|
||||
typedef typename Eigen::internal::nested<TensorIndexTupleOp>::type Nested;
|
||||
typedef typename Eigen::internal::traits<TensorIndexTupleOp>::StorageKind StorageKind;
|
||||
typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Index Index;
|
||||
typedef Tuple<Index, typename XprType::CoeffReturnType> CoeffReturnType;
|
||||
typedef typename Eigen::internal::nested<TensorIndexPairOp>::type Nested;
|
||||
typedef typename Eigen::internal::traits<TensorIndexPairOp>::StorageKind StorageKind;
|
||||
typedef typename Eigen::internal::traits<TensorIndexPairOp>::Index Index;
|
||||
typedef Pair<Index, typename XprType::CoeffReturnType> CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexTupleOp(const XprType& expr)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexPairOp(const XprType& expr)
|
||||
: m_xpr(expr) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
@@ -73,15 +75,15 @@ class TensorIndexTupleOp : public TensorBase<TensorIndexTupleOp<XprType>, ReadOn
|
||||
|
||||
// Eval as rvalue
|
||||
template<typename ArgType, typename Device>
|
||||
struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
|
||||
struct TensorEvaluator<const TensorIndexPairOp<ArgType>, Device>
|
||||
{
|
||||
typedef TensorIndexTupleOp<ArgType> XprType;
|
||||
typedef TensorIndexPairOp<ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
|
||||
static const int NumDims = internal::array_size<Dimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<Dimensions>::value;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
@@ -90,10 +92,10 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
|
||||
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockNotImplemented TensorBlock;
|
||||
@@ -138,59 +140,59 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \class TensorTupleIndex
|
||||
/** \class TensorPairIndex
|
||||
* \ingroup CXX11_Tensor_Module
|
||||
*
|
||||
* \brief Converts to Tensor<Tuple<Index, Scalar> > and reduces to Tensor<Index>.
|
||||
* \brief Converts to Tensor<Pair<Index, Scalar> > and reduces to Tensor<Index>.
|
||||
*
|
||||
*/
|
||||
template<typename ReduceOp, typename Dims, typename XprType>
|
||||
struct traits<TensorTupleReducerOp<ReduceOp, Dims, XprType> > : public traits<XprType>
|
||||
struct traits<TensorPairReducerOp<ReduceOp, Dims, XprType> > : public traits<XprType>
|
||||
{
|
||||
typedef traits<XprType> XprTraits;
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef Index Scalar;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
};
|
||||
|
||||
template<typename ReduceOp, typename Dims, typename XprType>
|
||||
struct eval<TensorTupleReducerOp<ReduceOp, Dims, XprType>, Eigen::Dense>
|
||||
struct eval<TensorPairReducerOp<ReduceOp, Dims, XprType>, Eigen::Dense>
|
||||
{
|
||||
typedef const TensorTupleReducerOp<ReduceOp, Dims, XprType>EIGEN_DEVICE_REF type;
|
||||
typedef const TensorPairReducerOp<ReduceOp, Dims, XprType>EIGEN_DEVICE_REF type;
|
||||
};
|
||||
|
||||
template<typename ReduceOp, typename Dims, typename XprType>
|
||||
struct nested<TensorTupleReducerOp<ReduceOp, Dims, XprType>, 1,
|
||||
typename eval<TensorTupleReducerOp<ReduceOp, Dims, XprType> >::type>
|
||||
struct nested<TensorPairReducerOp<ReduceOp, Dims, XprType>, 1,
|
||||
typename eval<TensorPairReducerOp<ReduceOp, Dims, XprType> >::type>
|
||||
{
|
||||
typedef TensorTupleReducerOp<ReduceOp, Dims, XprType> type;
|
||||
typedef TensorPairReducerOp<ReduceOp, Dims, XprType> type;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
template<typename ReduceOp, typename Dims, typename XprType>
|
||||
class TensorTupleReducerOp : public TensorBase<TensorTupleReducerOp<ReduceOp, Dims, XprType>, ReadOnlyAccessors>
|
||||
class TensorPairReducerOp : public TensorBase<TensorPairReducerOp<ReduceOp, Dims, XprType>, ReadOnlyAccessors>
|
||||
{
|
||||
public:
|
||||
typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Scalar Scalar;
|
||||
typedef typename Eigen::internal::traits<TensorPairReducerOp>::Scalar Scalar;
|
||||
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
|
||||
typedef typename Eigen::internal::nested<TensorTupleReducerOp>::type Nested;
|
||||
typedef typename Eigen::internal::traits<TensorTupleReducerOp>::StorageKind StorageKind;
|
||||
typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Index Index;
|
||||
typedef typename Eigen::internal::nested<TensorPairReducerOp>::type Nested;
|
||||
typedef typename Eigen::internal::traits<TensorPairReducerOp>::StorageKind StorageKind;
|
||||
typedef typename Eigen::internal::traits<TensorPairReducerOp>::Index Index;
|
||||
typedef Index CoeffReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr,
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPairReducerOp(const XprType& expr,
|
||||
const ReduceOp& reduce_op,
|
||||
const Index return_dim,
|
||||
const Dims& reduce_dims)
|
||||
: m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -211,38 +213,37 @@ class TensorTupleReducerOp : public TensorBase<TensorTupleReducerOp<ReduceOp, Di
|
||||
|
||||
// Eval as rvalue
|
||||
template<typename ReduceOp, typename Dims, typename ArgType, typename Device>
|
||||
struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device>
|
||||
struct TensorEvaluator<const TensorPairReducerOp<ReduceOp, Dims, ArgType>, Device>
|
||||
{
|
||||
typedef TensorTupleReducerOp<ReduceOp, Dims, ArgType> XprType;
|
||||
typedef TensorPairReducerOp<ReduceOp, Dims, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename TensorIndexTupleOp<ArgType>::CoeffReturnType TupleType;
|
||||
typedef typename TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Dimensions Dimensions;
|
||||
typedef typename TensorEvaluator<const TensorIndexTupleOp<ArgType> , Device>::Dimensions InputDimensions;
|
||||
static const int NumDims = internal::array_size<InputDimensions>::value;
|
||||
typedef typename TensorIndexPairOp<ArgType>::CoeffReturnType PairType;
|
||||
typedef typename TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexPairOp<ArgType> >, Device>::Dimensions Dimensions;
|
||||
typedef typename TensorEvaluator<const TensorIndexPairOp<ArgType> , Device>::Dimensions InputDimensions;
|
||||
static constexpr int NumDims = internal::array_size<InputDimensions>::value;
|
||||
typedef array<Index, NumDims> StrideDims;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
typedef StorageMemory<TupleType, Device> TupleStorageMem;
|
||||
typedef StorageMemory<PairType, Device> PairStorageMem;
|
||||
|
||||
enum {
|
||||
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
|
||||
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
|
||||
Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexPairOp<ArgType>>, Device>::Layout;
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockNotImplemented TensorBlock;
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
||||
EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||
: m_orig_impl(op.expression(), device),
|
||||
m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device),
|
||||
m_impl(op.expression().index_pairs().reduce(op.reduce_dims(), op.reduce_op()), device),
|
||||
m_return_dim(op.return_dim())
|
||||
{
|
||||
gen_strides(m_orig_impl.dimensions(), m_strides);
|
||||
@@ -272,7 +273,7 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
||||
const TupleType v = m_impl.coeff(index);
|
||||
const PairType v = m_impl.coeff(index);
|
||||
return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div;
|
||||
}
|
||||
|
||||
@@ -316,8 +317,8 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
|
||||
}
|
||||
|
||||
protected:
|
||||
TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device> m_orig_impl;
|
||||
TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device> m_impl;
|
||||
TensorEvaluator<const TensorIndexPairOp<ArgType>, Device> m_orig_impl;
|
||||
TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexPairOp<ArgType> >, Device> m_impl;
|
||||
const Index m_return_dim;
|
||||
StrideDims m_strides;
|
||||
Index m_stride_mod;
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorAssign
|
||||
@@ -30,10 +32,10 @@ struct traits<TensorAssignOp<LhsXprType, RhsXprType> >
|
||||
typename traits<RhsXprType>::Index>::type Index;
|
||||
typedef typename LhsXprType::Nested LhsNested;
|
||||
typedef typename RhsXprType::Nested RhsNested;
|
||||
typedef typename remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename remove_reference<RhsNested>::type _RhsNested;
|
||||
static const std::size_t NumDimensions = internal::traits<LhsXprType>::NumDimensions;
|
||||
static const int Layout = internal::traits<LhsXprType>::Layout;
|
||||
typedef std::remove_reference_t<LhsNested> LhsNested_;
|
||||
typedef std::remove_reference_t<RhsNested> RhsNested_;
|
||||
static constexpr std::size_t NumDimensions = internal::traits<LhsXprType>::NumDimensions;
|
||||
static constexpr int Layout = internal::traits<LhsXprType>::Layout;
|
||||
typedef typename traits<LhsXprType>::PointerType PointerType;
|
||||
|
||||
enum {
|
||||
@@ -68,23 +70,23 @@ class TensorAssignOp : public TensorBase<TensorAssignOp<LhsXprType, RhsXprType>
|
||||
typedef typename Eigen::internal::traits<TensorAssignOp>::StorageKind StorageKind;
|
||||
typedef typename Eigen::internal::traits<TensorAssignOp>::Index Index;
|
||||
|
||||
static const int NumDims = Eigen::internal::traits<TensorAssignOp>::NumDimensions;
|
||||
static constexpr int NumDims = Eigen::internal::traits<TensorAssignOp>::NumDimensions;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorAssignOp(LhsXprType& lhs, const RhsXprType& rhs)
|
||||
: m_lhs_xpr(lhs), m_rhs_xpr(rhs) {}
|
||||
|
||||
/** \returns the nested expressions */
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::remove_all<typename LhsXprType::Nested>::type&
|
||||
lhsExpression() const { return *((typename internal::remove_all<typename LhsXprType::Nested>::type*)&m_lhs_xpr); }
|
||||
internal::remove_all_t<typename LhsXprType::Nested>&
|
||||
lhsExpression() const { return *((internal::remove_all_t<typename LhsXprType::Nested>*)&m_lhs_xpr); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename RhsXprType::Nested>::type&
|
||||
const internal::remove_all_t<typename RhsXprType::Nested>&
|
||||
rhsExpression() const { return m_rhs_xpr; }
|
||||
|
||||
protected:
|
||||
typename internal::remove_all<typename LhsXprType::Nested>::type& m_lhs_xpr;
|
||||
const typename internal::remove_all<typename RhsXprType::Nested>::type& m_rhs_xpr;
|
||||
internal::remove_all_t<typename LhsXprType::Nested>& m_lhs_xpr;
|
||||
const internal::remove_all_t<typename RhsXprType::Nested>& m_rhs_xpr;
|
||||
};
|
||||
|
||||
|
||||
@@ -100,8 +102,9 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static const int NumDims = XprType::NumDims;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int NumDims = XprType::NumDims;
|
||||
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
|
||||
|
||||
enum {
|
||||
IsAligned = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
|
||||
@@ -112,7 +115,6 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
|
||||
int(TensorEvaluator<RightArgType, Device>::BlockAccess),
|
||||
PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
|
||||
int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
|
||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
||||
RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess
|
||||
};
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
|
||||
// clang-format off
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorBase
|
||||
@@ -32,8 +34,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
typedef internal::traits<Derived> DerivedTraits;
|
||||
typedef typename DerivedTraits::Scalar Scalar;
|
||||
typedef typename DerivedTraits::Index Index;
|
||||
typedef typename internal::remove_const<Scalar>::type CoeffReturnType;
|
||||
static const int NumDimensions = DerivedTraits::NumDimensions;
|
||||
typedef std::remove_const_t<Scalar> CoeffReturnType;
|
||||
static constexpr int NumDimensions = DerivedTraits::NumDimensions;
|
||||
|
||||
// Generic nullary operation support.
|
||||
template <typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
@@ -309,6 +311,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
return unaryExpr(internal::scalar_abs_op<Scalar>());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_arg_op<Scalar>, const Derived>
|
||||
arg() const {
|
||||
return unaryExpr(internal::scalar_arg_op<Scalar>());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_clamp_op<Scalar>, const Derived>
|
||||
clip(Scalar min, Scalar max) const {
|
||||
@@ -316,17 +324,19 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const typename internal::conditional<NumTraits<CoeffReturnType>::IsComplex,
|
||||
TensorCwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
|
||||
Derived>::type
|
||||
EIGEN_STRONG_INLINE const std::conditional_t<NumTraits<CoeffReturnType>::IsComplex,
|
||||
TensorCwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
|
||||
Derived>
|
||||
conjugate() const {
|
||||
return choose(Cond<NumTraits<CoeffReturnType>::IsComplex>(), unaryExpr(internal::scalar_conjugate_op<Scalar>()), derived());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_pow_op<Scalar,Scalar> >, const Derived>
|
||||
pow(Scalar exponent) const {
|
||||
return unaryExpr(internal::bind2nd_op<internal::scalar_pow_op<Scalar,Scalar> >(exponent));
|
||||
template<typename ScalarExponent>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::enable_if_t<internal::is_arithmetic<typename NumTraits<ScalarExponent>::Real>::value,
|
||||
TensorCwiseUnaryOp<internal::scalar_unary_pow_op<Scalar, ScalarExponent>, const Derived>>
|
||||
pow(ScalarExponent exponent) const
|
||||
{
|
||||
return unaryExpr(internal::scalar_unary_pow_op<Scalar, ScalarExponent>(exponent));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -417,9 +427,9 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
|
||||
template<typename NewType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const typename internal::conditional<internal::is_same<NewType, CoeffReturnType>::value,
|
||||
Derived,
|
||||
TensorConversionOp<NewType, const Derived> >::type
|
||||
EIGEN_STRONG_INLINE const std::conditional_t<internal::is_same<NewType, CoeffReturnType>::value,
|
||||
Derived,
|
||||
TensorConversionOp<NewType, const Derived> >
|
||||
cast() const {
|
||||
return choose(Cond<internal::is_same<NewType, CoeffReturnType>::value>(), derived(), TensorConversionOp<NewType, const Derived>(derived()));
|
||||
}
|
||||
@@ -513,34 +523,34 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
// Comparisons and tests.
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>, const Derived, const OtherDerived>
|
||||
operator<(const OtherDerived& other) const {
|
||||
operator<(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) const {
|
||||
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>());
|
||||
}
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>, const Derived, const OtherDerived>
|
||||
operator<=(const OtherDerived& other) const {
|
||||
operator<=(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) const {
|
||||
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>());
|
||||
}
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>, const Derived, const OtherDerived>
|
||||
operator>(const OtherDerived& other) const {
|
||||
operator>(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) const {
|
||||
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>());
|
||||
}
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>, const Derived, const OtherDerived>
|
||||
operator>=(const OtherDerived& other) const {
|
||||
operator>=(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) const {
|
||||
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>());
|
||||
}
|
||||
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>, const Derived, const OtherDerived>
|
||||
operator==(const OtherDerived& other) const {
|
||||
operator==(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) const {
|
||||
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>());
|
||||
}
|
||||
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>, const Derived, const OtherDerived>
|
||||
operator!=(const OtherDerived& other) const {
|
||||
operator!=(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) const {
|
||||
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>());
|
||||
}
|
||||
|
||||
@@ -715,81 +725,81 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
}
|
||||
|
||||
template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorReductionOp<internal::AndReducer, const Dims, const typename internal::conditional<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> >::type >
|
||||
const TensorReductionOp<internal::AndReducer, const Dims, const std::conditional_t<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> > >
|
||||
all(const Dims& dims) const {
|
||||
return cast<bool>().reduce(dims, internal::AndReducer());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorReductionOp<internal::AndReducer, const DimensionList<Index, NumDimensions>, const typename internal::conditional<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> >::type >
|
||||
const TensorReductionOp<internal::AndReducer, const DimensionList<Index, NumDimensions>, const std::conditional_t<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> > >
|
||||
all() const {
|
||||
DimensionList<Index, NumDimensions> in_dims;
|
||||
return cast<bool>().reduce(in_dims, internal::AndReducer());
|
||||
}
|
||||
|
||||
template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorReductionOp<internal::OrReducer, const Dims, const typename internal::conditional<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> >::type >
|
||||
const TensorReductionOp<internal::OrReducer, const Dims, const std::conditional_t<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> > >
|
||||
any(const Dims& dims) const {
|
||||
return cast<bool>().reduce(dims, internal::OrReducer());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorReductionOp<internal::OrReducer, const DimensionList<Index, NumDimensions>, const typename internal::conditional<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> >::type >
|
||||
const TensorReductionOp<internal::OrReducer, const DimensionList<Index, NumDimensions>, const std::conditional_t<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> > >
|
||||
any() const {
|
||||
DimensionList<Index, NumDimensions> in_dims;
|
||||
return cast<bool>().reduce(in_dims, internal::OrReducer());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorTupleReducerOp<
|
||||
internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
|
||||
const TensorPairReducerOp<
|
||||
internal::ArgMaxPairReducer<Pair<Index, CoeffReturnType> >,
|
||||
const array<Index, NumDimensions>, const Derived>
|
||||
argmax() const {
|
||||
array<Index, NumDimensions> in_dims;
|
||||
for (Index d = 0; d < NumDimensions; ++d) in_dims[d] = d;
|
||||
return TensorTupleReducerOp<
|
||||
internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
|
||||
return TensorPairReducerOp<
|
||||
internal::ArgMaxPairReducer<Pair<Index, CoeffReturnType> >,
|
||||
const array<Index, NumDimensions>,
|
||||
const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims);
|
||||
const Derived>(derived(), internal::ArgMaxPairReducer<Pair<Index, CoeffReturnType> >(), -1, in_dims);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorTupleReducerOp<
|
||||
internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
|
||||
const TensorPairReducerOp<
|
||||
internal::ArgMinPairReducer<Pair<Index, CoeffReturnType> >,
|
||||
const array<Index, NumDimensions>, const Derived>
|
||||
argmin() const {
|
||||
array<Index, NumDimensions> in_dims;
|
||||
for (Index d = 0; d < NumDimensions; ++d) in_dims[d] = d;
|
||||
return TensorTupleReducerOp<
|
||||
internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
|
||||
return TensorPairReducerOp<
|
||||
internal::ArgMinPairReducer<Pair<Index, CoeffReturnType> >,
|
||||
const array<Index, NumDimensions>,
|
||||
const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims);
|
||||
const Derived>(derived(), internal::ArgMinPairReducer<Pair<Index, CoeffReturnType> >(), -1, in_dims);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorTupleReducerOp<
|
||||
internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
|
||||
const TensorPairReducerOp<
|
||||
internal::ArgMaxPairReducer<Pair<Index, CoeffReturnType> >,
|
||||
const array<Index, 1>, const Derived>
|
||||
argmax(const Index return_dim) const {
|
||||
array<Index, 1> in_dims;
|
||||
in_dims[0] = return_dim;
|
||||
return TensorTupleReducerOp<
|
||||
internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
|
||||
return TensorPairReducerOp<
|
||||
internal::ArgMaxPairReducer<Pair<Index, CoeffReturnType> >,
|
||||
const array<Index, 1>,
|
||||
const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims);
|
||||
const Derived>(derived(), internal::ArgMaxPairReducer<Pair<Index, CoeffReturnType> >(), return_dim, in_dims);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorTupleReducerOp<
|
||||
internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
|
||||
const TensorPairReducerOp<
|
||||
internal::ArgMinPairReducer<Pair<Index, CoeffReturnType> >,
|
||||
const array<Index, 1>, const Derived>
|
||||
argmin(const Index return_dim) const {
|
||||
array<Index, 1> in_dims;
|
||||
in_dims[0] = return_dim;
|
||||
return TensorTupleReducerOp<
|
||||
internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
|
||||
return TensorPairReducerOp<
|
||||
internal::ArgMinPairReducer<Pair<Index, CoeffReturnType> >,
|
||||
const array<Index, 1>,
|
||||
const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims);
|
||||
const Derived>(derived(), internal::ArgMinPairReducer<Pair<Index, CoeffReturnType> >(), return_dim, in_dims);
|
||||
}
|
||||
|
||||
template <typename Reducer, typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -935,11 +945,11 @@ class TensorBase<Derived, ReadOnlyAccessors>
return TensorInflationOp<const Strides, const Derived>(derived(), strides);
}

// Returns a tensor containing index/value tuples
// Returns a tensor containing index/value pairs
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorIndexTupleOp<const Derived>
index_tuples() const {
return TensorIndexTupleOp<const Derived>(derived());
const TensorIndexPairOp<const Derived>
index_pairs() const {
return TensorIndexPairOp<const Derived>(derived());
}

// Support for custom unary and binary operations
@@ -960,6 +970,15 @@ class TensorBase<Derived, ReadOnlyAccessors>
return TensorForcedEvalOp<const Derived>(derived());
}

// Returns a formatted tensor ready for printing to a stream
inline const TensorWithFormat<Derived,DerivedTraits::Layout,DerivedTraits::NumDimensions> format(const TensorIOFormat& fmt) const {
return TensorWithFormat<Derived,DerivedTraits::Layout,DerivedTraits::NumDimensions>(derived(), fmt);
}

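The format() accessor added above pairs with a TensorIOFormat object. A usage sketch (illustrative, not part of the patch; the Numpy() preset factory is assumed to be provided by TensorIO.h):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

void print_formatted() {
  Eigen::Tensor<float, 2> t(2, 2);
  t.setValues({{1.f, 2.f}, {3.f, 4.f}});
  // Pretty-print using the assumed NumPy-style preset.
  std::cout << t.format(Eigen::TensorIOFormat::Numpy()) << std::endl;
}
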
#ifdef EIGEN_READONLY_TENSORBASE_PLUGIN
#include EIGEN_READONLY_TENSORBASE_PLUGIN
#endif

protected:
template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor;
template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize;
@@ -977,7 +996,7 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
typedef typename DerivedTraits::Scalar Scalar;
typedef typename DerivedTraits::Index Index;
typedef Scalar CoeffReturnType;
static const int NumDimensions = DerivedTraits::NumDimensions;
static constexpr int NumDimensions = DerivedTraits::NumDimensions;

template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor;
template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize;
@@ -1001,7 +1020,6 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
return derived() = this->template random<RandomGenerator>();
}

#if EIGEN_HAS_VARIADIC_TEMPLATES
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& setValues(
const typename internal::Initializer<Derived, NumDimensions>::InitList& vals) {
@@ -1009,7 +1027,6 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
internal::initialize_tensor<Derived, NumDimensions>(eval, vals);
return derived();
}
#endif // EIGEN_HAS_VARIADIC_TEMPLATES

template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator+=(const OtherDerived& other) {
@@ -1152,6 +1169,10 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
return TensorAsyncDevice<Derived, DeviceType, DoneCallback>(dev, derived(), std::move(done));
}

#ifdef EIGEN_TENSORBASE_PLUGIN
#include EIGEN_TENSORBASE_PLUGIN
#endif

protected:
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TensorBase)
EIGEN_DEFAULT_COPY_CONSTRUCTOR(TensorBase)

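The hunks above touch the writable TensorBase helpers (setValues, setRandom, operator+=). A short sketch of their use (illustrative, not part of the patch; names are for this example):

#include <unsupported/Eigen/CXX11/Tensor>

void init_and_accumulate() {
  Eigen::Tensor<int, 2> a(2, 3), b(2, 3);
  a.setValues({{1, 2, 3}, {4, 5, 6}});  // nested initializer lists, one level per dimension
  b.setConstant(10);
  a += b;                               // element-wise, via the writable TensorBase::operator+=
}
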
@@ -8,6 +8,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
@@ -242,7 +244,7 @@ class TensorBlockDescriptor {
|
||||
const DestinationBufferKind& kind() const { return m_kind; }
|
||||
|
||||
private:
|
||||
friend class TensorBlockDescriptor;
|
||||
friend class TensorBlockDescriptor<NumDims, IndexType>;
|
||||
|
||||
DestinationBuffer() : m_data(NULL), m_data_type_size(0), m_kind(kEmpty) {}
|
||||
|
||||
@@ -706,7 +708,7 @@ class TensorMaterializedBlock {
|
||||
}
|
||||
|
||||
private:
|
||||
friend class TensorMaterializedBlock;
|
||||
friend class TensorMaterializedBlock<Scalar, NumDims, Layout, IndexType>;
|
||||
|
||||
Storage(Scalar* data, const Dimensions& dimensions,
|
||||
const Dimensions& strides, bool materialized_in_output,
|
||||
@@ -833,14 +835,14 @@ class TensorMaterializedBlock {
|
||||
|
||||
template <typename UnaryOp, typename ArgTensorBlock>
|
||||
class TensorCwiseUnaryBlock {
|
||||
static const bool NoArgBlockAccess =
|
||||
static constexpr bool NoArgBlockAccess =
|
||||
internal::is_void<typename ArgTensorBlock::XprType>::value;
|
||||
|
||||
public:
|
||||
typedef typename conditional<
|
||||
typedef std::conditional_t<
|
||||
NoArgBlockAccess, void,
|
||||
TensorCwiseUnaryOp<UnaryOp, const typename ArgTensorBlock::XprType> >::
|
||||
type XprType;
|
||||
TensorCwiseUnaryOp<UnaryOp, const typename ArgTensorBlock::XprType> >
|
||||
XprType;
|
||||
|
||||
typedef typename XprScalar<XprType>::type Scalar;
|
||||
|
||||
@@ -864,15 +866,15 @@ class TensorCwiseUnaryBlock {
|
||||
|
||||
template <typename BinaryOp, typename LhsTensorBlock, typename RhsTensorBlock>
|
||||
class TensorCwiseBinaryBlock {
|
||||
static const bool NoArgBlockAccess =
|
||||
static constexpr bool NoArgBlockAccess =
|
||||
internal::is_void<typename LhsTensorBlock::XprType>::value ||
|
||||
internal::is_void<typename RhsTensorBlock::XprType>::value;
|
||||
|
||||
public:
|
||||
typedef typename conditional<
|
||||
typedef std::conditional_t<
|
||||
NoArgBlockAccess, void,
|
||||
TensorCwiseBinaryOp<BinaryOp, const typename LhsTensorBlock::XprType,
|
||||
const typename RhsTensorBlock::XprType> >::type
|
||||
const typename RhsTensorBlock::XprType> >
|
||||
XprType;
|
||||
|
||||
typedef typename XprScalar<XprType>::type Scalar;
|
||||
@@ -911,12 +913,12 @@ class TensorCwiseBinaryBlock {
|
||||
template <typename BlockFactory, typename ArgTensorBlock>
|
||||
class TensorUnaryExprBlock {
|
||||
typedef typename ArgTensorBlock::XprType ArgXprType;
|
||||
static const bool NoArgBlockAccess = internal::is_void<ArgXprType>::value;
|
||||
static constexpr bool NoArgBlockAccess = internal::is_void<ArgXprType>::value;
|
||||
|
||||
public:
|
||||
typedef typename conditional<
|
||||
typedef std::conditional_t<
|
||||
NoArgBlockAccess, void,
|
||||
typename BlockFactory::template XprType<ArgXprType>::type>::type XprType;
|
||||
typename BlockFactory::template XprType<ArgXprType>::type> XprType;
|
||||
|
||||
typedef typename XprScalar<XprType>::type Scalar;
|
||||
|
||||
@@ -945,15 +947,15 @@ class TensorTernaryExprBlock {
|
||||
typedef typename Arg2TensorBlock::XprType Arg2XprType;
|
||||
typedef typename Arg3TensorBlock::XprType Arg3XprType;
|
||||
|
||||
static const bool NoArgBlockAccess = internal::is_void<Arg1XprType>::value ||
|
||||
internal::is_void<Arg2XprType>::value ||
|
||||
internal::is_void<Arg3XprType>::value;
|
||||
static constexpr bool NoArgBlockAccess = internal::is_void<Arg1XprType>::value ||
|
||||
internal::is_void<Arg2XprType>::value ||
|
||||
internal::is_void<Arg3XprType>::value;
|
||||
|
||||
public:
|
||||
typedef typename conditional<
|
||||
typedef std::conditional_t<
|
||||
NoArgBlockAccess, void,
|
||||
typename BlockFactory::template XprType<Arg1XprType, Arg2XprType,
|
||||
Arg3XprType>::type>::type XprType;
|
||||
Arg3XprType>::type> XprType;
|
||||
|
||||
typedef typename XprScalar<XprType>::type Scalar;
|
||||
|
||||
@@ -1141,7 +1143,7 @@ class StridedLinearBufferCopy {
|
||||
|
||||
template <typename Scalar, typename IndexType, int NumDims, int Layout>
|
||||
class TensorBlockIO {
|
||||
static const bool IsColMajor = (Layout == ColMajor);
|
||||
static constexpr bool IsColMajor = (Layout == ColMajor);
|
||||
|
||||
typedef StridedLinearBufferCopy<Scalar, IndexType> LinCopy;
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorBroadcasting
|
||||
@@ -28,9 +30,9 @@ struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -83,7 +85,7 @@ class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, X
|
||||
const Broadcast& broadcast() const { return m_broadcast; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
@@ -98,14 +100,14 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
{
|
||||
typedef TensorBroadcastingOp<Broadcast, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
protected: // all the non-static fields must have the same access control, otherwise the TensorEvaluator wont be standard layout;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
protected: // all the non-static fields must have the same access control, otherwise the TensorEvaluator won't be standard layout;
|
||||
bool isCopy, nByOne, oneByN;
|
||||
public:
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
@@ -116,18 +118,18 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
||||
PreferBlockAccess = true,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
RawAccess = false
|
||||
};
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
|
||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||
typedef std::remove_const_t<Scalar> ScalarNoConst;
|
||||
|
||||
// We do block based broadcasting using a trick with 2x tensor rank and 0
|
||||
// strides. See block method implementation for details.
|
||||
typedef DSizes<Index, 2 * NumDims> BroadcastDimensions;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
|
||||
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
|
||||
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
|
||||
|
||||
typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
|
||||
@@ -144,7 +146,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
{

// The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
// and store the result in a scalar. Instead one should reshape the scalar into a a N-D
// and store the result in a scalar. Instead one should reshape the scalar into a N-D
// tensor with N >= 1 of 1 element first and then broadcast.
EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
const InputDimensions& input_dims = m_impl.dimensions();
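A sketch of the reshape-then-broadcast pattern that the comment above prescribes (illustrative, not part of the patch; sizes and names are for this example):

#include <unsupported/Eigen/CXX11/Tensor>

void broadcast_scalar_to_row() {
  Eigen::Tensor<float, 0> s;
  s() = 3.14f;

  Eigen::array<Eigen::Index, 1> one_dim{{1}};
  Eigen::array<Eigen::Index, 1> reps{{5}};
  // Reshape the scalar into a rank-1 tensor of one element, then repeat it 5 times.
  Eigen::Tensor<float, 1> row = s.reshape(one_dim).broadcast(reps);
}
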
@@ -229,7 +231,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
|
||||
if (internal::is_input_scalar<internal::remove_all_t<InputDimensions>>::value) {
|
||||
return m_impl.coeff(0);
|
||||
}
|
||||
|
||||
@@ -322,7 +324,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
|
||||
if (internal::is_input_scalar<internal::remove_all_t<InputDimensions>>::value) {
|
||||
return internal::pset1<PacketReturnType>(m_impl.coeff(0));
|
||||
}
|
||||
|
||||
@@ -368,10 +370,9 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByNByOne
|
||||
(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
Index startDim, endDim;
|
||||
Index inputIndex, outputOffset, batchedIndex;
|
||||
|
||||
@@ -410,25 +411,23 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
// Consider the flattened tensor [v0, ..., vN],
|
||||
// Concatenates m_broadcast[dim] copies,
|
||||
// [v0, ..., vN, v0, ..., vN, ... ]
|
||||
// with dim == NumDims - 1 for col-major, dim == 0 for row-major.
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
Index dim, inputIndex;
|
||||
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
dim = NumDims - 1;
|
||||
} else {
|
||||
dim = 0;
|
||||
}
|
||||
|
||||
inputIndex = index % m_inputStrides[dim];
|
||||
if (inputIndex + PacketSize <= m_inputStrides[dim]) {
|
||||
// Size of flattened tensor.
|
||||
const Index M = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ?
|
||||
m_inputStrides[NumDims - 1] : m_inputStrides[0];
|
||||
Index inputIndex = index % M;
|
||||
if (inputIndex + PacketSize <= M) {
|
||||
return m_impl.template packet<Unaligned>(inputIndex);
|
||||
} else {
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
if (inputIndex > m_inputStrides[dim]-1) {
|
||||
if (inputIndex > M - 1) {
|
||||
inputIndex = 0;
|
||||
}
|
||||
values[i] = m_impl.coeff(inputIndex++);
|
||||
@@ -440,32 +439,29 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetNByOne(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
// Consider the flattened tensor [v0, ..., vN],
|
||||
// Interleaves m_broadcast[dim] copies,
|
||||
// [v0, v0, ..., v1, v1, ..., vN, vN, ... ]
|
||||
// with dim == 0 for col-major, dim == NumDims - 1 for row-major.
|
||||
eigen_assert(index + PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
Index dim, inputIndex, outputOffset;
|
||||
const Index M = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ?
|
||||
m_broadcast[0] : m_broadcast[NumDims - 1];
|
||||
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
dim = 1;
|
||||
} else {
|
||||
dim = NumDims - 2;
|
||||
}
|
||||
|
||||
inputIndex = index / m_outputStrides[dim];
|
||||
outputOffset = index % m_outputStrides[dim];
|
||||
if (outputOffset + PacketSize <= m_outputStrides[dim]) {
|
||||
values[0] = m_impl.coeff(inputIndex);
|
||||
return internal::pload1<PacketReturnType>(values);
|
||||
Index inputIndex = index / M;
|
||||
Index outputOffset = index % M;
|
||||
if (outputOffset + PacketSize <= M) {
|
||||
return internal::pset1<PacketReturnType>(m_impl.coeff(inputIndex));
|
||||
} else {
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
|
||||
if (outputOffset + cur < m_outputStrides[dim]) {
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
if (outputOffset < M) {
|
||||
values[i] = m_impl.coeff(inputIndex);
|
||||
++outputOffset;
|
||||
} else {
|
||||
values[i] = m_impl.coeff(++inputIndex);
|
||||
outputOffset = 0;
|
||||
cur = 0;
|
||||
outputOffset = 1; // Next offset.
|
||||
}
|
||||
}
|
||||
return internal::pload<PacketReturnType>(values);
|
||||
@@ -477,7 +473,6 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
const Index originalIndex = index;
|
||||
@@ -517,7 +512,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) {
|
||||
return m_impl.template packet<Unaligned>(inputIndex);
|
||||
} else {
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
values[0] = m_impl.coeff(inputIndex);
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 1; i < PacketSize; ++i) {
|
||||
@@ -535,7 +530,6 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
const Index originalIndex = index;
|
||||
@@ -575,7 +569,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) {
|
||||
return m_impl.template packet<Unaligned>(inputIndex);
|
||||
} else {
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
values[0] = m_impl.coeff(inputIndex);
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 1; i < PacketSize; ++i) {
|
||||
@@ -701,7 +695,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
}
|
||||
#endif
|
||||
private:
|
||||
static const bool IsColMajor =
|
||||
static constexpr bool IsColMajor =
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor);
|
||||
|
||||
// We will build a general case block broadcasting on top of broadcasting
|
||||
@@ -1080,7 +1074,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
|
||||
protected:
|
||||
const Device EIGEN_DEVICE_REF m_device;
|
||||
const typename internal::remove_reference<Broadcast>::type m_broadcast;
|
||||
const std::remove_reference_t<Broadcast> m_broadcast;
|
||||
Dimensions m_dimensions;
|
||||
array<Index, NumDims> m_outputStrides;
|
||||
array<Index, NumDims> m_inputStrides;
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorKChippingReshaping
|
||||
@@ -29,9 +31,9 @@ struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions - 1;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions - 1;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -98,7 +100,7 @@ class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
const Index dim() const { return m_dim.actualDim(); }

EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }

EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorChippingOp)
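TensorChippingOp backs the public chip() call. A minimal usage sketch (illustrative, not part of the patch):

#include <unsupported/Eigen/CXX11/Tensor>

void take_a_row() {
  Eigen::Tensor<float, 2> m(3, 4);
  m.setRandom();
  // Fix index 1 of dimension 0: a rank-1 result with the 4 coefficients of that slice.
  Eigen::Tensor<float, 1> row = m.chip(1, 0);
}
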
@@ -115,31 +117,31 @@ template<DenseIndex DimId, typename ArgType, typename Device>
|
||||
struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
|
||||
{
|
||||
typedef TensorChippingOp<DimId, ArgType> XprType;
|
||||
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static const int NumDims = NumInputDims-1;
|
||||
static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = NumInputDims-1;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
|
||||
enum {
|
||||
// Alignment can't be guaranteed at compile time since it depends on the
|
||||
// slice offsets.
|
||||
IsAligned = false,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
||||
// Chipping of outer-most dimension is a trivial operation, because we can
|
||||
// read and write directly from the underlying tensor using single offset.
|
||||
IsOuterChipping = (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) ||
|
||||
(static_cast<int>(Layout) == RowMajor && DimId == 0),
|
||||
IsOuterChipping = (Layout == ColMajor && DimId == NumInputDims - 1) ||
|
||||
(Layout == RowMajor && DimId == 0),
|
||||
// Chipping inner-most dimension.
|
||||
IsInnerChipping = (static_cast<int>(Layout) == ColMajor && DimId == 0) ||
|
||||
(static_cast<int>(Layout) == RowMajor && DimId == NumInputDims - 1),
|
||||
IsInnerChipping = (Layout == ColMajor && DimId == 0) ||
|
||||
(Layout == RowMajor && DimId == NumInputDims - 1),
|
||||
// Prefer block access if the underlying expression prefers it, otherwise
|
||||
// only if chipping is not trivial.
|
||||
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess ||
|
||||
@@ -148,7 +150,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||
typedef std::remove_const_t<Scalar> ScalarNoConst;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
|
||||
@@ -217,14 +219,13 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
if (isInnerChipping()) {
|
||||
// m_stride is equal to 1, so let's avoid the integer division.
|
||||
eigen_assert(m_stride == 1);
|
||||
Index inputIndex = index * m_inputStride + m_inputOffset;
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
values[i] = m_impl.coeff(inputIndex);
|
||||
@@ -244,7 +245,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
|
||||
return m_impl.template packet<LoadMode>(inputIndex);
|
||||
} else {
|
||||
// Cross the stride boundary. Fallback to slow path.
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
values[i] = coeff(index);
|
||||
@@ -412,14 +413,14 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
|
||||
{
|
||||
typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
|
||||
typedef TensorChippingOp<DimId, ArgType> XprType;
|
||||
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static const int NumDims = NumInputDims-1;
|
||||
static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = NumInputDims-1;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
|
||||
enum {
|
||||
IsAligned = false,
|
||||
@@ -445,12 +446,10 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
|
||||
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void writePacket(Index index, const PacketReturnType& x)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
|
||||
if (this->isInnerChipping()) {
|
||||
// m_stride is equal to 1, so let's avoid the integer division.
|
||||
eigen_assert(this->m_stride == 1);
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
|
||||
Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
|
||||
EIGEN_UNROLL_LOOP
|
||||
@@ -470,7 +469,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
|
||||
this->m_impl.template writePacket<StoreMode>(inputIndex, x);
|
||||
} else {
|
||||
// Cross stride boundary. Fallback to slow path.
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
@@ -484,7 +483,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
|
||||
template <typename TensorBlock>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
|
||||
const TensorBlockDesc& desc, const TensorBlock& block) {
|
||||
assert(this->m_impl.data() != NULL);
|
||||
eigen_assert(this->m_impl.data() != NULL);
|
||||
|
||||
const Index chip_dim = this->m_dim.actualDim();
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorConcatenationOp
|
||||
@@ -32,13 +34,13 @@ struct traits<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >
|
||||
typename traits<RhsXprType>::Index>::type Index;
|
||||
typedef typename LhsXprType::Nested LhsNested;
|
||||
typedef typename RhsXprType::Nested RhsNested;
|
||||
typedef typename remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename remove_reference<RhsNested>::type _RhsNested;
|
||||
static const int NumDimensions = traits<LhsXprType>::NumDimensions;
|
||||
static const int Layout = traits<LhsXprType>::Layout;
|
||||
typedef std::remove_reference_t<LhsNested> LhsNested_;
|
||||
typedef std::remove_reference_t<RhsNested> RhsNested_;
|
||||
static constexpr int NumDimensions = traits<LhsXprType>::NumDimensions;
|
||||
static constexpr int Layout = traits<LhsXprType>::Layout;
|
||||
enum { Flags = 0 };
|
||||
typedef typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
|
||||
typename traits<LhsXprType>::PointerType, typename traits<RhsXprType>::PointerType>::type PointerType;
|
||||
typedef std::conditional_t<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
|
||||
typename traits<LhsXprType>::PointerType, typename traits<RhsXprType>::PointerType> PointerType;
|
||||
};
|
||||
|
||||
template<typename Axis, typename LhsXprType, typename RhsXprType>
|
||||
@@ -73,11 +75,11 @@ class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsX
|
||||
: m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_axis(axis) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename LhsXprType::Nested>::type&
|
||||
const internal::remove_all_t<typename LhsXprType::Nested>&
|
||||
lhsExpression() const { return m_lhs_xpr; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename RhsXprType::Nested>::type&
|
||||
const internal::remove_all_t<typename RhsXprType::Nested>&
|
||||
rhsExpression() const { return m_rhs_xpr; }
|
||||
|
||||
EIGEN_DEVICE_FUNC const Axis& axis() const { return m_axis; }
|
||||
@@ -96,14 +98,15 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
|
||||
{
|
||||
typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumDims = internal::array_size<typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
|
||||
static const int RightNumDims = internal::array_size<typename TensorEvaluator<RightArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
|
||||
static constexpr int RightNumDims = internal::array_size<typename TensorEvaluator<RightArgType, Device>::Dimensions>::value;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &&
|
||||
@@ -111,7 +114,6 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess ||
|
||||
TensorEvaluator<RightArgType, Device>::PreferBlockAccess,
|
||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
@@ -303,6 +305,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
|
||||
typedef TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> Base;
|
||||
typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType;
|
||||
typedef typename Base::Dimensions Dimensions;
|
||||
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &&
|
||||
@@ -310,7 +313,6 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess ||
|
||||
TensorEvaluator<RightArgType, Device>::PreferBlockAccess,
|
||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorContraction
|
||||
@@ -25,8 +27,8 @@ template<typename Dimensions, typename LhsXprType, typename RhsXprType, typename
|
||||
struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType, OutputKernelType> >
|
||||
{
|
||||
// Type promotion to handle the case where the types of the lhs and the rhs are different.
|
||||
typedef typename gebp_traits<typename remove_const<typename LhsXprType::Scalar>::type,
|
||||
typename remove_const<typename RhsXprType::Scalar>::type>::ResScalar Scalar;
|
||||
typedef typename gebp_traits<std::remove_const_t<typename LhsXprType::Scalar>,
|
||||
std::remove_const_t<typename RhsXprType::Scalar>>::ResScalar Scalar;
|
||||
|
||||
typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
|
||||
typename traits<RhsXprType>::StorageKind>::ret StorageKind;
|
||||
@@ -34,15 +36,15 @@ struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType, OutputKern
|
||||
typename traits<RhsXprType>::Index>::type Index;
|
||||
typedef typename LhsXprType::Nested LhsNested;
|
||||
typedef typename RhsXprType::Nested RhsNested;
|
||||
typedef typename remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename remove_reference<RhsNested>::type _RhsNested;
|
||||
typedef std::remove_reference_t<LhsNested> LhsNested_;
|
||||
typedef std::remove_reference_t<RhsNested> RhsNested_;
|
||||
|
||||
// From NumDims below.
|
||||
static const int NumDimensions = traits<LhsXprType>::NumDimensions + traits<RhsXprType>::NumDimensions - 2 * array_size<Dimensions>::value;
|
||||
static const int Layout = traits<LhsXprType>::Layout;
|
||||
typedef typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
|
||||
typename traits<LhsXprType>::PointerType,
|
||||
typename traits<RhsXprType>::PointerType>::type
|
||||
static constexpr int NumDimensions = traits<LhsXprType>::NumDimensions + traits<RhsXprType>::NumDimensions - 2 * array_size<Dimensions>::value;
|
||||
static constexpr int Layout = traits<LhsXprType>::Layout;
|
||||
typedef std::conditional_t<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
|
||||
typename traits<LhsXprType>::PointerType,
|
||||
typename traits<RhsXprType>::PointerType>
|
||||
PointerType;
|
||||
|
||||
enum {
|
||||
@@ -71,7 +73,7 @@ struct traits<TensorEvaluator<const TensorContractionOp<Indices_, LeftArgType_,
|
||||
typedef Device_ Device;
|
||||
|
||||
// From NumDims below.
|
||||
static const int NumDimensions = traits<LeftArgType_>::NumDimensions + traits<RightArgType_>::NumDimensions - 2 * array_size<Indices_>::value;
|
||||
static constexpr int NumDimensions = traits<LeftArgType_>::NumDimensions + traits<RightArgType_>::NumDimensions - 2 * array_size<Indices_>::value;
|
||||
};
|
||||
|
||||
// Helper class to allocate and deallocate temporary memory for packed buffers.
|
||||
@@ -343,11 +345,11 @@ class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXp

/** \returns the nested expressions */
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename LhsXprType::Nested>::type&
const internal::remove_all_t<typename LhsXprType::Nested>&
lhsExpression() const { return m_lhs_xpr; }

EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename RhsXprType::Nested>::type&
const internal::remove_all_t<typename RhsXprType::Nested>&
rhsExpression() const { return m_rhs_xpr; }

EIGEN_DEVICE_FUNC
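TensorContractionOp is what the public contract() call constructs; a minimal sketch equivalent to a matrix product (illustrative, not part of the patch):

#include <unsupported/Eigen/CXX11/Tensor>

void contract_as_matmul() {
  Eigen::Tensor<float, 2> A(2, 3), B(3, 4);
  A.setRandom();
  B.setRandom();
  // Contract dimension 1 of A with dimension 0 of B: C(i, j) = sum_k A(i, k) * B(k, j).
  Eigen::array<Eigen::IndexPair<int>, 1> dims = {Eigen::IndexPair<int>(1, 0)};
  Eigen::Tensor<float, 2> C = A.contract(B, dims);  // 2 x 4 result
}
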
@@ -371,19 +373,19 @@ struct TensorContractionEvaluatorBase : internal::no_assignment_operator
|
||||
typedef typename internal::traits<Derived>::Device Device;
|
||||
|
||||
typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
typedef StorageMemory<Scalar, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = true,
|
||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = true
|
||||
};
|
||||
@@ -396,20 +398,20 @@ struct TensorContractionEvaluatorBase : internal::no_assignment_operator
|
||||
// inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
|
||||
// If we want to compute A * B = C, where A is LHS and B is RHS, the code
|
||||
// will pretend B is LHS and A is RHS.
|
||||
typedef typename internal::conditional<
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
|
||||
typedef typename internal::conditional<
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
|
||||
typedef std::conditional_t<
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType> EvalLeftArgType;
|
||||
typedef std::conditional_t<
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType> EvalRightArgType;
|
||||
|
||||
typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluatorType;
|
||||
typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluatorType;
|
||||
|
||||
static const int LDims =
|
||||
static constexpr int LDims =
|
||||
internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
|
||||
static const int RDims =
|
||||
static constexpr int RDims =
|
||||
internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
|
||||
static const int ContractDims = internal::array_size<Indices>::value;
|
||||
static const int NumDims = LDims + RDims - 2 * ContractDims;
|
||||
static constexpr int ContractDims = internal::array_size<Indices>::value;
|
||||
static constexpr int NumDims = LDims + RDims - 2 * ContractDims;
|
||||
|
||||
typedef array<Index, ContractDims> contract_t;
|
||||
typedef array<Index, LDims - ContractDims> left_nocontract_t;
|
||||
@@ -733,8 +735,8 @@ struct TensorContractionEvaluatorBase : internal::no_assignment_operator
|
||||
const Index rows = m_i_size;
|
||||
const Index cols = m_k_size;
|
||||
|
||||
typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
|
||||
typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
|
||||
typedef std::remove_const_t<typename EvalLeftArgType::Scalar> LhsScalar;
|
||||
typedef std::remove_const_t<typename EvalRightArgType::Scalar> RhsScalar;
|
||||
typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
|
||||
typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
|
||||
const Index lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size;
|
||||
@@ -762,7 +764,7 @@ struct TensorContractionEvaluatorBase : internal::no_assignment_operator
const Index resIncr(1);

// zero out the result buffer (which must be of size at least rows * sizeof(Scalar)
m_device.memset(buffer, 0, rows * sizeof(Scalar));
m_device.fill(buffer, buffer + rows, Scalar(0));

internal::general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,false,RhsScalar,RhsMapper,false>::run(
rows, cols, lhs, rhs,
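A note on the memset-to-fill change above: a byte-wise memset matches Scalar(0) only when the zero value has an all-zero bit pattern, so a value fill is the safer generic choice. This is the likely motivation; the sketch below uses a hypothetical scalar type to illustrate the difference (not from the patch):

#include <algorithm>
#include <cstring>
#include <vector>

struct Offset10 {
  int biased;                      // stores value + 10, so value 0 is biased == 10
  Offset10(int v = 0) : biased(v + 10) {}
};

void zero_buffers(std::size_t n) {
  std::vector<Offset10> a(n), b(n);
  std::fill(a.begin(), a.end(), Offset10(0));      // every element really holds value 0
  std::memset(b.data(), 0, n * sizeof(Offset10));  // all-zero bytes: *not* Offset10(0)
}
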
@@ -810,8 +812,8 @@ struct TensorContractionEvaluatorBase : internal::no_assignment_operator
|
||||
const Index n = this->m_j_size;
|
||||
|
||||
// define data mappers for Lhs and Rhs
|
||||
typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
|
||||
typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
|
||||
typedef std::remove_const_t<typename EvalLeftArgType::Scalar> LhsScalar;
|
||||
typedef std::remove_const_t<typename EvalRightArgType::Scalar> RhsScalar;
|
||||
|
||||
typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
|
||||
typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
|
||||
@@ -869,7 +871,7 @@ struct TensorContractionEvaluatorBase : internal::no_assignment_operator
|
||||
// If a contraction kernel does not support beta, explicitly initialize
|
||||
// output buffer with zeroes.
|
||||
if (!TensorContractionKernel::HasBeta) {
|
||||
this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
|
||||
this->m_device.fill(buffer, buffer + m * n, Scalar(0));
|
||||
}
|
||||
|
||||
for(Index i2=0; i2<m; i2+=mc)
|
||||
@@ -976,35 +978,31 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
typedef TensorContractionEvaluatorBase<Self> Base;
|
||||
|
||||
typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
|
||||
enum {
|
||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout
|
||||
};
|
||||
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
|
||||
|
||||
// Most of the code is assuming that both input tensors are ColMajor. If the
|
||||
// inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
|
||||
// If we want to compute A * B = C, where A is LHS and B is RHS, the code
|
||||
// will pretend B is LHS and A is RHS.
|
||||
typedef typename internal::conditional<
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
|
||||
typedef typename internal::conditional<
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
|
||||
typedef std::conditional_t<Layout == static_cast<int>(ColMajor), LeftArgType, RightArgType> EvalLeftArgType;
|
||||
typedef std::conditional_t<Layout == static_cast<int>(ColMajor), RightArgType, LeftArgType> EvalRightArgType;
|
||||
|
||||
static const int LDims =
|
||||
static constexpr int LDims =
|
||||
internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
|
||||
static const int RDims =
|
||||
static constexpr int RDims =
|
||||
internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
|
||||
static const int ContractDims = internal::array_size<Indices>::value;
|
||||
static constexpr int ContractDims = internal::array_size<Indices>::value;
|
||||
|
||||
typedef array<Index, ContractDims> contract_t;
|
||||
typedef array<Index, LDims - ContractDims> left_nocontract_t;
|
||||
typedef array<Index, RDims - ContractDims> right_nocontract_t;
|
||||
|
||||
static const int NumDims = LDims + RDims - 2 * ContractDims;
|
||||
static constexpr int NumDims = LDims + RDims - 2 * ContractDims;
|
||||
|
||||
// Could we use NumDimensions here?
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H
|
||||
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
|
||||
#if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<typename Scalar, typename Index, typename LhsMapper,
|
||||
@@ -233,7 +235,7 @@ EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
|
||||
} \
|
||||
} \
|
||||
|
||||
#define writeRegToShmem(_) \
|
||||
#define writeRegToShmem() \
|
||||
lhs_shmem[lhs_store_idx_0] = lhs_pf0; \
|
||||
rhs_shmem[rhs_store_idx_0] = rhs_pf0; \
|
||||
\
|
||||
@@ -1225,29 +1227,25 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
typedef TensorContractionEvaluatorBase<Self> Base;
|
||||
|
||||
typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType;
|
||||
|
||||
enum {
|
||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
||||
};
|
||||
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
|
||||
|
||||
// Most of the code is assuming that both input tensors are ColMajor. If the
|
||||
// inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
|
||||
// If we want to compute A * B = C, where A is LHS and B is RHS, the code
|
||||
// will pretend B is LHS and A is RHS.
|
||||
typedef typename internal::conditional<
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
|
||||
typedef typename internal::conditional<
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
|
||||
typedef std::conditional_t<Layout == static_cast<int>(ColMajor), LeftArgType, RightArgType> EvalLeftArgType;
|
||||
typedef std::conditional_t<Layout == static_cast<int>(ColMajor), RightArgType, LeftArgType> EvalRightArgType;
|
||||
|
||||
static const int LDims =
|
||||
static constexpr int LDims =
|
||||
internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
|
||||
static const int RDims =
|
||||
static constexpr int RDims =
|
||||
internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
|
||||
static const int ContractDims = internal::array_size<Indices>::value;
|
||||
static constexpr int ContractDims = internal::array_size<Indices>::value;
|
||||
|
||||
typedef array<Index, LDims> left_dim_mapper_t;
|
||||
typedef array<Index, RDims> right_dim_mapper_t;
|
||||
@@ -1256,13 +1254,13 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
typedef array<Index, LDims - ContractDims> left_nocontract_t;
|
||||
typedef array<Index, RDims - ContractDims> right_nocontract_t;
|
||||
|
||||
static const int NumDims = LDims + RDims - 2 * ContractDims;
|
||||
static constexpr int NumDims = LDims + RDims - 2 * ContractDims;
|
||||
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
|
||||
// typedefs needed in evalTo
|
||||
typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
|
||||
typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
|
||||
typedef std::remove_const_t<typename EvalLeftArgType::Scalar> LhsScalar;
|
||||
typedef std::remove_const_t<typename EvalRightArgType::Scalar> RhsScalar;
|
||||
|
||||
typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
|
||||
typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
|
||||
@@ -1370,8 +1368,8 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
// columns in right side
|
||||
const Index n = this->m_j_size;
|
||||
|
||||
// zero out the result buffer (which must be of size at least m * n * sizeof(Scalar)
|
||||
this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
|
||||
// zero out the result buffer (which must be of size at least m * n * sizeof(Scalar))
|
||||
this->m_device.fill(buffer, buffer + m * n, Scalar(0));
|
||||
|
||||
typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
|
||||
LeftEvaluator, left_nocontract_t,
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
@@ -294,7 +296,7 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar,
|
||||
|
||||
template <typename PacketT,int AlignmentType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename internal::enable_if<internal::unpacket_traits<PacketT>::size==packet_size,PacketT>::type
|
||||
std::enable_if_t<internal::unpacket_traits<PacketT>::size==packet_size,PacketT>
|
||||
load(Index i, Index j) const
|
||||
{
|
||||
// whole method makes column major assumption
|
||||
@@ -340,7 +342,7 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar,
|
||||
|
||||
template <typename PacketT,int AlignmentType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename internal::enable_if<internal::unpacket_traits<PacketT>::size!=packet_size,PacketT>::type
|
||||
std::enable_if_t<internal::unpacket_traits<PacketT>::size!=packet_size,PacketT>
|
||||
load(Index i, Index j) const
|
||||
{
|
||||
const Index requested_packet_size = internal::unpacket_traits<PacketT>::size;
|
||||
@@ -414,6 +416,7 @@ class TensorContractionSubMapper {
|
||||
typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment, MakePointer_> ParentMapper;
|
||||
typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment, MakePointer_> Self;
|
||||
typedef Self LinearMapper;
|
||||
typedef Self SubMapper;
|
||||
|
||||
enum {
|
||||
// We can use direct offsets iff the parent mapper supports then and we can compute the strides.
|
||||
@@ -483,6 +486,13 @@ class TensorContractionSubMapper {
|
||||
return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubMapper getSubMapper(Index i, Index j) const {
|
||||
if (UseDirectOffsets) {
|
||||
return SubMapper(m_base_mapper, i, j);
|
||||
}
|
||||
return SubMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset);
|
||||
}
|
||||
|
||||
template <typename PacketT, int AlignmentType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const {
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<PacketT, PacketT>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
@@ -529,6 +539,7 @@ class TensorContractionInputMapper
|
||||
typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment, MakePointer_> Base;
|
||||
typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment, MakePointer_> SubMapper;
|
||||
typedef SubMapper VectorMapper;
|
||||
typedef SubMapper LinearMapper;
|
||||
|
||||
EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor,
|
||||
const nocontract_t& nocontract_strides,
|
||||
@@ -542,6 +553,10 @@ class TensorContractionInputMapper
|
||||
return SubMapper(*this, i, j);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
|
||||
return LinearMapper(*this, i, j);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
|
||||
return VectorMapper(*this, i, j);
|
||||
}
|
||||
|
||||
69
libs/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h
Executable file → Normal file
@@ -19,6 +19,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_SYCL_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_SYCL_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace TensorSycl {
|
||||
@@ -110,7 +112,7 @@ struct TTPanelSize {
|
||||
// BC : determines if supporting bank conflict is required
|
||||
static EIGEN_CONSTEXPR bool BC = true;
|
||||
// DoubleBuffer: determines if double buffering technique should be used (This can be disabled by
|
||||
// EIGEN_SYCL_DISABLE_DOUBLE_BUFFER macro when the device doesnot have sufficient local memory)
|
||||
// EIGEN_SYCL_DISABLE_DOUBLE_BUFFER macro when the device does not have sufficient local memory)
|
||||
static EIGEN_CONSTEXPR bool DoubleBuffer =
|
||||
#ifdef EIGEN_SYCL_DISABLE_DOUBLE_BUFFER
|
||||
false;
|
||||
@@ -156,7 +158,7 @@ enum class data_source { global_mem, local_mem, private_mem };
|
||||
*/
|
||||
template <bool PacketLoad, bool is_coalesced_layout, bool, typename PacketType, typename TensorMapper,
|
||||
typename StorageIndex>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<PacketLoad, PacketType>::type read(
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<PacketLoad, PacketType> read(
|
||||
const TensorMapper &tensorMapper, const StorageIndex &NCIndex, const StorageIndex &CIndex, const StorageIndex &ld) {
|
||||
const StorageIndex row = (is_coalesced_layout) ? NCIndex : CIndex;
|
||||
const StorageIndex col = (is_coalesced_layout) ? CIndex : NCIndex;
|
||||
@@ -186,7 +188,7 @@ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_
|
||||
* \param CIndex: is the contracting dim index
|
||||
*/
|
||||
template <bool PacketLoad, bool, bool IsRhs, typename PacketType, typename TensorMapper, typename StorageIndex>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<!PacketLoad, PacketType>::type read(
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!PacketLoad, PacketType> read(
|
||||
const TensorMapper &tensorMapper, const StorageIndex &NCIndex, const StorageIndex &CIndex, const StorageIndex &) {
|
||||
const StorageIndex row = (IsRhs) ? CIndex : NCIndex;
|
||||
const StorageIndex col = (IsRhs) ? NCIndex : CIndex;
|
||||
@@ -216,7 +218,7 @@ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_
|
||||
|
||||
template <typename StorageIndex, StorageIndex ld, data_source dt, typename PacketType, typename DataScalar>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename ::Eigen::internal::enable_if<dt != data_source::global_mem, void>::type
|
||||
std::enable_if_t<dt != data_source::global_mem, void>
|
||||
write(PacketType &packet_data, DataScalar ptr) {
|
||||
EIGEN_CONSTEXPR int PacketSize = Eigen::internal::unpacket_traits<PacketType>::size;
|
||||
EIGEN_UNROLL_LOOP
|
||||
@@ -242,8 +244,8 @@ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
*/
|
||||
|
||||
template <data_source dt, typename PacketType, typename DataScalar>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<
|
||||
Eigen::internal::unpacket_traits<PacketType>::size != 1 && dt == data_source::global_mem, void>::type
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename std::enable_if_t<
|
||||
Eigen::internal::unpacket_traits<PacketType>::size != 1 && dt == data_source::global_mem, void>
|
||||
write(PacketType &packet_data, DataScalar *ptr) {
|
||||
::Eigen::internal::pstoreu<DataScalar, PacketType>(ptr, packet_data);
|
||||
}
|
||||
@@ -262,8 +264,8 @@ write(PacketType &packet_data, DataScalar *ptr) {
|
||||
* \param ptr: a pointer to the local memory
|
||||
*/
|
||||
template <data_source dt, typename PacketType, typename DataScalar>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<
|
||||
Eigen::internal::unpacket_traits<PacketType>::size == 1 && dt == data_source::global_mem, void>::type
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename std::enable_if_t<
|
||||
Eigen::internal::unpacket_traits<PacketType>::size == 1 && dt == data_source::global_mem, void>
|
||||
write(PacketType &packet_data, DataScalar *ptr) {
|
||||
*ptr = packet_data;
|
||||
}
|
||||
@@ -319,7 +321,7 @@ struct BlockProperties {
|
||||
static EIGEN_CONSTEXPR bool packet_load = packet_load_;
|
||||
typedef typename Eigen::internal::unpacket_traits<PacketType>::type OutScalar;
|
||||
static EIGEN_CONSTEXPR bool is_rhs = is_rhs_;
|
||||
typedef typename Eigen::internal::conditional<packet_load, PacketType, OutScalar>::type OutType;
|
||||
typedef std::conditional_t<packet_load, PacketType, OutScalar> OutType;
|
||||
static EIGEN_CONSTEXPR int elements_per_access = Eigen::internal::unpacket_traits<OutType>::size;
|
||||
static EIGEN_CONSTEXPR bool is_coalesced_layout = !(is_transposed ^ is_rhs);
|
||||
static EIGEN_CONSTEXPR int nc_stride = (is_coalesced_layout ? elements_per_access : 1);
|
||||
@@ -428,7 +430,7 @@ struct ThreadProperties {
|
||||
Otherwise, the result of contraction will be written iin a temporary buffer. This is the case when Tall/Skinny
|
||||
contraction is used. So in this case, a final reduction step is required to compute final output.
|
||||
|
||||
* \tparam contraction_tp: it is an enum value representing whether the local memroy/no local memory implementation of
|
||||
* \tparam contraction_tp: it is an enum value representing whether the local memory/no local memory implementation of
|
||||
the algorithm to be used
|
||||
*
|
||||
* \param scratch: local memory containing tiles of LHS and RHS tensors for each work-group
|
||||
@@ -475,8 +477,7 @@ class TensorContractionKernel {
|
||||
typedef cl::sycl::accessor<OutScalar, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local> Scratch;
|
||||
typedef cl::sycl::multi_ptr<OutScalar, cl::sycl::access::address_space::local_space> local_ptr;
|
||||
typedef OutScalar * /*cl::sycl::multi_ptr<OutScalar, cl::sycl::access::address_space::private_space>*/ private_ptr;
|
||||
typedef
|
||||
typename ::Eigen::internal::conditional<contraction_tp == contraction_type::local, local_ptr, private_ptr>::type
|
||||
typedef std::conditional_t<contraction_tp == contraction_type::local, local_ptr, private_ptr>
|
||||
tile_ptr;
|
||||
static EIGEN_CONSTEXPR StorageIndex LSDL = contraction_tp == contraction_type::local
|
||||
? Properties::TileSizeDimM + Properties::BC
|
||||
@@ -493,7 +494,7 @@ class TensorContractionKernel {
|
||||
* the TiledMemory for both local and private memory, the MemHolder structs is used as a helper to abstract out
|
||||
* different type of memory needed when local/no_local memory computation is called.
|
||||
*
|
||||
* \tparam contraction_type: it is an enum value representing whether the local memroy/no local memory implementation
|
||||
* \tparam contraction_type: it is an enum value representing whether the local memory/no local memory implementation
|
||||
of the algorithm to be used
|
||||
* \tparam the private memory size
|
||||
* \param ptr the tile memory pointer type
|
||||
@@ -520,10 +521,10 @@ class TensorContractionKernel {
|
||||
* \param rhs_scratch_extract : determines the RHS tile memory. It is either private or local memory based on the
|
||||
* selected contraction_type.
|
||||
*
|
||||
* \param lhs_extract_index: determins the position of each thread on a local memory for lhs input. When private
|
||||
* \param lhs_extract_index: determines the position of each thread on a local memory for lhs input. When private
|
||||
* memory is used this is set to zero as this is not applicable in case of private memory.
|
||||
*
|
||||
* \param rhs_extract_index: determins the position of each thread on a local memory for rhs input. When private
|
||||
* \param rhs_extract_index: determines the position of each thread on a local memory for rhs input. When private
|
||||
* memory is used this is set to zero as this is not applicable in case of private memory.
|
||||
*
|
||||
* \param lhs_scratch_compute : determines the location to load for computation for lhs_local memory. This is the
|
||||
@@ -542,7 +543,7 @@ class TensorContractionKernel {
|
||||
template <contraction_type tp = contraction_tp>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TiledMemory(const ThreadProperties<StorageIndex> &, local_ptr,
|
||||
typename ::Eigen::internal::enable_if<tp == contraction_type::no_local>::type * = 0)
|
||||
std::enable_if_t<tp == contraction_type::no_local> * = 0)
|
||||
: lhs_scratch_extract{},
|
||||
rhs_scratch_extract{},
|
||||
lhs_scratch_ptr_compute(lhs_scratch_extract.ptr),
|
||||
@@ -553,7 +554,7 @@ class TensorContractionKernel {
|
||||
template <contraction_type tp = contraction_tp>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TiledMemory(const ThreadProperties<StorageIndex> &thread_properties, local_ptr block_start_ptr,
|
||||
typename ::Eigen::internal::enable_if<tp == contraction_type::local>::type * = 0)
|
||||
std::enable_if_t<tp == contraction_type::local> * = 0)
|
||||
: lhs_scratch_extract{block_start_ptr},
|
||||
rhs_scratch_extract{lhs_scratch_extract.ptr +
|
||||
((Properties::DoubleBuffer + 1) * LSDL * Properties::TileSizeDimK)},
|
||||
@@ -710,7 +711,7 @@ class TensorContractionKernel {
|
||||
template <typename InputBlockProperties, bool is_internal_block, typename Input, typename PrivateReg,
|
||||
contraction_type contract_tp = contraction_tp>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename ::Eigen::internal::enable_if<contract_tp == contraction_type::no_local>::type
|
||||
std::enable_if_t<contract_tp == contraction_type::no_local>
|
||||
extract_block(const Input &inpt, PrivateReg private_ptr, const std::pair<StorageIndex, StorageIndex> &,
|
||||
const StorageIndex &ncOffset, const StorageIndex cOffset) {
|
||||
EIGEN_CONSTEXPR StorageIndex LocalThreadSizeNC =
|
||||
@@ -783,28 +784,28 @@ class TensorContractionKernel {
|
||||
|
||||
template <bool db = Properties::DoubleBuffer, contraction_type ctp = contraction_tp>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename ::Eigen::internal::enable_if<db && ctp == contraction_type::local>::type
|
||||
std::enable_if_t<db && ctp == contraction_type::local>
|
||||
sync_mem(const cl::sycl::nd_item<1> &, bool &db_offset) noexcept {
|
||||
db_offset = !db_offset;
|
||||
}
|
||||
|
||||
template <bool db = Properties::DoubleBuffer, contraction_type ctp = contraction_tp>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename ::Eigen::internal::enable_if<!db && ctp == contraction_type::local>::type
|
||||
std::enable_if_t<!db && ctp == contraction_type::local>
|
||||
sync_mem(const cl::sycl::nd_item<1> &itemID, bool &) noexcept {
|
||||
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
||||
}
|
||||
|
||||
template <contraction_type ctp = contraction_tp>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename ::Eigen::internal::enable_if<ctp == contraction_type::no_local>::type
|
||||
std::enable_if_t<ctp == contraction_type::no_local>
|
||||
sync_mem(const cl::sycl::nd_item<1> &, bool &) noexcept {
|
||||
return;
|
||||
}
|
||||
|
||||
template <bool need_sync, contraction_type ctp = contraction_tp>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename ::Eigen::internal::enable_if<need_sync && ctp == contraction_type::no_local>::type
|
||||
std::enable_if_t<need_sync && ctp == contraction_type::no_local>
|
||||
sync_thread(const cl::sycl::nd_item<1> &
|
||||
#ifdef EIGEN_SYCL_ARM_GPU_CACHE_OPTIMISATION
|
||||
itemID
|
||||
@@ -818,12 +819,12 @@ class TensorContractionKernel {
|
||||
}
|
||||
template <bool need_sync, contraction_type ctp = contraction_tp>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename ::Eigen::internal::enable_if<need_sync && ctp == contraction_type::local>::type
|
||||
std::enable_if_t<need_sync && ctp == contraction_type::local>
|
||||
sync_thread(const cl::sycl::nd_item<1> &itemID) {
|
||||
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
||||
}
|
||||
template <bool need_sync>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<!need_sync>::type sync_thread(
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!need_sync> sync_thread(
|
||||
const cl::sycl::nd_item<1> &) {
|
||||
return;
|
||||
}
|
||||
@@ -894,7 +895,7 @@ class TensorContractionKernel {
|
||||
template <typename InputBlockProperties, bool is_internal_block, typename Input, typename Local,
|
||||
contraction_type contract_tp = contraction_tp>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename ::Eigen::internal::enable_if<contract_tp == contraction_type::local>::type
|
||||
std::enable_if_t<contract_tp == contraction_type::local>
|
||||
extract_block(const Input &inpt, Local local_ptr, const std::pair<StorageIndex, StorageIndex>& local_index,
|
||||
const StorageIndex &ncOffset, const StorageIndex cOffset) {
|
||||
EIGEN_CONSTEXPR StorageIndex TileSizeDimNC =
|
||||
@@ -1234,7 +1235,7 @@ struct GeneralVectorTensor {
|
||||
*
|
||||
* \param out_res: determines the output tensor containing the contraction result
|
||||
*
|
||||
* \param rng: determins the total input data size
|
||||
* \param rng: determines the total input data size
|
||||
*/
|
||||
template <typename OutScalar, typename LhsScalar, typename RhsScalar, typename OutAccessor, typename LhsMapper,
|
||||
typename RhsMapper, typename StorageIndex, bool Vectorizable>
|
||||
@@ -1292,7 +1293,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, Device> Self;
|
||||
typedef TensorContractionEvaluatorBase<Self> Base;
|
||||
typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
|
||||
typedef typename XprType::Index StorageIndex;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
@@ -1305,14 +1306,14 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
TripleDim(const StorageIndex M_, const StorageIndex N_, const StorageIndex K_) : M(M_), N(N_), K(K_) {}
|
||||
};
|
||||
enum {
|
||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||
BlockAccess = false,
|
||||
};
|
||||
|
||||
static EIGEN_CONSTEXPR int LDims = Base::LDims;
|
||||
static EIGEN_CONSTEXPR int RDims = Base::RDims;
|
||||
static EIGEN_CONSTEXPR int ContractDims = Base::ContractDims;
|
||||
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
|
||||
static constexpr int LDims = Base::LDims;
|
||||
static constexpr int RDims = Base::RDims;
|
||||
static constexpr int ContractDims = Base::ContractDims;
|
||||
|
||||
typedef array<StorageIndex, LDims> left_dim_mapper_t;
|
||||
typedef array<StorageIndex, RDims> right_dim_mapper_t;
|
||||
@@ -1321,14 +1322,14 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
typedef array<StorageIndex, LDims - ContractDims> left_nocontract_t;
|
||||
typedef array<StorageIndex, RDims - ContractDims> right_nocontract_t;
|
||||
|
||||
static const int NumDims = LDims + RDims - 2 * ContractDims;
|
||||
static constexpr int NumDims = LDims + RDims - 2 * ContractDims;
|
||||
|
||||
typedef DSizes<StorageIndex, NumDims> Dimensions;
|
||||
|
||||
typedef TensorEvaluator<typename Base::EvalLeftArgType, Device> LeftEvaluator;
|
||||
typedef TensorEvaluator<typename Base::EvalRightArgType, Device> RightEvaluator;
|
||||
typedef typename Eigen::internal::remove_const<typename LeftEvaluator::CoeffReturnType>::type LhsScalar;
|
||||
typedef typename Eigen::internal::remove_const<typename RightEvaluator::CoeffReturnType>::type RhsScalar;
|
||||
typedef std::remove_const_t<typename LeftEvaluator::CoeffReturnType> LhsScalar;
|
||||
typedef std::remove_const_t<typename RightEvaluator::CoeffReturnType> RhsScalar;
|
||||
|
||||
typedef typename LeftEvaluator::Dimensions LeftDimensions;
|
||||
typedef typename RightEvaluator::Dimensions RightDimensions;
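The two hunks above move compile-time constants such as Layout, LDims, RDims and NumDims from anonymous enums and static const int members to static constexpr int. A rough before/after sketch (illustrative only, not the actual evaluator):

// Before: flags smuggled through an unnamed enum; every constant has an
// unnamed enumeration type and converts implicitly to int.
struct EvaluatorOld {
  enum { Layout = 0, NumDims = 3 };
};

// After: real typed constants, still usable in constant expressions.
struct EvaluatorNew {
  static constexpr int Layout = 0;
  static constexpr int NumDims = 3;
};

static_assert(EvaluatorOld::NumDims == EvaluatorNew::NumDims, "same value");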
@@ -13,6 +13,8 @@
|
||||
// evaluator for thread pool device
|
||||
#ifdef EIGEN_USE_THREADS
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<typename Indices, typename LeftArgType, typename RightArgType, typename OutputKernelType>
|
||||
@@ -25,29 +27,27 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
typedef TensorContractionEvaluatorBase<Self> Base;
|
||||
|
||||
typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
|
||||
enum {
|
||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
||||
};
|
||||
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
|
||||
|
||||
// Most of the code is assuming that both input tensors are ColMajor. If the
|
||||
// inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
|
||||
// If we want to compute A * B = C, where A is LHS and B is RHS, the code
|
||||
// will pretend B is LHS and A is RHS.
|
||||
typedef typename internal::conditional<
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
|
||||
typedef typename internal::conditional<
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
|
||||
typedef std::conditional_t<
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType> EvalLeftArgType;
|
||||
typedef std::conditional_t<
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType> EvalRightArgType;
|
||||
|
||||
static const int LDims =
|
||||
static constexpr int LDims =
|
||||
internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
|
||||
static const int RDims =
|
||||
static constexpr int RDims =
|
||||
internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
|
||||
static const int ContractDims = internal::array_size<Indices>::value;
|
||||
static constexpr int ContractDims = internal::array_size<Indices>::value;
|
||||
|
||||
typedef array<Index, LDims> left_dim_mapper_t;
|
||||
typedef array<Index, RDims> right_dim_mapper_t;
|
||||
@@ -56,13 +56,13 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
typedef array<Index, LDims - ContractDims> left_nocontract_t;
|
||||
typedef array<Index, RDims - ContractDims> right_nocontract_t;
|
||||
|
||||
static const int NumDims = LDims + RDims - 2 * ContractDims;
|
||||
static constexpr int NumDims = LDims + RDims - 2 * ContractDims;
|
||||
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
|
||||
// typedefs needed in evalTo
|
||||
typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
|
||||
typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
|
||||
typedef std::remove_const_t<typename EvalLeftArgType::Scalar> LhsScalar;
|
||||
typedef std::remove_const_t<typename EvalRightArgType::Scalar> RhsScalar;
|
||||
typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits;
|
||||
|
||||
typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
|
||||
@@ -96,7 +96,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
// context from the heap.
|
||||
//
|
||||
// (*) EvalParallelContext & EvalShardedByInnerDimContext owns all the state
|
||||
// and temporary buffers, requried for executing the tensor contraction.
|
||||
// and temporary buffers, required for executing the tensor contraction.
|
||||
// They are responsible for cleaning it up after contraction is done.
|
||||
static const bool IsEvalInSyncMode =
|
||||
std::is_same<DoneCallback, NoCallback>::value;
|
||||
@@ -599,7 +599,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
// These variable are rolling over 3 consecutive k slices: first two we are
|
||||
// actively executing + one to track completion of kernels in the second
|
||||
// slice.
|
||||
static const Index P = 3;
|
||||
static constexpr Index P = 3;
|
||||
|
||||
// Handle to the allocated temporary storage for Lhs/Rhs blocks.
|
||||
BlockMemHandle packed_mem_;
|
||||
@@ -698,7 +698,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
!is_rhs && std::is_same<BlockType, LhsBlock>::value;
|
||||
static const bool kIsRhs =
|
||||
is_rhs && std::is_same<BlockType, RhsBlock>::value;
|
||||
static_assert(kIsLhs || kIsRhs, "Unkown block type");
|
||||
static_assert(kIsLhs || kIsRhs, "Unknown block type");
|
||||
|
||||
using Blocks = ThreadLocalBlocks<BlockType>;
|
||||
|
||||
@@ -874,7 +874,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
lhs_.getSubMapper(m1 * bm_, k * bk_), bk(k), bm(m1));
|
||||
|
||||
if (!parallel_pack_ && shard_by_col_) {
|
||||
assert(!use_thread_local);
|
||||
eigen_assert(!use_thread_local);
|
||||
signal_packing(k);
|
||||
} else {
|
||||
signal_switch(k + 1);
|
||||
@@ -895,7 +895,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
} else {
|
||||
// If we can't guarantee that all kernels in `k` slice will be
|
||||
// executed sequentially in current thread, it's no longer safe to use
|
||||
// thread local memory in followig slices along the k dimensions.
|
||||
// thread local memory in following slices along the k dimensions.
|
||||
eigen_assert(k > 0);
|
||||
can_use_thread_local_packed_[n].store(false,
|
||||
std::memory_order_relaxed);
|
||||
@@ -912,9 +912,9 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
// On 10000x2x10000 mm zeroing can easily take half of time. Zero (bn
|
||||
// x m) row. Safe to do here because all kernels that will write to
|
||||
// this memory depend on completion of this task. Note: don't call
|
||||
// device_.memset() here. device_.memset() blocks on thread pool
|
||||
// device_.fill() here. device_.fill() blocks on thread pool
|
||||
// worker thread, which can lead to underutilization and deadlocks.
|
||||
memset(buffer_ + n1 * bn_ * m_, 0, bn(n1) * m_ * sizeof(Scalar));
|
||||
std::fill_n(buffer_ + n1 * bn_ * m_, bn(n1) * m_, Scalar(0));
|
||||
}
|
||||
kernel_.packRhs(&packed_rhs(n, k, n1, use_thread_local),
|
||||
rhs_.getSubMapper(k * bk_, n1 * bn_), bk(k), bn(n1));
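The zeroing hunk above replaces a raw memset with std::fill_n (and the comment now points at device_.fill()). A short sketch of why the typed fill is the safer default for a generic Scalar; the only assumption is that Scalar might not be a plain arithmetic type whose zero is the all-zero byte pattern:

#include <algorithm>
#include <cstddef>
#include <cstring>

template <typename Scalar>
void zero_buffer_memset(Scalar* buffer, std::size_t count) {
  // Only correct when Scalar is trivially copyable and Scalar(0) is
  // represented by all-zero bytes (true for float/double, not in general).
  std::memset(buffer, 0, count * sizeof(Scalar));
}

template <typename Scalar>
void zero_buffer_fill(Scalar* buffer, std::size_t count) {
  // Correct for any Scalar constructible from 0; compilers lower this to
  // memset whenever that is legal anyway.
  std::fill_n(buffer, count, Scalar(0));
}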
@@ -927,7 +927,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
signal_kernel(m, n, k, sync, use_thread_local);
|
||||
}
|
||||
} else {
|
||||
assert(!use_thread_local);
|
||||
eigen_assert(!use_thread_local);
|
||||
signal_packing(k);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorConversionOp
|
||||
@@ -28,9 +30,9 @@ struct traits<TensorConversionOp<TargetType, XprType> >
|
||||
typedef typename traits<XprType>::StorageKind StorageKind;
|
||||
typedef typename traits<XprType>::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = traits<XprType>::NumDimensions;
|
||||
static const int Layout = traits<XprType>::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = traits<XprType>::NumDimensions;
|
||||
static constexpr int Layout = traits<XprType>::Layout;
|
||||
enum { Flags = 0 };
|
||||
typedef typename TypeConversion<Scalar, typename traits<XprType>::PointerType>::type PointerType;
|
||||
};
|
||||
@@ -187,7 +189,7 @@ class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprT
|
||||
: m_xpr(xpr) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
@@ -250,12 +252,12 @@ struct PacketConv {
|
||||
typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
|
||||
typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
|
||||
|
||||
static const int PacketSize = internal::unpacket_traits<TargetPacket>::size;
|
||||
static constexpr int PacketSize = internal::unpacket_traits<TargetPacket>::size;
|
||||
|
||||
template <typename ArgType, typename Device>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
|
||||
internal::scalar_cast_op<SrcType, TargetType> converter;
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<TargetType> values[PacketSize];
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
values[i] = converter(impl.coeff(index+i));
|
||||
@@ -283,11 +285,11 @@ struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> {
|
||||
template <typename SrcPacket, typename TargetPacket, int LoadMode>
|
||||
struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/false, /*IsSameT=*/true> {
|
||||
typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
|
||||
static const int PacketSize = internal::unpacket_traits<TargetPacket>::size;
|
||||
static constexpr int PacketSize = internal::unpacket_traits<TargetPacket>::size;
|
||||
|
||||
template <typename ArgType, typename Device>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<TargetType> values[PacketSize];
|
||||
for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i);
|
||||
return internal::pload<TargetPacket>(values);
|
||||
}
|
||||
@@ -312,11 +314,11 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
|
||||
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
|
||||
typedef TargetType Scalar;
|
||||
typedef TargetType CoeffReturnType;
|
||||
typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
|
||||
typedef internal::remove_all_t<typename internal::traits<ArgType>::Scalar> SrcType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
typedef typename PacketType<SrcType, Device>::type PacketSourceType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static const bool IsSameType = internal::is_same<TargetType, SrcType>::value;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr bool IsSameType = internal::is_same<TargetType, SrcType>::value;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
@@ -331,11 +333,11 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
|
||||
#endif
|
||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
||||
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
static const int NumDims = internal::array_size<Dimensions>::value;
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
static constexpr int NumDims = internal::array_size<Dimensions>::value;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorConvolution
|
||||
@@ -206,7 +208,7 @@ class IndexMapper {
|
||||
}
|
||||
|
||||
private:
|
||||
static const int NumDims = internal::array_size<InputDims>::value;
|
||||
static constexpr int NumDims = internal::array_size<InputDims>::value;
|
||||
array<Index, NumDims> m_inputStrides;
|
||||
array<Index, NumDims> m_outputStrides;
|
||||
array<Index, NumDims> m_gpuInputStrides;
|
||||
@@ -227,12 +229,12 @@ struct traits<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >
|
||||
typename traits<KernelXprType>::Index>::type Index;
|
||||
typedef typename InputXprType::Nested LhsNested;
|
||||
typedef typename KernelXprType::Nested RhsNested;
|
||||
typedef typename remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename remove_reference<RhsNested>::type _RhsNested;
|
||||
static const int NumDimensions = traits<InputXprType>::NumDimensions;
|
||||
static const int Layout = traits<InputXprType>::Layout;
|
||||
typedef typename conditional<Pointer_type_promotion<typename InputXprType::Scalar, Scalar>::val,
|
||||
typename traits<InputXprType>::PointerType, typename traits<KernelXprType>::PointerType>::type PointerType;
|
||||
typedef std::remove_reference_t<LhsNested> LhsNested_;
|
||||
typedef std::remove_reference_t<RhsNested> RhsNested_;
|
||||
static constexpr int NumDimensions = traits<InputXprType>::NumDimensions;
|
||||
static constexpr int Layout = traits<InputXprType>::Layout;
|
||||
typedef std::conditional_t<Pointer_type_promotion<typename InputXprType::Scalar, Scalar>::val,
|
||||
typename traits<InputXprType>::PointerType, typename traits<KernelXprType>::PointerType> PointerType;
|
||||
|
||||
enum {
|
||||
Flags = 0
|
||||
@@ -275,11 +277,11 @@ class TensorConvolutionOp : public TensorBase<TensorConvolutionOp<Indices, Input
|
||||
|
||||
/** \returns the nested expressions */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const typename internal::remove_all<typename InputXprType::Nested>::type&
|
||||
const internal::remove_all_t<typename InputXprType::Nested>&
|
||||
inputExpression() const { return m_input_xpr; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const typename internal::remove_all<typename KernelXprType::Nested>::type&
|
||||
const internal::remove_all_t<typename KernelXprType::Nested>&
|
||||
kernelExpression() const { return m_kernel_xpr; }
|
||||
|
||||
protected:
|
||||
@@ -294,24 +296,24 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
{
|
||||
typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
|
||||
|
||||
static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, Device>::Dimensions>::value;
|
||||
static const int NumKernelDims = internal::array_size<Indices>::value;
|
||||
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumKernelDims = internal::array_size<Indices>::value;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef StorageMemory<Scalar, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<InputArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = int(TensorEvaluator<InputArgType, Device>::IsAligned) & int(TensorEvaluator<KernelArgType, Device>::IsAligned),
|
||||
PacketAccess = int(TensorEvaluator<InputArgType, Device>::PacketAccess) & int(TensorEvaluator<KernelArgType, Device>::PacketAccess),
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<InputArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -777,18 +779,18 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
{
|
||||
typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
|
||||
|
||||
static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions>::value;
|
||||
static const int NumKernelDims = internal::array_size<Indices>::value;
|
||||
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions>::value;
|
||||
static constexpr int NumKernelDims = internal::array_size<Indices>::value;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions KernelDimensions;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout;
|
||||
enum {
|
||||
IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & TensorEvaluator<KernelArgType, GpuDevice>::IsAligned,
|
||||
PacketAccess = false,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -818,7 +820,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType;
|
||||
typedef typename InputArgType::Scalar Scalar;
|
||||
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
static constexpr int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
|
||||
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; }
|
||||
|
||||
|
||||
@@ -15,6 +15,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_SYCL_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_SYCL_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorConvolution
|
||||
@@ -275,9 +277,9 @@ template <typename Indices, typename InputArgType, typename KernelArgType>
|
||||
struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Eigen::SyclDevice> {
|
||||
typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
|
||||
|
||||
static const int NumDims =
|
||||
static constexpr int NumDims =
|
||||
internal::array_size<typename TensorEvaluator<InputArgType, Eigen::SyclDevice>::Dimensions>::value;
|
||||
static const int NumKernelDims = internal::array_size<Indices>::value;
|
||||
static constexpr int NumKernelDims = internal::array_size<Indices>::value;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename TensorEvaluator<KernelArgType, Eigen::SyclDevice>::Dimensions KernelDimensions;
|
||||
@@ -285,18 +287,18 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Eigen::SyclDevice>::type PacketReturnType;
|
||||
typedef typename InputArgType::Scalar Scalar;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef StorageMemory<CoeffReturnType, Eigen::SyclDevice> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
typedef StorageMemory<const CoeffReturnType, Eigen::SyclDevice> KernelStorage;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<InputArgType, Eigen::SyclDevice>::Layout;
|
||||
enum {
|
||||
IsAligned = TensorEvaluator<InputArgType, Eigen::SyclDevice>::IsAligned &
|
||||
TensorEvaluator<KernelArgType, Eigen::SyclDevice>::IsAligned,
|
||||
PacketAccess = false,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<InputArgType, Eigen::SyclDevice>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -392,8 +394,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
const size_t numX = dimensions()[m_indices[0]];
|
||||
const size_t numP = dimensions().TotalSize() / numX;
|
||||
const auto input_dim = std::array<size_t, 2>{numX, numP};
|
||||
auto global_range = cl::sycl::range<2>{};
|
||||
auto local_range = cl::sycl::range<2>{};
|
||||
auto global_range = cl::sycl::range<2>{1, 1};
|
||||
auto local_range = cl::sycl::range<2>{1, 1};
|
||||
const size_t kernel_size = m_kernelImpl.dimensions().TotalSize();
|
||||
|
||||
m_device.parallel_for_setup(input_dim, global_range, local_range);
|
||||
@@ -423,8 +425,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
const size_t numP = dimensions().TotalSize() / (numX * numY);
|
||||
auto input_dim = std::array<size_t, 3>{numX, numY, numP};
|
||||
|
||||
auto global_range = cl::sycl::range<3>{};
|
||||
auto local_range = cl::sycl::range<3>{};
|
||||
auto global_range = cl::sycl::range<3>{1, 1, 1};
|
||||
auto local_range = cl::sycl::range<3>{1, 1, 1};
|
||||
|
||||
m_device.parallel_for_setup(input_dim, global_range, local_range);
|
||||
|
||||
@@ -467,8 +469,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
|
||||
internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
|
||||
|
||||
auto global_range = cl::sycl::range<3>{};
|
||||
auto local_range = cl::sycl::range<3>{};
|
||||
auto global_range = cl::sycl::range<3>{1, 1, 1};
|
||||
auto local_range = cl::sycl::range<3>{1, 1, 1};
|
||||
|
||||
m_device.parallel_for_setup(input_dim, global_range, local_range);
|
||||
auto local_memory_range = (local_range + kernel_size - 1);
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorEvaluator
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorCustomUnaryOp
|
||||
@@ -27,9 +29,9 @@ struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
|
||||
typedef typename XprType::StorageKind StorageKind;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = traits<XprType>::NumDimensions;
|
||||
static const int Layout = traits<XprType>::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = traits<XprType>::NumDimensions;
|
||||
static constexpr int Layout = traits<XprType>::Layout;
|
||||
typedef typename traits<XprType>::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -67,7 +69,7 @@ class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFun
|
||||
const CustomUnaryFunc& func() const { return m_func; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_expr; }
|
||||
|
||||
protected:
|
||||
@@ -82,22 +84,22 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
|
||||
{
|
||||
typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> ArgType;
|
||||
typedef typename internal::traits<ArgType>::Index Index;
|
||||
static const int NumDims = internal::traits<ArgType>::NumDimensions;
|
||||
static constexpr int NumDims = internal::traits<ArgType>::NumDimensions;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename internal::remove_const<typename ArgType::Scalar>::type Scalar;
|
||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||
typedef std::remove_const_t<typename ArgType::Scalar> Scalar;
|
||||
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<XprType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<XprType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -191,12 +193,12 @@ struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
|
||||
typename traits<RhsXprType>::Index>::type Index;
|
||||
typedef typename LhsXprType::Nested LhsNested;
|
||||
typedef typename RhsXprType::Nested RhsNested;
|
||||
typedef typename remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename remove_reference<RhsNested>::type _RhsNested;
|
||||
static const int NumDimensions = traits<LhsXprType>::NumDimensions;
|
||||
static const int Layout = traits<LhsXprType>::Layout;
|
||||
typedef typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
|
||||
typename traits<LhsXprType>::PointerType, typename traits<RhsXprType>::PointerType>::type PointerType;
|
||||
typedef std::remove_reference_t<LhsNested> LhsNested_;
|
||||
typedef std::remove_reference_t<RhsNested> RhsNested_;
|
||||
static constexpr int NumDimensions = traits<LhsXprType>::NumDimensions;
|
||||
static constexpr int Layout = traits<LhsXprType>::Layout;
|
||||
typedef std::conditional_t<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
|
||||
typename traits<LhsXprType>::PointerType, typename traits<RhsXprType>::PointerType> PointerType;
|
||||
};
|
||||
|
||||
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
|
||||
@@ -234,11 +236,11 @@ class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinary
|
||||
const CustomBinaryFunc& func() const { return m_func; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename LhsXprType::Nested>::type&
|
||||
const internal::remove_all_t<typename LhsXprType::Nested>&
|
||||
lhsExpression() const { return m_lhs_xpr; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename RhsXprType::Nested>::type&
|
||||
const internal::remove_all_t<typename RhsXprType::Nested>&
|
||||
rhsExpression() const { return m_rhs_xpr; }
|
||||
|
||||
protected:
|
||||
@@ -254,23 +256,23 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
|
||||
{
|
||||
typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> XprType;
|
||||
typedef typename internal::traits<XprType>::Index Index;
|
||||
static const int NumDims = internal::traits<XprType>::NumDimensions;
|
||||
static constexpr int NumDims = internal::traits<XprType>::NumDimensions;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
|
||||
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<LhsXprType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<LhsXprType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorDevice
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H
|
||||
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// Default device for the machine (typically a single cpu core)
|
||||
@@ -39,6 +41,17 @@ struct DefaultDevice {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
|
||||
::memset(buffer, c, n);
|
||||
}
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
|
||||
#ifdef EIGEN_GPU_COMPILE_PHASE
|
||||
// std::fill is not a device function, so resort to simple loop.
|
||||
for (T* it = begin; it != end; ++it) {
|
||||
*it = value;
|
||||
}
|
||||
#else
|
||||
std::fill(begin, end, value);
|
||||
#endif
|
||||
}
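With this addition, DefaultDevice joins GpuDevice, SyclDevice and ThreadPoolDevice in exposing a typed fill(begin, end, value), so callers no longer have to special-case memset per backend. A hypothetical caller-side sketch; DeviceT stands for any of those device types, and allocate/deallocate/fill are assumed to have the usual Eigen device signatures:

#include <cstddef>

template <typename Scalar, typename DeviceT>
Scalar* make_constant_buffer(const DeviceT& device, std::size_t count,
                             const Scalar& value) {
  Scalar* data = static_cast<Scalar*>(device.allocate(count * sizeof(Scalar)));
  device.fill(data, data + count, value);  // backend-appropriate fill
  return data;  // caller releases it with device.deallocate(data)
}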
template<typename Type>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Type get(Type data) const {
|
||||
return data;
|
||||
@@ -82,6 +95,10 @@ struct DefaultDevice {
|
||||
return firstLevelCacheSize();
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
|
||||
// Nothing. Default device operations are synchronous.
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
|
||||
#if !defined(EIGEN_GPU_COMPILE_PHASE)
|
||||
|
||||
@@ -15,6 +15,8 @@
|
||||
// A separate header (included at the end of this file) will undefine all
|
||||
#include "TensorGpuHipCudaDefines.h"
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
static const int kGpuScratchSize = 1024;
|
||||
@@ -128,7 +130,13 @@ class GpuStreamDevice : public StreamInterface {
|
||||
public:
|
||||
// Use the default stream on the current device
|
||||
GpuStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) {
|
||||
gpuGetDevice(&device_);
|
||||
gpuError_t status = gpuGetDevice(&device_);
|
||||
if (status != gpuSuccess) {
|
||||
std::cerr << "Failed to get the GPU devices "
|
||||
<< gpuGetErrorString(status)
|
||||
<< std::endl;
|
||||
gpu_assert(status == gpuSuccess);
|
||||
}
|
||||
}
|
||||
// Use the default stream on the specified device
|
||||
GpuStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) {}
|
||||
@@ -139,7 +147,13 @@ class GpuStreamDevice : public StreamInterface {
|
||||
GpuStreamDevice(const gpuStream_t* stream, int device = -1)
|
||||
: stream_(stream), device_(device), scratch_(NULL), semaphore_(NULL) {
|
||||
if (device < 0) {
|
||||
gpuGetDevice(&device_);
|
||||
gpuError_t status = gpuGetDevice(&device_);
|
||||
if (status != gpuSuccess) {
|
||||
std::cerr << "Failed to get the GPU devices "
|
||||
<< gpuGetErrorString(status)
|
||||
<< std::endl;
|
||||
gpu_assert(status == gpuSuccess);
|
||||
}
|
||||
} else {
|
||||
int num_devices;
|
||||
gpuError_t err = gpuGetDeviceCount(&num_devices);
|
||||
@@ -281,10 +295,49 @@ struct GpuDevice {
|
||||
EIGEN_UNUSED_VARIABLE(err)
|
||||
gpu_assert(err == gpuSuccess);
|
||||
#else
|
||||
EIGEN_UNUSED_VARIABLE(buffer)
|
||||
EIGEN_UNUSED_VARIABLE(c)
|
||||
EIGEN_UNUSED_VARIABLE(n)
|
||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
|
||||
#ifndef EIGEN_GPU_COMPILE_PHASE
|
||||
const size_t count = end - begin;
|
||||
// Split value into bytes and run memset with stride.
|
||||
const int value_size = sizeof(value);
|
||||
char* buffer = (char*)begin;
|
||||
char* value_bytes = (char*)(&value);
|
||||
gpuError_t err;
|
||||
EIGEN_UNUSED_VARIABLE(err)
|
||||
|
||||
// If all value bytes are equal, then a single memset can be much faster.
|
||||
bool use_single_memset = true;
|
||||
for (int i=1; i<value_size; ++i) {
|
||||
if (value_bytes[i] != value_bytes[0]) {
|
||||
use_single_memset = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (use_single_memset) {
|
||||
err = gpuMemsetAsync(buffer, value_bytes[0], count * sizeof(T), stream_->stream());
|
||||
gpu_assert(err == gpuSuccess);
|
||||
} else {
|
||||
for (int b=0; b<value_size; ++b) {
|
||||
err = gpuMemset2DAsync(buffer+b, value_size, value_bytes[b], 1, count, stream_->stream());
|
||||
gpu_assert(err == gpuSuccess);
|
||||
}
|
||||
}
|
||||
#else
|
||||
EIGEN_UNUSED_VARIABLE(begin)
|
||||
EIGEN_UNUSED_VARIABLE(end)
|
||||
EIGEN_UNUSED_VARIABLE(value)
|
||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE size_t numThreads() const {
|
||||
// FIXME
|
||||
return 32;
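GpuDevice::fill above keeps everything on the byte-level memset primitives: when every byte of the value is identical it issues a single gpuMemsetAsync, otherwise it issues one strided 2-D memset per byte position of the value. A host-side sketch of the strided trick using the CUDA runtime (assumption: a CUDA toolchain; cudaMemset2DAsync writes `width` bytes per row for `height` rows spaced `pitch` bytes apart):

#include <cstddef>
#include <cuda_runtime.h>

// Fill `count` elements at dev_ptr with `value`, one byte lane at a time:
// byte b of every element sits at offset b and repeats with stride sizeof(T).
template <typename T>
cudaError_t fill_by_bytes(T* dev_ptr, std::size_t count, const T& value,
                          cudaStream_t stream) {
  const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&value);
  char* base = reinterpret_cast<char*>(dev_ptr);
  for (std::size_t b = 0; b < sizeof(T); ++b) {
    cudaError_t err = cudaMemset2DAsync(base + b, /*pitch=*/sizeof(T),
                                        /*value=*/bytes[b], /*width=*/1,
                                        /*height=*/count, stream);
    if (err != cudaSuccess) return err;
  }
  return cudaSuccess;
}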
@@ -16,6 +16,8 @@
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
|
||||
#include <unordered_set>
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace TensorSycl {
|
||||
@@ -134,6 +136,15 @@ class QueueInterface {
|
||||
this->exception_caught_ = this->sycl_async_handler(l);
|
||||
},
|
||||
num_threads) {}
|
||||
|
||||
explicit QueueInterface(
|
||||
const cl::sycl::queue& q, unsigned num_threads = std::thread::hardware_concurrency())
|
||||
: m_queue(q),
|
||||
#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS
|
||||
m_prog(m_queue.get_context(), get_sycl_supported_devices()),
|
||||
#endif
|
||||
m_thread_pool(num_threads),
|
||||
m_device_info(m_queue) {}
|
||||
|
||||
#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS
|
||||
EIGEN_STRONG_INLINE cl::sycl::program &program() const { return m_prog; }
|
||||
@@ -244,7 +255,7 @@ class QueueInterface {
|
||||
}
|
||||
|
||||
/// The memcpyHostToDevice is used to copy the data from host to device
|
||||
/// The destination pointer could be deleted before the copy happend which is
|
||||
/// The destination pointer could be deleted before the copy happened which is
|
||||
/// why a callback function is needed. By default if none is provided, the
|
||||
/// function is blocking.
|
||||
EIGEN_STRONG_INLINE void memcpyHostToDevice(
|
||||
@@ -272,7 +283,7 @@ class QueueInterface {
|
||||
}
|
||||
|
||||
/// The memcpyDeviceToHost is used to copy the data from device to host.
|
||||
/// The source pointer could be deleted before the copy happend which is
|
||||
/// The source pointer could be deleted before the copy happened which is
|
||||
/// why a callback function is needed. By default if none is provided, the
|
||||
/// function is blocking.
|
||||
EIGEN_STRONG_INLINE void memcpyDeviceToHost(
|
||||
@@ -327,13 +338,27 @@ class QueueInterface {
|
||||
if (n == 0) {
|
||||
return;
|
||||
}
|
||||
n /= sizeof(buffer_scalar_t);
|
||||
auto f = [&](cl::sycl::handler &cgh) {
|
||||
auto dst_acc = get_range_accessor<write_mode>(cgh, data, n);
|
||||
// The cast to uint8_t is here to match the behaviour of the standard
|
||||
// memset. The cast to buffer_scalar_t is needed to match the type of the
|
||||
// accessor (in case buffer_scalar_t is not uint8_t)
|
||||
cgh.fill(dst_acc, static_cast<buffer_scalar_t>(static_cast<uint8_t>(c)));
|
||||
// Get a typed range accesser to ensure we fill each byte, in case
|
||||
// `buffer_scalar_t` is not (u)int8_t.
|
||||
auto dst_acc = get_typed_range_accessor<write_mode, uint8_t>(cgh, data, n);
|
||||
cgh.fill(dst_acc, static_cast<uint8_t>(c));
|
||||
};
|
||||
cl::sycl::event e;
|
||||
EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(f));
|
||||
async_synchronize(e);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
|
||||
static const auto write_mode = cl::sycl::access::mode::discard_write;
|
||||
if (begin == end) {
|
||||
return;
|
||||
}
|
||||
const ptrdiff_t count = end - begin;
|
||||
auto f = [&](cl::sycl::handler &cgh) {
|
||||
auto dst_acc = get_typed_range_accessor<write_mode, T>(cgh, begin, count);
|
||||
cgh.fill(dst_acc, value);
|
||||
};
|
||||
cl::sycl::event e;
|
||||
EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(f));
|
||||
@@ -359,15 +384,17 @@ class QueueInterface {
|
||||
|
||||
auto original_buffer = pMapper.get_buffer(ptr);
|
||||
const ptrdiff_t offset = pMapper.get_offset(ptr);
|
||||
eigen_assert(offset % sizeof(T) == 0 && "The offset must be a multiple of sizeof(T)");
|
||||
eigen_assert(original_buffer.get_size() % sizeof(T) == 0 && "The buffer size must be a multiple of sizeof(T)");
|
||||
const ptrdiff_t typed_offset = offset / sizeof(T);
|
||||
eigen_assert(typed_offset >= 0);
|
||||
const auto typed_size = original_buffer.get_size() / sizeof(T);
|
||||
auto buffer = original_buffer.template reinterpret<
|
||||
typename Eigen::internal::remove_const<T>::type>(
|
||||
std::remove_const_t<T>>(
|
||||
cl::sycl::range<1>(typed_size));
|
||||
const ptrdiff_t size = buffer.get_count() - typed_offset;
|
||||
eigen_assert(size >= 0);
|
||||
typedef cl::sycl::accessor<typename Eigen::internal::remove_const<T>::type,
|
||||
typedef cl::sycl::accessor<std::remove_const_t<T>,
|
||||
1, AcMd, global_access, is_place_holder>
|
||||
placeholder_accessor_t;
|
||||
const auto start_ptr = static_cast<internal_ptr_t>(ptr) - offset;
|
||||
@@ -395,6 +422,40 @@ class QueueInterface {
|
||||
cgh, cl::sycl::range<1>(n_bytes), cl::sycl::id<1>(offset));
|
||||
}
|
||||
|
||||
/// Get a range accessor to the virtual pointer's device memory with a
|
||||
/// specified type and count.
|
||||
template <cl::sycl::access::mode AcMd, typename T, typename Index>
|
||||
EIGEN_STRONG_INLINE cl::sycl::accessor<
|
||||
T, 1, AcMd, cl::sycl::access::target::global_buffer>
|
||||
get_typed_range_accessor(cl::sycl::handler &cgh, const void *ptr,
|
||||
const Index count) const {
|
||||
static const auto global_access = cl::sycl::access::target::global_buffer;
|
||||
eigen_assert(count >= 0);
|
||||
std::lock_guard<std::mutex> lock(pmapper_mutex_);
|
||||
auto buffer = pMapper.get_buffer(ptr);
|
||||
const ptrdiff_t offset = pMapper.get_offset(ptr);
|
||||
eigen_assert(offset >= 0);
|
||||
|
||||
// Technically we should create a subbuffer for the desired range,
|
||||
// then reinterpret that. However, I was not able to get changes to reflect
|
||||
// in the original buffer (only the subbuffer and reinterpretted buffer).
|
||||
// This current implementation now has the restriction that the buffer
|
||||
// offset and original buffer size must be a multiple of sizeof(T).
|
||||
// Note that get_range_accessor(void*) currently has the same restriction.
|
||||
//
|
||||
// auto subbuffer = cl::sycl::buffer<buffer_scalar_t, 1>(buffer,
|
||||
// cl::sycl::id<1>(offset), cl::sycl::range<1>(n_bytes));
|
||||
eigen_assert(offset % sizeof(T) == 0 && "The offset must be a multiple of sizeof(T)");
|
||||
eigen_assert(buffer.get_size() % sizeof(T) == 0 && "The buffer size must be a multiple of sizeof(T)");
|
||||
const ptrdiff_t typed_offset = offset / sizeof(T);
|
||||
const size_t typed_size = buffer.get_size() / sizeof(T);
|
||||
auto reint = buffer.template reinterpret<
|
||||
std::remove_const_t<T>>(
|
||||
cl::sycl::range<1>(typed_size));
|
||||
return reint.template get_access<AcMd, global_access>(
|
||||
cgh, cl::sycl::range<1>(count), cl::sycl::id<1>(typed_offset));
|
||||
}

/// Creation of sycl accessor for a buffer. This function first tries to find
/// the buffer in the buffer_map. If found it gets the accessor from it, if
/// not, the function then adds an entry by creating a sycl buffer for that

@@ -663,7 +724,7 @@ class QueueInterface {
EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; }

EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const {
// OpenCL doesnot have such concept
// OpenCL does not have such a concept
return 2;
}

@@ -951,6 +1012,11 @@ struct SyclDevice : public SyclDeviceBase {
EIGEN_STRONG_INLINE void memset(void *data, int c, size_t n) const {
queue_stream()->memset(data, c, n);
}
/// the fill function
template<typename T>
EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
queue_stream()->fill(begin, end, value);
}
/// returning the sycl queue
EIGEN_STRONG_INLINE cl::sycl::queue &sycl_queue() const {
return queue_stream()->sycl_queue();

@@ -978,7 +1044,7 @@ struct SyclDevice : public SyclDeviceBase {
return queue_stream()->maxWorkItemSizes();
}
EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const {
// OpenCL doesnot have such concept
// OpenCL does not have such a concept
return queue_stream()->maxSyclThreadsPerMultiProcessor();
}
EIGEN_STRONG_INLINE size_t sharedMemPerBlock() const {

@@ -10,6 +10,8 @@
#if defined(EIGEN_USE_THREADS) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H)
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

// Runs an arbitrary function and then calls Notify() on the passed in

@@ -122,6 +124,11 @@ struct ThreadPoolDevice {
::memset(buffer, c, n);
}

template<typename T>
EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
std::fill(begin, end, value);
}

EIGEN_STRONG_INLINE int numThreads() const {
return num_threads_;
}
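
A minimal usage sketch for the fill() hook added to the devices in this patch (assumes EIGEN_USE_THREADS and the unsupported Tensor module are available; fill_device_buffer is a hypothetical helper, not part of the patch):

#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include <vector>

// Works with any device exposing fill(begin, end, value), e.g. the
// ThreadPoolDevice::fill and SyclDevice::fill overloads introduced above.
template <typename Device, typename T>
void fill_device_buffer(const Device& device, T* begin, T* end, const T& value) {
  device.fill(begin, end, value);
}

int main() {
  Eigen::ThreadPool pool(4);
  Eigen::ThreadPoolDevice device(&pool, /*num_cores=*/4);
  std::vector<float> data(1024);
  fill_device_buffer(device, data.data(), data.data() + data.size(), 1.0f);
  return 0;
}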

@@ -140,6 +147,10 @@ struct ThreadPoolDevice {
// The l3 cache size is shared between all the cores.
return l3CacheSize() / num_threads_;
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
// Nothing. Threadpool device operations are synchronous.
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
// Should return an enum that encodes the ISA supported by the CPU

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \internal

@@ -43,8 +45,6 @@ template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(c
return n;
}


#if EIGEN_HAS_CONSTEXPR
template <typename Index, std::size_t Rank>
struct index_known_statically_impl<DimensionList<Index, Rank> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) {

@@ -136,99 +136,6 @@ struct index_statically_lt_impl<const DimensionList<Index, Rank> > {
}
};

#else
template <typename Index, std::size_t Rank>
struct index_known_statically_impl<DimensionList<Index, Rank> > {
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) {
return true;
}
};
template <typename Index, std::size_t Rank>
struct index_known_statically_impl<const DimensionList<Index, Rank> > {
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) {
return true;
}
};

template <typename Index, std::size_t Rank>
struct all_indices_known_statically_impl<DimensionList<Index, Rank> > {
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() {
return true;
}
};
template <typename Index, std::size_t Rank>
struct all_indices_known_statically_impl<const DimensionList<Index, Rank> > {
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() {
return true;
}
};

template <typename Index, std::size_t Rank>
struct indices_statically_known_to_increase_impl<DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() {
return true;
}
};
template <typename Index, std::size_t Rank>
struct indices_statically_known_to_increase_impl<const DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() {
return true;
}
};

template <typename Index, std::size_t Rank>
struct index_statically_eq_impl<DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};
template <typename Index, std::size_t Rank>
struct index_statically_eq_impl<const DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};

template <typename Index, std::size_t Rank>
struct index_statically_ne_impl<DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex){
return false;
}
};
template <typename Index, std::size_t Rank>
struct index_statically_ne_impl<const DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};

template <typename Index, std::size_t Rank>
struct index_statically_gt_impl<DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};
template <typename Index, std::size_t Rank>
struct index_statically_gt_impl<const DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};

template <typename Index, std::size_t Rank>
struct index_statically_lt_impl<DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};
template <typename Index, std::size_t Rank>
struct index_statically_lt_impl<const DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};
#endif

} // end namespace internal
} // end namespace Eigen

@@ -11,6 +11,8 @@
#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H


#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \internal

@@ -109,12 +111,10 @@ struct Sizes {
explicit EIGEN_DEVICE_FUNC Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
// todo: add assertion
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template <typename... DenseIndex> EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { }
explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) {
// todo: add assertion
}
#endif

template <typename T> Sizes& operator = (const T& /*other*/) {
// add assertion failure if the size of other is different

@@ -171,28 +171,16 @@ template <std::ptrdiff_t V1=0, std::ptrdiff_t V2=0, std::ptrdiff_t V3=0, std::pt
explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
// todo: add assertion
}

template <typename T> Sizes& operator = (const T& /*other*/) {
// add assertion failure if the size of other is different
return *this;
}

#if EIGEN_HAS_VARIADIC_TEMPLATES
template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { }
explicit Sizes(std::initializer_list<std::ptrdiff_t>) {
// todo: add assertion
}
#else
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) {
}
EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex) {
}
EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex) {
}
EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) {
}
EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) {
}
#endif

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index operator[] (const Index index) const {
switch (index) {

@@ -296,20 +284,19 @@ struct DSizes : array<DenseIndex, NumDims> {
EIGEN_DEVICE_FUNC
explicit DSizes(const array<OtherIndex, NumDims>& other,
// Default template parameters require c++11.
typename internal::enable_if<
std::enable_if_t<
internal::is_same<
DenseIndex,
typename internal::promote_index_type<
DenseIndex,
OtherIndex
>::type
>::value, void*>::type = 0) {
>::value, void*> = 0) {
for (int i = 0; i < NumDims; ++i) {
(*this)[i] = static_cast<DenseIndex>(other[i]);
}
}

#ifdef EIGEN_HAS_INDEX_LIST
template <typename FirstType, typename... OtherTypes>
EIGEN_DEVICE_FUNC
explicit DSizes(const Eigen::IndexList<FirstType, OtherTypes...>& dimensions) {

@@ -317,7 +304,6 @@ struct DSizes : array<DenseIndex, NumDims> {
(*this)[i] = dimensions[i];
}
}
#endif

#ifndef EIGEN_EMULATE_CXX11_META_H
template <typename std::ptrdiff_t... Indices>

@@ -335,39 +321,10 @@ struct DSizes : array<DenseIndex, NumDims> {
}
#endif

#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension, IndexTypes... otherDimensions) : Base({{firstDimension, secondDimension, otherDimensions...}}) {
EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 2 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE)
}
#else
EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1) {
eigen_assert(NumDims == 2);
(*this)[0] = i0;
(*this)[1] = i1;
}
EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) {
eigen_assert(NumDims == 3);
(*this)[0] = i0;
(*this)[1] = i1;
(*this)[2] = i2;
}
EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) {
eigen_assert(NumDims == 4);
(*this)[0] = i0;
(*this)[1] = i1;
(*this)[2] = i2;
(*this)[3] = i3;
}
EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) {
eigen_assert(NumDims == 5);
(*this)[0] = i0;
(*this)[1] = i1;
(*this)[2] = i2;
(*this)[3] = i3;
(*this)[4] = i4;
}
#endif

EIGEN_DEVICE_FUNC DSizes& operator = (const array<DenseIndex, NumDims>& other) {
*static_cast<Base*>(this) = other;

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class TensorForcedEval

@@ -29,9 +31,9 @@ struct traits<TensorEvalToOp<XprType, MakePointer_> >
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename MakePointer_<Scalar>::Type PointerType;

enum {

@@ -70,19 +72,19 @@ class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType, MakePointer_>,
public:
typedef typename Eigen::internal::traits<TensorEvalToOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
typedef typename MakePointer_<CoeffReturnType>::Type PointerType;
typedef typename Eigen::internal::nested<TensorEvalToOp>::type Nested;
typedef typename Eigen::internal::traits<TensorEvalToOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorEvalToOp>::Index Index;

static const int NumDims = Eigen::internal::traits<TensorEvalToOp>::NumDimensions;
static constexpr int NumDims = Eigen::internal::traits<TensorEvalToOp>::NumDimensions;

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr)
: m_xpr(expr), m_buffer(buffer) {}

EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }

EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; }

@@ -101,9 +103,9 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
typedef typename ArgType::Scalar Scalar;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef typename XprType::Index Index;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;

@@ -112,12 +114,12 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = true,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = true
};

static const int NumDims = internal::traits<ArgType>::NumDimensions;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
static constexpr int NumDims = internal::traits<ArgType>::NumDimensions;

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class TensorEvaluator

@@ -33,26 +35,26 @@ struct TensorEvaluator
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename Derived::Dimensions Dimensions;
typedef Derived XprType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
typedef StorageMemory<Scalar, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;

// NumDimensions is -1 for variable dim tensors
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
internal::traits<Derived>::NumDimensions : 0;
static constexpr int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
internal::traits<Derived>::NumDimensions : 0;
static constexpr int Layout = Derived::Layout;

enum {
IsAligned = Derived::IsAligned,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
BlockAccess = internal::is_arithmetic<std::remove_const_t<Scalar>>::value,
PreferBlockAccess = false,
Layout = Derived::Layout,
CoordAccess = NumCoords > 0,
RawAccess = true
};

typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;

@@ -73,7 +75,7 @@ struct TensorEvaluator
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }

EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && dest) {
m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
return false;
}

@@ -113,7 +115,7 @@ struct TensorEvaluator
// float element will be loaded, otherwise 0 will be loaded.
// Function has been templatized to enable Sfinae.
template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
std::enable_if_t<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>
partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
{
return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);

@@ -157,14 +159,14 @@ struct TensorEvaluator
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
assert(m_data != NULL);
eigen_assert(m_data != NULL);
return TensorBlock::materialize(m_data, m_dims, desc, scratch);
}

template<typename TensorBlock>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
const TensorBlockDesc& desc, const TensorBlock& block) {
assert(m_data != NULL);
eigen_assert(m_data != NULL);

typedef typename TensorBlock::XprType TensorBlockExpr;
typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,

@@ -192,7 +194,7 @@ struct TensorEvaluator
const Device EIGEN_DEVICE_REF m_device;
};

namespace {
namespace internal {
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T loadConstant(const T* address) {
return *address;

@@ -219,8 +221,7 @@ T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address
return *address;
}
#endif
}

} // namespace internal

// Default evaluator for rvalues
template<typename Derived, typename Device>

@@ -236,19 +237,19 @@ struct TensorEvaluator<const Derived, Device>
typedef StorageMemory<const Scalar, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;

typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;

// NumDimensions is -1 for variable dim tensors
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
internal::traits<Derived>::NumDimensions : 0;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
internal::traits<Derived>::NumDimensions : 0;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int Layout = Derived::Layout;

enum {
IsAligned = Derived::IsAligned,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = internal::is_arithmetic<ScalarNoConst>::value,
PreferBlockAccess = false,
Layout = Derived::Layout,
CoordAccess = NumCoords > 0,
RawAccess = true
};

@@ -269,7 +270,7 @@ struct TensorEvaluator<const Derived, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }

EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && data) {
m_device.memcpy((void*)(m_device.get(data)),m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
return false;
}

@@ -289,7 +290,7 @@ struct TensorEvaluator<const Derived, Device>

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
eigen_assert(m_data != NULL);
return loadConstant(m_data+index);
return internal::loadConstant(m_data+index);
}

template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE

@@ -304,7 +305,7 @@ struct TensorEvaluator<const Derived, Device>
// float element will be loaded, otherwise 0 will be loaded.
// Function has been templatized to enable Sfinae.
template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
std::enable_if_t<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>
partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
{
return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);

@@ -314,7 +315,7 @@ struct TensorEvaluator<const Derived, Device>
eigen_assert(m_data != NULL);
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
: m_dims.IndexOfRowMajor(coords);
return loadConstant(m_data+index);
return internal::loadConstant(m_data+index);
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {

@@ -330,7 +331,7 @@ struct TensorEvaluator<const Derived, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
assert(m_data != NULL);
eigen_assert(m_data != NULL);
return TensorBlock::materialize(m_data, m_dims, desc, scratch);
}

@@ -365,11 +366,12 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;

static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = true,
PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess

@@ -379,7 +381,6 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};

@@ -443,13 +444,13 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
{
typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;

static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
int(internal::functor_traits<UnaryOp>::PacketAccess),
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};

@@ -462,14 +463,14 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>

typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static const int NumDims = internal::array_size<Dimensions>::value;
static constexpr int NumDims = internal::array_size<Dimensions>::value;

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;

@@ -555,6 +556,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
{
typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;

static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
enum {
IsAligned = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
int(TensorEvaluator<RightArgType, Device>::IsAligned),

@@ -565,7 +567,6 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
int(TensorEvaluator<RightArgType, Device>::BlockAccess),
PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};

@@ -584,12 +585,12 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;

static const int NumDims = internal::array_size<
static constexpr int NumDims = internal::array_size<
typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//

@@ -693,6 +694,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
{
typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;

static constexpr int Layout = TensorEvaluator<Arg1Type, Device>::Layout;
enum {
IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned,
PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess &&

@@ -703,7 +705,6 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
Layout = TensorEvaluator<Arg1Type, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};

@@ -736,7 +737,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;

@@ -811,6 +812,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
typedef typename XprType::Scalar Scalar;

static constexpr int Layout = TensorEvaluator<IfArgType, Device>::Layout;
enum {
IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned &
TensorEvaluator<ElseArgType, Device>::IsAligned,

@@ -823,7 +825,6 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<IfArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};

@@ -842,12 +843,12 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
typedef typename XprType::Index Index;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;

static const int NumDims = internal::array_size<Dimensions>::value;
static constexpr int NumDims = internal::array_size<Dimensions>::value;

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/**

@@ -165,12 +167,12 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
/*Tiling=*/TiledEvaluation::On> {
public:
typedef typename traits<Expression>::Scalar Scalar;
typedef typename remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;

typedef TensorEvaluator<Expression, DefaultDevice> Evaluator;
typedef typename traits<Expression>::Index StorageIndex;

static const int NumDims = traits<Expression>::NumDimensions;
static constexpr int NumDims = traits<Expression>::NumDimensions;

EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(const Expression& expr,

@@ -282,7 +284,7 @@ struct EvalRange {

template <typename Evaluator, typename StorageIndex>
struct EvalRange<Evaluator, StorageIndex, /*Vectorizable*/ true> {
static const int PacketSize =
static constexpr int PacketSize =
unpacket_traits<typename Evaluator::PacketReturnType>::size;

static void run(Evaluator* evaluator_in, const StorageIndex firstIdx,

@@ -351,9 +353,9 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
public:
typedef typename traits<Expression>::Index IndexType;
typedef typename traits<Expression>::Scalar Scalar;
typedef typename remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;

static const int NumDims = traits<Expression>::NumDimensions;
static constexpr int NumDims = traits<Expression>::NumDimensions;

typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
typedef TensorBlockMapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;

@@ -459,9 +461,9 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
public:
typedef typename traits<Expression>::Index IndexType;
typedef typename traits<Expression>::Scalar Scalar;
typedef typename remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;

static const int NumDims = traits<Expression>::NumDimensions;
static constexpr int NumDims = traits<Expression>::NumDimensions;

typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
typedef TensorBlockMapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;

@@ -551,11 +553,59 @@ class TensorExecutor<Expression, GpuDevice, Vectorizable, Tiling> {
};

#if defined(EIGEN_GPUCC)
// Returns 1 if lhs + rhs would overflow, -1 if it would underflow, otherwise 0.
template <typename Index>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int sum_will_overflow(Index lhs,
Index rhs) {
const Index highest = NumTraits<Index>::highest();
const Index lowest = NumTraits<Index>::lowest();
if (lhs > 0 && rhs > 0) {
return lhs > highest - rhs ? 1 : 0;
} else if (lhs < 0 && rhs < 0) {
return lhs < lowest - rhs ? -1 : 0;
} else {
return 0;
}
}

// Returns lhs + rhs, saturating to the highest/lowest representable value on
// overflow/underflow respectively.
template <typename Index>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index saturate_add(Index lhs, Index rhs) {
const Index highest = NumTraits<Index>::highest();
const Index lowest = NumTraits<Index>::lowest();
int overflow = sum_will_overflow(lhs, rhs);
return overflow == 1 ? highest : overflow == -1 ? lowest : lhs + rhs;
}

// A functor that adds step_size to a given index, saturating to avoid
// overflow/underflow. If overflow/underflow is not possible, regular addition
// is used (for efficiency).
template <typename Index>
struct SafeStep {
// lastIdx is one past the end of the possible indexes.
// step_size is the value that will be added to the given index when the
// functor is called.
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SafeStep(Index lastIdx, Index step_size)
: can_overflow_(sum_will_overflow(lastIdx, step_size)),
step_size_(step_size) {}

// Adds step_size to index, saturating on overflow (if overflow is possible).
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index operator()(Index index) const {
return can_overflow_ ? saturate_add(index, step_size_) : index + step_size_;
}

private:
const bool can_overflow_;
const Index step_size_;
};

template <typename Evaluator, typename StorageIndex, bool Vectorizable>
struct EigenMetaKernelEval {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
void run(Evaluator& eval, StorageIndex firstIdx, StorageIndex lastIdx, StorageIndex step_size) {
for (StorageIndex i = firstIdx; i < lastIdx; i += step_size) {
SafeStep<StorageIndex> safe_step(lastIdx, step_size);
for (StorageIndex i = firstIdx; i < lastIdx; i = safe_step(i)) {
eval.evalScalar(i);
}
}

@@ -569,12 +619,16 @@ struct EigenMetaKernelEval<Evaluator, StorageIndex, true> {
const StorageIndex vectorized_size = (lastIdx / PacketSize) * PacketSize;
const StorageIndex vectorized_step_size = step_size * PacketSize;

SafeStep<StorageIndex> safe_vectorized_step(vectorized_size,
vectorized_step_size);
// Use the vector path
for (StorageIndex i = firstIdx * PacketSize; i < vectorized_size;
i += vectorized_step_size) {
i = safe_vectorized_step(i)) {
eval.evalPacket(i);
}
for (StorageIndex i = vectorized_size + firstIdx; i < lastIdx; i += step_size) {
SafeStep<StorageIndex> safe_step(lastIdx, step_size);
for (StorageIndex i = saturate_add(vectorized_size, firstIdx); i < lastIdx;
i = safe_step(i)) {
eval.evalScalar(i);
}
}

@@ -601,8 +655,11 @@ EIGEN_STRONG_INLINE void TensorExecutor<Expression, GpuDevice, Vectorizable, Til
if (needs_assign) {

const int block_size = device.maxGpuThreadsPerBlock();
const int max_blocks = device.getNumGpuMultiProcessors() *
device.maxGpuThreadsPerMultiProcessor() / block_size;
const int max_blocks =
numext::mini<int64_t>(device.getNumGpuMultiProcessors() *
device.maxGpuThreadsPerMultiProcessor(),
NumTraits<StorageIndex>::highest()) /
block_size;
const StorageIndex size = array_prod(evaluator.dimensions());
// Create a least one block to ensure we won't crash when tensorflow calls with tensors of size 0.
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, divup<int>(size, block_size)), 1);
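
The clamped launch configuration in this hunk can be reproduced with ordinary integer arithmetic; a sketch with std::min/std::max standing in for numext::mini/maxi and hypothetical device limits:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  const std::int64_t block_size = 1024;      // maxGpuThreadsPerBlock() (hypothetical)
  const std::int64_t multiprocessors = 80;   // getNumGpuMultiProcessors() (hypothetical)
  const std::int64_t threads_per_mp = 2048;  // maxGpuThreadsPerMultiProcessor() (hypothetical)
  const std::int64_t index_highest = std::numeric_limits<std::int32_t>::max();

  // Clamp before dividing so max_blocks * block_size cannot exceed the index range.
  const std::int64_t max_blocks =
      std::min(multiprocessors * threads_per_mp, index_highest) / block_size;

  const std::int64_t size = 1 << 20;  // number of coefficients to evaluate
  const std::int64_t divup = (size + block_size - 1) / block_size;

  // At least one block is launched even for size-0 tensors.
  const std::int64_t num_blocks = std::max<std::int64_t>(std::min(max_blocks, divup), 1);
  assert(num_blocks == 160);
  return 0;
}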

@@ -635,7 +692,7 @@ struct ExecExprFunctorKernel {
compute(itemID);
}
template <bool is_vec = Evaluator::PacketAccess>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<!is_vec>::type
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::enable_if_t<!is_vec>
compute(const cl::sycl::nd_item<1>& itemID) {
Index gId = static_cast<Index>(itemID.get_global_linear_id());
Index total_threads = itemID.get_global_range(0);

@@ -645,7 +702,7 @@ struct ExecExprFunctorKernel {
}
}
template <bool is_vec = Evaluator::PacketAccess>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<is_vec>::type
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::enable_if_t<is_vec>
compute(const cl::sycl::nd_item<1>& itemID) {
const Index vectorizedRange =
(range / Evaluator::PacketSize) * Evaluator::PacketSize;

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXPR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EXPR_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class TensorExpr

@@ -35,9 +37,9 @@ struct traits<TensorCwiseNullaryOp<NullaryOp, XprType> >
typedef traits<XprType> XprTraits;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::Nested XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<XprTypeNested> XprTypeNested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
enum {
Flags = 0

@@ -63,7 +65,7 @@ class TensorCwiseNullaryOp : public TensorBase<TensorCwiseNullaryOp<NullaryOp, X
: m_xpr(xpr), m_functor(func) {}

EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
nestedExpression() const { return m_xpr; }

EIGEN_DEVICE_FUNC

@@ -86,9 +88,9 @@ struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> >
typedef typename result_of<UnaryOp(typename XprType::Scalar)>::type Scalar;
typedef traits<XprType> XprTraits;
typedef typename XprType::Nested XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<XprTypeNested> XprTypeNested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename TypeConversion<Scalar,
typename XprTraits::PointerType
>::type

@@ -132,7 +134,7 @@ class TensorCwiseUnaryOp : public TensorBase<TensorCwiseUnaryOp<UnaryOp, XprType

/** \returns the nested expression */
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
nestedExpression() const { return m_xpr; }

protected:

@@ -161,14 +163,14 @@ struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
typename traits<RhsXprType>::Index>::type Index;
typedef typename LhsXprType::Nested LhsNested;
typedef typename RhsXprType::Nested RhsNested;
typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename remove_reference<RhsNested>::type _RhsNested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<LhsNested> LhsNested_;
typedef std::remove_reference_t<RhsNested> RhsNested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename TypeConversion<Scalar,
typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
std::conditional_t<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
typename traits<LhsXprType>::PointerType,
typename traits<RhsXprType>::PointerType>::type
typename traits<RhsXprType>::PointerType>
>::type
PointerType;
enum {

@@ -213,11 +215,11 @@ class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsX

/** \returns the nested expressions */
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename LhsXprType::Nested>::type&
const internal::remove_all_t<typename LhsXprType::Nested>&
lhsExpression() const { return m_lhs_xpr; }

EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename RhsXprType::Nested>::type&
const internal::remove_all_t<typename RhsXprType::Nested>&
rhsExpression() const { return m_rhs_xpr; }

protected:

@@ -242,15 +244,15 @@ struct traits<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprT
typedef typename Arg1XprType::Nested Arg1Nested;
typedef typename Arg2XprType::Nested Arg2Nested;
typedef typename Arg3XprType::Nested Arg3Nested;
typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;
typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;
typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Arg1Nested> Arg1Nested_;
typedef std::remove_reference_t<Arg2Nested> Arg2Nested_;
typedef std::remove_reference_t<Arg3Nested> Arg3Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename TypeConversion<Scalar,
typename conditional<Pointer_type_promotion<typename Arg2XprType::Scalar, Scalar>::val,
std::conditional_t<Pointer_type_promotion<typename Arg2XprType::Scalar, Scalar>::val,
typename traits<Arg2XprType>::PointerType,
typename traits<Arg3XprType>::PointerType>::type
typename traits<Arg3XprType>::PointerType>
>::type
PointerType;
enum {

@@ -293,15 +295,15 @@ class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, A

/** \returns the nested expressions */
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename Arg1XprType::Nested>::type&
const internal::remove_all_t<typename Arg1XprType::Nested>&
arg1Expression() const { return m_arg1_xpr; }

EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename Arg2XprType::Nested>::type&
const internal::remove_all_t<typename Arg2XprType::Nested>&
arg2Expression() const { return m_arg2_xpr; }

EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename Arg3XprType::Nested>::type&
const internal::remove_all_t<typename Arg3XprType::Nested>&
arg3Expression() const { return m_arg3_xpr; }

protected:

@@ -326,11 +328,11 @@ struct traits<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> >
typedef typename IfXprType::Nested IfNested;
typedef typename ThenXprType::Nested ThenNested;
typedef typename ElseXprType::Nested ElseNested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef typename conditional<Pointer_type_promotion<typename ThenXprType::Scalar, Scalar>::val,
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef std::conditional_t<Pointer_type_promotion<typename ThenXprType::Scalar, Scalar>::val,
typename traits<ThenXprType>::PointerType,
typename traits<ElseXprType>::PointerType>::type PointerType;
typename traits<ElseXprType>::PointerType> PointerType;
};

template<typename IfXprType, typename ThenXprType, typename ElseXprType>

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class TensorFFT

@@ -60,13 +62,13 @@ struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits
typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar;
typedef typename std::complex<RealScalar> ComplexScalar;
typedef typename XprTraits::Scalar InputScalar;
typedef typename conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar> OutputScalar;
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename traits<XprType>::PointerType PointerType;
};

@@ -88,7 +90,7 @@ class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, F
typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename std::complex<RealScalar> ComplexScalar;
typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar> OutputScalar;
typedef OutputScalar CoeffReturnType;
typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested;
typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind;

@@ -101,7 +103,7 @@ class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, F
const FFT& fft() const { return m_fft; }

EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type& expression() const {
const internal::remove_all_t<typename XprType::Nested>& expression() const {
return m_xpr;
}

@@ -115,7 +117,7 @@ template <typename FFT, typename ArgType, typename Device, int FFTResultType, in
struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> {
typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;

@@ -123,19 +125,19 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
typedef internal::traits<XprType> XprTraits;
typedef typename XprTraits::Scalar InputScalar;
typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar> OutputScalar;
typedef OutputScalar CoeffReturnType;
typedef typename PacketType<OutputScalar, Device>::type PacketReturnType;
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
static constexpr int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;

static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = true,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
RawAccess = false
};

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H
#define EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class TensorFixedSize

@@ -36,14 +38,14 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename Base::CoeffReturnType CoeffReturnType;

static const int Options = Options_;
static constexpr int Options = Options_;
static constexpr int Layout = Options_ & RowMajor ? RowMajor : ColMajor;

enum {
IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0),
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
BlockAccess = false,
PreferBlockAccess = false,
Layout = Options_ & RowMajor ? RowMajor : ColMajor,
CoordAccess = true,
RawAccess = true
};

@@ -53,7 +55,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
//===--------------------------------------------------------------------===//

typedef Dimensions_ Dimensions;
static const std::size_t NumIndices = Dimensions::count;
static constexpr std::size_t NumIndices = Dimensions::count;

protected:
TensorStorage<Scalar, Dimensions, Options> m_storage;

@@ -61,7 +63,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions dimensions() const { return m_storage.dimensions(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); }

@@ -72,7 +74,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
inline Self& base() { return *this; }
inline const Self& base() const { return *this; }

#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const
{

@@ -80,7 +81,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return coeff(array<Index, NumIndices>{{firstIndex, otherIndices...}});
}
#endif

EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const

@@ -104,7 +104,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
}


#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices)
{

@@ -112,7 +111,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return coeffRef(array<Index, NumIndices>{{firstIndex, otherIndices...}});
}
#endif

EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices)

@@ -135,7 +133,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
return m_storage.data()[0];
}

#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const
{

@@ -143,53 +140,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return this->operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
}
#else
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
{
if (Options&RowMajor) {
const Index index = i1 + i0 * m_storage.dimensions()[1];
return m_storage.data()[index];
} else {
const Index index = i0 + i1 * m_storage.dimensions()[0];
return m_storage.data()[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
{
if (Options&RowMajor) {
const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0);
return m_storage.data()[index];
} else {
const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2);
return m_storage.data()[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
{
if (Options&RowMajor) {
const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0));
return m_storage.data()[index];
} else {
const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3));
return m_storage.data()[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
{
if (Options&RowMajor) {
const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)));
return m_storage.data()[index];
} else {
const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4)));
return m_storage.data()[index];
}
}
#endif
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
|
||||
@@ -220,7 +170,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
|
||||
return coeff(index);
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template<typename... IndexTypes>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
|
||||
{
|
||||
@@ -228,52 +177,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
|
||||
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
|
||||
}
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
|
||||
{
|
||||
if (Options&RowMajor) {
|
||||
const Index index = i1 + i0 * m_storage.dimensions()[1];
|
||||
return m_storage.data()[index];
|
||||
} else {
|
||||
const Index index = i0 + i1 * m_storage.dimensions()[0];
|
||||
return m_storage.data()[index];
|
||||
}
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
|
||||
{
|
||||
if (Options&RowMajor) {
|
||||
const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0);
|
||||
return m_storage.data()[index];
|
||||
} else {
|
||||
const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2);
|
||||
return m_storage.data()[index];
|
||||
}
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
|
||||
{
|
||||
if (Options&RowMajor) {
|
||||
const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0));
|
||||
return m_storage.data()[index];
|
||||
} else {
|
||||
const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3));
|
||||
return m_storage.data()[index];
|
||||
}
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
|
||||
{
|
||||
if (Options&RowMajor) {
|
||||
const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)));
|
||||
return m_storage.data()[index];
|
||||
} else {
|
||||
const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4)));
|
||||
return m_storage.data()[index];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices)
|
||||
@@ -312,16 +215,14 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TensorFixedSize(const Self& other)
|
||||
: m_storage(other.m_storage)
|
||||
: Base(other), m_storage(other.m_storage)
|
||||
{
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFixedSize(Self&& other)
|
||||
: m_storage(other.m_storage)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
|
||||
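For context, a minimal usage sketch of the TensorFixedSize class touched above (not part of the diff; assumes the unsupported Tensor module is included):

    #include <unsupported/Eigen/CXX11/Tensor>

    // A 2x3 float tensor whose dimensions are fixed at compile time.
    Eigen::TensorFixedSize<float, Eigen::Sizes<2, 3>> t;
    t.setZero();
    t(1, 2) = 4.0f;        // variadic operator() as shown in the hunks above
    float x = t(1, 2);
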
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorForcedEval
|
||||
@@ -29,9 +31,9 @@ struct traits<TensorForcedEvalOp<XprType> >
|
||||
typedef typename traits<XprType>::StorageKind StorageKind;
|
||||
typedef typename traits<XprType>::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
|
||||
enum {
|
||||
@@ -61,7 +63,7 @@ class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOn
|
||||
public:
|
||||
typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar;
|
||||
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
|
||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
|
||||
typedef typename Eigen::internal::nested<TensorForcedEvalOp>::type Nested;
|
||||
typedef typename Eigen::internal::traits<TensorForcedEvalOp>::StorageKind StorageKind;
|
||||
typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Index Index;
|
||||
@@ -70,7 +72,7 @@ class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOn
|
||||
: m_xpr(expr) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
@@ -103,14 +105,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index, StorageType) {
|
||||
template<typename ArgType_, typename Device>
|
||||
struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
|
||||
{
|
||||
typedef const typename internal::remove_all<ArgType_>::type ArgType;
|
||||
typedef const internal::remove_all_t<ArgType_> ArgType;
|
||||
typedef TensorForcedEvalOp<ArgType> XprType;
|
||||
typedef typename ArgType::Scalar Scalar;
|
||||
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
@@ -120,11 +122,11 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
|
||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||
BlockAccess = internal::is_arithmetic<CoeffReturnType>::value,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
RawAccess = true
|
||||
};
|
||||
|
||||
static const int NumDims = internal::traits<ArgType>::NumDimensions;
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
static constexpr int NumDims = internal::traits<ArgType>::NumDimensions;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
|
||||
@@ -148,11 +150,11 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
|
||||
|
||||
internal::non_integral_type_placement_new<Device, CoeffReturnType>()(numValues, m_buffer);
|
||||
|
||||
typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
|
||||
typedef TensorEvalToOp< const std::remove_const_t<ArgType> > EvalTo;
|
||||
EvalTo evalToTmp(m_device.get(m_buffer), m_op);
|
||||
|
||||
internal::TensorExecutor<
|
||||
const EvalTo, typename internal::remove_const<Device>::type,
|
||||
const EvalTo, std::remove_const_t<Device>,
|
||||
/*Vectorizable=*/internal::IsVectorizable<Device, const ArgType>::value,
|
||||
/*Tiling=*/internal::IsTileable<Device, const ArgType>::value>::
|
||||
run(evalToTmp, m_device);
|
||||
@@ -167,14 +169,14 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
|
||||
const Index numValues = internal::array_prod(m_impl.dimensions());
|
||||
m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(
|
||||
numValues * sizeof(CoeffReturnType)));
|
||||
typedef TensorEvalToOp<const typename internal::remove_const<ArgType>::type>
|
||||
typedef TensorEvalToOp<const std::remove_const_t<ArgType>>
|
||||
EvalTo;
|
||||
EvalTo evalToTmp(m_device.get(m_buffer), m_op);
|
||||
|
||||
auto on_done = std::bind([](EvalSubExprsCallback done_) { done_(true); },
|
||||
std::move(done));
|
||||
internal::TensorAsyncExecutor<
|
||||
const EvalTo, typename internal::remove_const<Device>::type,
|
||||
const EvalTo, std::remove_const_t<Device>,
|
||||
decltype(on_done),
|
||||
/*Vectorizable=*/internal::IsVectorizable<Device, const ArgType>::value,
|
||||
/*Tiling=*/internal::IsTileable<Device, const ArgType>::value>::
|
||||
@@ -206,7 +208,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
|
||||
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
|
||||
bool /*root_of_expr_ast*/ = false) const {
|
||||
assert(m_buffer != NULL);
|
||||
eigen_assert(m_buffer != NULL);
|
||||
return TensorBlock::materialize(m_buffer, m_impl.dimensions(), desc, scratch);
|
||||
}
|
||||
|
||||
|
||||
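A hedged sketch of what TensorForcedEvalOp is for: calling .eval() on a tensor expression materializes it into a temporary buffer before it is composed further:

    Eigen::Tensor<float, 2> a(64, 64), b(64, 64);
    a.setRandom();
    b.setRandom();
    // (a + b).eval() yields a TensorForcedEvalOp; the sum is computed once into a buffer.
    Eigen::Tensor<float, 2> c = (a + b).eval() * 0.5f;
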
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// MakePointer class is used as a container of the address space of the pointer
|
||||
@@ -29,7 +31,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* constCast(const T* data) {
|
||||
}
|
||||
|
||||
// The StorageMemory class is a container of the device specific pointer
|
||||
// used for refering to a Pointer on TensorEvaluator class. While the TensorExpression
|
||||
// used for referring to a Pointer on TensorEvaluator class. While the TensorExpression
|
||||
// is a device-agnostic type and need MakePointer class for type conversion,
|
||||
// the TensorEvaluator class can be specialized for a device, hence it is possible
|
||||
// to construct different types of temproray storage memory in TensorEvaluator
|
||||
@@ -61,8 +63,8 @@ template<typename BinaryOp, typename LeftXprType, typename RightXprType> class T
|
||||
template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> class TensorCwiseTernaryOp;
|
||||
template<typename IfXprType, typename ThenXprType, typename ElseXprType> class TensorSelectOp;
|
||||
template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_ = MakePointer > class TensorReductionOp;
|
||||
template<typename XprType> class TensorIndexTupleOp;
|
||||
template<typename ReduceOp, typename Dims, typename XprType> class TensorTupleReducerOp;
|
||||
template<typename XprType> class TensorIndexPairOp;
|
||||
template<typename ReduceOp, typename Dims, typename XprType> class TensorPairReducerOp;
|
||||
template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp;
|
||||
template<typename Dimensions, typename LeftXprType, typename RightXprType, typename OutputKernelType> class TensorContractionOp;
|
||||
template<typename TargetType, typename XprType> class TensorConversionOp;
|
||||
@@ -165,7 +167,7 @@ struct IsTileable {
|
||||
// Check that block evaluation is supported and it's a preferred option (at
|
||||
// least one sub-expression has much faster block evaluation, e.g.
|
||||
// broadcasting).
|
||||
static const bool BlockAccess =
|
||||
static constexpr bool BlockAccess =
|
||||
TensorEvaluator<Expression, Device>::BlockAccess &&
|
||||
TensorEvaluator<Expression, Device>::PreferBlockAccess;
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
@@ -33,7 +35,6 @@ struct functor_traits<scalar_mod_op<Scalar> >
|
||||
*/
|
||||
template <typename Scalar>
|
||||
struct scalar_mod2_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; }
|
||||
};
|
||||
template <typename Scalar>
|
||||
@@ -42,7 +43,6 @@ struct functor_traits<scalar_mod2_op<Scalar> >
|
||||
|
||||
template <typename Scalar>
|
||||
struct scalar_fmod_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar
|
||||
operator()(const Scalar& a, const Scalar& b) const {
|
||||
return numext::fmod(a, b);
|
||||
@@ -367,7 +367,7 @@ struct reducer_traits<OrReducer, Device> {
|
||||
|
||||
// Argmin/Argmax reducers. Returns the first occurrence if multiple locations
|
||||
// contain the same min/max value.
|
||||
template <typename T> struct ArgMaxTupleReducer
|
||||
template <typename T> struct ArgMaxPairReducer
|
||||
{
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
||||
if (t.second < accum->second) {
|
||||
@@ -385,7 +385,7 @@ template <typename T> struct ArgMaxTupleReducer
|
||||
};
|
||||
|
||||
template <typename T, typename Device>
|
||||
struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
|
||||
struct reducer_traits<ArgMaxPairReducer<T>, Device> {
|
||||
enum {
|
||||
Cost = NumTraits<T>::AddCost,
|
||||
PacketAccess = false,
|
||||
@@ -395,7 +395,7 @@ struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
|
||||
};
|
||||
|
||||
|
||||
template <typename T> struct ArgMinTupleReducer
|
||||
template <typename T> struct ArgMinPairReducer
|
||||
{
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const {
|
||||
if (t.second > accum->second) {
|
||||
@@ -413,7 +413,7 @@ template <typename T> struct ArgMinTupleReducer
|
||||
};
|
||||
|
||||
template <typename T, typename Device>
|
||||
struct reducer_traits<ArgMinTupleReducer<T>, Device> {
|
||||
struct reducer_traits<ArgMinPairReducer<T>, Device> {
|
||||
enum {
|
||||
Cost = NumTraits<T>::AddCost,
|
||||
PacketAccess = false,
|
||||
@@ -426,7 +426,7 @@ struct reducer_traits<ArgMinTupleReducer<T>, Device> {
|
||||
template <typename T, typename Index, size_t NumDims>
|
||||
class GaussianGenerator {
|
||||
public:
|
||||
static const bool PacketAccess = false;
|
||||
static constexpr bool PacketAccess = false;
|
||||
|
||||
EIGEN_DEVICE_FUNC GaussianGenerator(const array<T, NumDims>& means,
|
||||
const array<T, NumDims>& std_devs)
|
||||
|
||||
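The renamed ArgMax/ArgMin pair reducers back the argmax()/argmin() API on TensorBase; a small usage sketch (assumed interface, not part of the diff):

    Eigen::Tensor<float, 2> t(3, 4);
    t.setRandom();
    // Flat index of the overall maximum (rank-0 result).
    Eigen::Tensor<Eigen::DenseIndex, 0> flat = t.argmax();
    // Argmax along dimension 0, one index per remaining column (rank-1 result).
    Eigen::Tensor<Eigen::DenseIndex, 1> per_col = t.argmax(0);
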
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorGeneratorOp
|
||||
@@ -28,9 +30,9 @@ struct traits<TensorGeneratorOp<Generator, XprType> > : public traits<XprType>
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -68,7 +70,7 @@ class TensorGeneratorOp : public TensorBase<TensorGeneratorOp<Generator, XprType
|
||||
const Generator& generator() const { return m_generator; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
@@ -84,18 +86,18 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
||||
typedef TensorGeneratorOp<Generator, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
|
||||
static const int NumDims = internal::array_size<Dimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<Dimensions>::value;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||
BlockAccess = true,
|
||||
PreferBlockAccess = true,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -153,10 +155,9 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
const int packetSize = PacketType<CoeffReturnType, Device>::size;
|
||||
EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+packetSize-1 < dimensions().TotalSize());
|
||||
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[packetSize];
|
||||
for (int i = 0; i < packetSize; ++i) {
|
||||
values[i] = coeff(index+i);
|
||||
}
|
||||
|
||||
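A sketch of how TensorGeneratorOp is reached from user code via TensorBase::generate(); the functor below is hypothetical:

    struct IotaGenerator {
      float operator()(const Eigen::array<Eigen::DenseIndex, 2>& coords) const {
        return static_cast<float>(coords[0] * 10 + coords[1]);
      }
    };

    // Only the shape of the source tensor matters; every coefficient of the
    // result is produced by calling the generator with its coordinates.
    Eigen::Tensor<float, 2> shape_only(4, 5);
    Eigen::Tensor<float, 2> generated = shape_only.generate(IotaGenerator());
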
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) to the given tensors.
|
||||
|
||||
@@ -41,6 +41,7 @@
#define gpuMalloc hipMalloc
#define gpuFree hipFree
#define gpuMemsetAsync hipMemsetAsync
#define gpuMemset2DAsync hipMemset2DAsync
#define gpuMemcpyAsync hipMemcpyAsync
#define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice
#define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost
@@ -71,6 +72,7 @@
#define gpuMalloc cudaMalloc
#define gpuFree cudaFree
#define gpuMemsetAsync cudaMemsetAsync
#define gpuMemset2DAsync cudaMemset2DAsync
#define gpuMemcpyAsync cudaMemcpyAsync
#define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice
#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
@@ -91,7 +93,7 @@
// HIPCC do not support the use of assert on the GPU side.
#define gpu_assert(COND)
#else
#define gpu_assert(COND) assert(COND)
#define gpu_assert(COND) eigen_assert(COND)
#endif

#endif // gpu_assert

@@ -26,6 +26,7 @@
#undef gpuMalloc
#undef gpuFree
#undef gpuMemsetAsync
#undef gpuMemset2DAsync
#undef gpuMemcpyAsync
#undef gpuMemcpyDeviceToDevice
#undef gpuMemcpyDeviceToHost

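The gpu* aliases above let the same Tensor device code compile against either CUDA or HIP; a hedged host-side sketch (gpuSuccess is assumed to be defined alongside these macros in the same header):

    float* dev_ptr = nullptr;
    gpu_assert(gpuMalloc(reinterpret_cast<void**>(&dev_ptr), 1024 * sizeof(float)) == gpuSuccess);
    // ... launch kernels that read/write dev_ptr ...
    gpu_assert(gpuFree(dev_ptr) == gpuSuccess);
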
@@ -10,70 +10,365 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_IO_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_IO_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
struct TensorIOFormat;
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Print the tensor as a 2d matrix
|
||||
template <typename Tensor, int Rank>
|
||||
struct TensorPrinter {
|
||||
static void run (std::ostream& os, const Tensor& tensor) {
|
||||
typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar;
|
||||
typedef typename Tensor::Index Index;
|
||||
const Index total_size = internal::array_prod(tensor.dimensions());
|
||||
if (total_size > 0) {
|
||||
const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions());
|
||||
static const int layout = Tensor::Layout;
|
||||
Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(const_cast<Scalar*>(tensor.data()), first_dim, total_size/first_dim);
|
||||
os << matrix;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Print the tensor as a vector
|
||||
template <typename Tensor>
|
||||
struct TensorPrinter<Tensor, 1> {
|
||||
static void run (std::ostream& os, const Tensor& tensor) {
|
||||
typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar;
|
||||
typedef typename Tensor::Index Index;
|
||||
const Index total_size = internal::array_prod(tensor.dimensions());
|
||||
if (total_size > 0) {
|
||||
Map<const Array<Scalar, Dynamic, 1> > array(const_cast<Scalar*>(tensor.data()), total_size);
|
||||
os << array;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Print the tensor as a scalar
|
||||
template <typename Tensor>
|
||||
struct TensorPrinter<Tensor, 0> {
|
||||
static void run (std::ostream& os, const Tensor& tensor) {
|
||||
os << tensor.coeff(0);
|
||||
}
|
||||
};
|
||||
template <typename Tensor, std::size_t rank>
|
||||
struct TensorPrinter;
|
||||
}
|
||||
|
||||
struct TensorIOFormat {
|
||||
TensorIOFormat(const std::vector<std::string>& _separator, const std::vector<std::string>& _prefix,
|
||||
const std::vector<std::string>& _suffix, int _precision = StreamPrecision, int _flags = 0,
|
||||
const std::string& _tenPrefix = "", const std::string& _tenSuffix = "", const char _fill = ' ')
|
||||
: tenPrefix(_tenPrefix),
|
||||
tenSuffix(_tenSuffix),
|
||||
prefix(_prefix),
|
||||
suffix(_suffix),
|
||||
separator(_separator),
|
||||
fill(_fill),
|
||||
precision(_precision),
|
||||
flags(_flags) {
|
||||
init_spacer();
|
||||
}
|
||||
|
||||
TensorIOFormat(int _precision = StreamPrecision, int _flags = 0, const std::string& _tenPrefix = "",
|
||||
const std::string& _tenSuffix = "", const char _fill = ' ')
|
||||
: tenPrefix(_tenPrefix), tenSuffix(_tenSuffix), fill(_fill), precision(_precision), flags(_flags) {
|
||||
// default values of prefix, suffix and separator
|
||||
prefix = {"", "["};
|
||||
suffix = {"", "]"};
|
||||
separator = {", ", "\n"};
|
||||
|
||||
init_spacer();
|
||||
}
|
||||
|
||||
void init_spacer() {
|
||||
if ((flags & DontAlignCols)) return;
|
||||
spacer.resize(prefix.size());
|
||||
spacer[0] = "";
|
||||
int i = int(tenPrefix.length()) - 1;
|
||||
while (i >= 0 && tenPrefix[i] != '\n') {
|
||||
spacer[0] += ' ';
|
||||
i--;
|
||||
}
|
||||
|
||||
for (std::size_t k = 1; k < prefix.size(); k++) {
|
||||
int j = int(prefix[k].length()) - 1;
|
||||
while (j >= 0 && prefix[k][j] != '\n') {
|
||||
spacer[k] += ' ';
|
||||
j--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline const TensorIOFormat Numpy() {
|
||||
std::vector<std::string> prefix = {"", "["};
|
||||
std::vector<std::string> suffix = {"", "]"};
|
||||
std::vector<std::string> separator = {" ", "\n"};
|
||||
return TensorIOFormat(separator, prefix, suffix, StreamPrecision, 0, "[", "]");
|
||||
}
|
||||
|
||||
static inline const TensorIOFormat Plain() {
|
||||
std::vector<std::string> separator = {" ", "\n", "\n", ""};
|
||||
std::vector<std::string> prefix = {""};
|
||||
std::vector<std::string> suffix = {""};
|
||||
return TensorIOFormat(separator, prefix, suffix, StreamPrecision, 0, "", "", ' ');
|
||||
}
|
||||
|
||||
static inline const TensorIOFormat Native() {
|
||||
std::vector<std::string> separator = {", ", ",\n", "\n"};
|
||||
std::vector<std::string> prefix = {"", "{"};
|
||||
std::vector<std::string> suffix = {"", "}"};
|
||||
return TensorIOFormat(separator, prefix, suffix, StreamPrecision, 0, "{", "}", ' ');
|
||||
}
|
||||
|
||||
static inline const TensorIOFormat Legacy() {
|
||||
TensorIOFormat LegacyFormat(StreamPrecision, 0, "", "", ' ');
|
||||
LegacyFormat.legacy_bit = true;
|
||||
return LegacyFormat;
|
||||
}
|
||||
|
||||
std::string tenPrefix;
|
||||
std::string tenSuffix;
|
||||
std::vector<std::string> prefix;
|
||||
std::vector<std::string> suffix;
|
||||
std::vector<std::string> separator;
|
||||
char fill;
|
||||
int precision;
|
||||
int flags;
|
||||
std::vector<std::string> spacer{};
|
||||
bool legacy_bit = false;
|
||||
};
|
||||
|
||||
template <typename T, int Layout, int rank>
|
||||
class TensorWithFormat;
|
||||
// specialize for Layout=ColMajor, Layout=RowMajor and rank=0.
|
||||
template <typename T, int rank>
|
||||
class TensorWithFormat<T, RowMajor, rank> {
|
||||
public:
|
||||
TensorWithFormat(const T& tensor, const TensorIOFormat& format) : t_tensor(tensor), t_format(format) {}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const TensorWithFormat<T, RowMajor, rank>& wf) {
|
||||
// Evaluate the expression if needed
|
||||
typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator;
|
||||
TensorForcedEvalOp<const T> eval = wf.t_tensor.eval();
|
||||
Evaluator tensor(eval, DefaultDevice());
|
||||
tensor.evalSubExprsIfNeeded(NULL);
|
||||
internal::TensorPrinter<Evaluator, rank>::run(os, tensor, wf.t_format);
|
||||
// Cleanup.
|
||||
tensor.cleanup();
|
||||
return os;
|
||||
}
|
||||
|
||||
protected:
|
||||
T t_tensor;
|
||||
TensorIOFormat t_format;
|
||||
};
|
||||
|
||||
template <typename T, int rank>
|
||||
class TensorWithFormat<T, ColMajor, rank> {
|
||||
public:
|
||||
TensorWithFormat(const T& tensor, const TensorIOFormat& format) : t_tensor(tensor), t_format(format) {}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const TensorWithFormat<T, ColMajor, rank>& wf) {
|
||||
// Switch to RowMajor storage and print afterwards
|
||||
typedef typename T::Index IndexType;
|
||||
std::array<IndexType, rank> shuffle;
|
||||
std::array<IndexType, rank> id;
|
||||
std::iota(id.begin(), id.end(), IndexType(0));
|
||||
std::copy(id.begin(), id.end(), shuffle.rbegin());
|
||||
auto tensor_row_major = wf.t_tensor.swap_layout().shuffle(shuffle);
|
||||
|
||||
// Evaluate the expression if needed
|
||||
typedef TensorEvaluator<const TensorForcedEvalOp<const decltype(tensor_row_major)>, DefaultDevice> Evaluator;
|
||||
TensorForcedEvalOp<const decltype(tensor_row_major)> eval = tensor_row_major.eval();
|
||||
Evaluator tensor(eval, DefaultDevice());
|
||||
tensor.evalSubExprsIfNeeded(NULL);
|
||||
internal::TensorPrinter<Evaluator, rank>::run(os, tensor, wf.t_format);
|
||||
// Cleanup.
|
||||
tensor.cleanup();
|
||||
return os;
|
||||
}
|
||||
|
||||
protected:
|
||||
T t_tensor;
|
||||
TensorIOFormat t_format;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
std::ostream& operator << (std::ostream& os, const TensorBase<T, ReadOnlyAccessors>& expr) {
|
||||
typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator;
|
||||
typedef typename Evaluator::Dimensions Dimensions;
|
||||
class TensorWithFormat<T, ColMajor, 0> {
|
||||
public:
|
||||
TensorWithFormat(const T& tensor, const TensorIOFormat& format) : t_tensor(tensor), t_format(format) {}
|
||||
|
||||
// Evaluate the expression if needed
|
||||
TensorForcedEvalOp<const T> eval = expr.eval();
|
||||
Evaluator tensor(eval, DefaultDevice());
|
||||
tensor.evalSubExprsIfNeeded(NULL);
|
||||
friend std::ostream& operator<<(std::ostream& os, const TensorWithFormat<T, ColMajor, 0>& wf) {
|
||||
// Evaluate the expression if needed
|
||||
typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator;
|
||||
TensorForcedEvalOp<const T> eval = wf.t_tensor.eval();
|
||||
Evaluator tensor(eval, DefaultDevice());
|
||||
tensor.evalSubExprsIfNeeded(NULL);
|
||||
internal::TensorPrinter<Evaluator, 0>::run(os, tensor, wf.t_format);
|
||||
// Cleanup.
|
||||
tensor.cleanup();
|
||||
return os;
|
||||
}
|
||||
|
||||
// Print the result
|
||||
static const int rank = internal::array_size<Dimensions>::value;
|
||||
internal::TensorPrinter<Evaluator, rank>::run(os, tensor);
|
||||
protected:
|
||||
T t_tensor;
|
||||
TensorIOFormat t_format;
|
||||
};
|
||||
|
||||
// Cleanup.
|
||||
tensor.cleanup();
|
||||
return os;
|
||||
namespace internal {
|
||||
template <typename Tensor, std::size_t rank>
|
||||
struct TensorPrinter {
|
||||
static void run(std::ostream& s, const Tensor& _t, const TensorIOFormat& fmt) {
|
||||
typedef std::remove_const_t<typename Tensor::Scalar> Scalar;
|
||||
typedef typename Tensor::Index IndexType;
|
||||
static const int layout = Tensor::Layout;
|
||||
// backwards compatibility case: print tensor after reshaping to matrix of size dim(0) x
|
||||
// (dim(1)*dim(2)*...*dim(rank-1)).
|
||||
if (fmt.legacy_bit) {
|
||||
const IndexType total_size = internal::array_prod(_t.dimensions());
|
||||
if (total_size > 0) {
|
||||
const IndexType first_dim = Eigen::internal::array_get<0>(_t.dimensions());
|
||||
Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(_t.data(), first_dim,
|
||||
total_size / first_dim);
|
||||
s << matrix;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
eigen_assert(layout == RowMajor);
|
||||
typedef std::conditional_t<is_same<Scalar, char>::value || is_same<Scalar, unsigned char>::value ||
|
||||
is_same<Scalar, numext::int8_t>::value || is_same<Scalar, numext::uint8_t>::value,
|
||||
int,
|
||||
std::conditional_t<is_same<Scalar, std::complex<char> >::value ||
|
||||
is_same<Scalar, std::complex<unsigned char> >::value ||
|
||||
is_same<Scalar, std::complex<numext::int8_t> >::value ||
|
||||
is_same<Scalar, std::complex<numext::uint8_t> >::value,
|
||||
std::complex<int>, const Scalar&>> PrintType;
|
||||
|
||||
const IndexType total_size = array_prod(_t.dimensions());
|
||||
|
||||
std::streamsize explicit_precision;
|
||||
if (fmt.precision == StreamPrecision) {
|
||||
explicit_precision = 0;
|
||||
} else if (fmt.precision == FullPrecision) {
|
||||
if (NumTraits<Scalar>::IsInteger) {
|
||||
explicit_precision = 0;
|
||||
} else {
|
||||
explicit_precision = significant_decimals_impl<Scalar>::run();
|
||||
}
|
||||
} else {
|
||||
explicit_precision = fmt.precision;
|
||||
}
|
||||
|
||||
std::streamsize old_precision = 0;
|
||||
if (explicit_precision) old_precision = s.precision(explicit_precision);
|
||||
|
||||
IndexType width = 0;
|
||||
|
||||
bool align_cols = !(fmt.flags & DontAlignCols);
|
||||
if (align_cols) {
|
||||
// compute the largest width
|
||||
for (IndexType i = 0; i < total_size; i++) {
|
||||
std::stringstream sstr;
|
||||
sstr.copyfmt(s);
|
||||
sstr << static_cast<PrintType>(_t.data()[i]);
|
||||
width = std::max<IndexType>(width, IndexType(sstr.str().length()));
|
||||
}
|
||||
}
|
||||
std::streamsize old_width = s.width();
|
||||
char old_fill_character = s.fill();
|
||||
|
||||
s << fmt.tenPrefix;
|
||||
for (IndexType i = 0; i < total_size; i++) {
|
||||
std::array<bool, rank> is_at_end{};
|
||||
std::array<bool, rank> is_at_begin{};
|
||||
|
||||
// is the ith element the end of an coeff (always true), of a row, of a matrix, ...?
|
||||
for (std::size_t k = 0; k < rank; k++) {
|
||||
if ((i + 1) % (std::accumulate(_t.dimensions().rbegin(), _t.dimensions().rbegin() + k, 1,
|
||||
std::multiplies<IndexType>())) ==
|
||||
0) {
|
||||
is_at_end[k] = true;
|
||||
}
|
||||
}
|
||||
|
||||
// is the ith element the begin of an coeff (always true), of a row, of a matrix, ...?
|
||||
for (std::size_t k = 0; k < rank; k++) {
|
||||
if (i % (std::accumulate(_t.dimensions().rbegin(), _t.dimensions().rbegin() + k, 1,
|
||||
std::multiplies<IndexType>())) ==
|
||||
0) {
|
||||
is_at_begin[k] = true;
|
||||
}
|
||||
}
|
||||
|
||||
// do we have a line break?
|
||||
bool is_at_begin_after_newline = false;
|
||||
for (std::size_t k = 0; k < rank; k++) {
|
||||
if (is_at_begin[k]) {
|
||||
std::size_t separator_index = (k < fmt.separator.size()) ? k : fmt.separator.size() - 1;
|
||||
if (fmt.separator[separator_index].find('\n') != std::string::npos) {
|
||||
is_at_begin_after_newline = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool is_at_end_before_newline = false;
|
||||
for (std::size_t k = 0; k < rank; k++) {
|
||||
if (is_at_end[k]) {
|
||||
std::size_t separator_index = (k < fmt.separator.size()) ? k : fmt.separator.size() - 1;
|
||||
if (fmt.separator[separator_index].find('\n') != std::string::npos) {
|
||||
is_at_end_before_newline = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::stringstream suffix, prefix, separator;
|
||||
for (std::size_t k = 0; k < rank; k++) {
|
||||
std::size_t suffix_index = (k < fmt.suffix.size()) ? k : fmt.suffix.size() - 1;
|
||||
if (is_at_end[k]) {
|
||||
suffix << fmt.suffix[suffix_index];
|
||||
}
|
||||
}
|
||||
for (std::size_t k = 0; k < rank; k++) {
|
||||
std::size_t separator_index = (k < fmt.separator.size()) ? k : fmt.separator.size() - 1;
|
||||
if (is_at_end[k] &&
|
||||
(!is_at_end_before_newline || fmt.separator[separator_index].find('\n') != std::string::npos)) {
|
||||
separator << fmt.separator[separator_index];
|
||||
}
|
||||
}
|
||||
for (std::size_t k = 0; k < rank; k++) {
|
||||
std::size_t spacer_index = (k < fmt.spacer.size()) ? k : fmt.spacer.size() - 1;
|
||||
if (i != 0 && is_at_begin_after_newline && (!is_at_begin[k] || k == 0)) {
|
||||
prefix << fmt.spacer[spacer_index];
|
||||
}
|
||||
}
|
||||
for (int k = rank - 1; k >= 0; k--) {
|
||||
std::size_t prefix_index = (static_cast<std::size_t>(k) < fmt.prefix.size()) ? k : fmt.prefix.size() - 1;
|
||||
if (is_at_begin[k]) {
|
||||
prefix << fmt.prefix[prefix_index];
|
||||
}
|
||||
}
|
||||
|
||||
s << prefix.str();
|
||||
if (width) {
|
||||
s.fill(fmt.fill);
|
||||
s.width(width);
|
||||
s << std::right;
|
||||
}
|
||||
s << _t.data()[i];
|
||||
s << suffix.str();
|
||||
if (i < total_size - 1) {
|
||||
s << separator.str();
|
||||
}
|
||||
}
|
||||
s << fmt.tenSuffix;
|
||||
if (explicit_precision) s.precision(old_precision);
|
||||
if (width) {
|
||||
s.fill(old_fill_character);
|
||||
s.width(old_width);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Tensor>
|
||||
struct TensorPrinter<Tensor, 0> {
|
||||
static void run(std::ostream& s, const Tensor& _t, const TensorIOFormat& fmt) {
|
||||
typedef typename Tensor::Scalar Scalar;
|
||||
|
||||
std::streamsize explicit_precision;
|
||||
if (fmt.precision == StreamPrecision) {
|
||||
explicit_precision = 0;
|
||||
} else if (fmt.precision == FullPrecision) {
|
||||
if (NumTraits<Scalar>::IsInteger) {
|
||||
explicit_precision = 0;
|
||||
} else {
|
||||
explicit_precision = significant_decimals_impl<Scalar>::run();
|
||||
}
|
||||
} else {
|
||||
explicit_precision = fmt.precision;
|
||||
}
|
||||
|
||||
std::streamsize old_precision = 0;
|
||||
if (explicit_precision) old_precision = s.precision(explicit_precision);
|
||||
|
||||
s << fmt.tenPrefix << _t.coeff(0) << fmt.tenSuffix;
|
||||
if (explicit_precision) s.precision(old_precision);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
template <typename T>
|
||||
std::ostream& operator<<(std::ostream& s, const TensorBase<T, ReadOnlyAccessors>& t) {
|
||||
s << t.format(TensorIOFormat::Plain());
|
||||
return s;
|
||||
}
|
||||
} // end namespace Eigen
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_CXX11_TENSOR_TENSOR_IO_H
|
||||
#endif // EIGEN_CXX11_TENSOR_TENSOR_IO_H
|
||||
|
||||
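A usage sketch of the new tensor printing facilities (assumes TensorBase::format() as used by the operator<< above):

    #include <iostream>

    Eigen::Tensor<float, 3> t(2, 3, 4);
    t.setRandom();
    std::cout << t << "\n";                                         // Plain() format by default
    std::cout << t.format(Eigen::TensorIOFormat::Numpy()) << "\n";  // numpy-style brackets
    std::cout << t.format(Eigen::TensorIOFormat::Legacy()) << "\n"; // old dim(0) x rest matrix dump
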
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorImagePatch
|
||||
@@ -31,14 +33,14 @@ namespace internal {
|
||||
template<DenseIndex Rows, DenseIndex Cols, typename XprType>
|
||||
struct traits<TensorImagePatchOp<Rows, Cols, XprType> > : public traits<XprType>
|
||||
{
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
|
||||
typedef traits<XprType> XprTraits;
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions + 1;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions + 1;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -187,7 +189,7 @@ class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprT
|
||||
Scalar padding_value() const { return m_padding_value; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
@@ -215,25 +217,25 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
|
||||
{
|
||||
typedef TensorImagePatchOp<Rows, Cols, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static const int NumDims = NumInputDims + 1;
|
||||
static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = NumInputDims + 1;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
|
||||
typedef TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>,
|
||||
Device> Self;
|
||||
typedef TensorEvaluator<ArgType, Device> Impl;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = true,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false,
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -447,7 +449,6 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) {
|
||||
@@ -540,7 +541,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
|
||||
{
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
values[i] = coeff(index+i);
|
||||
|
||||
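TensorImagePatchOp is normally created through TensorBase::extract_image_patches(); a ColMajor sketch:

    // (channels, rows, cols) input; the patch op appends one dimension.
    Eigen::Tensor<float, 3> image(3, 128, 128);
    image.setRandom();
    // 3x3 patches: result is (channels, patch_rows, patch_cols, number_of_patches).
    Eigen::Tensor<float, 4> patches = image.extract_image_patches(3, 3);
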
@@ -10,10 +10,7 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H
|
||||
|
||||
|
||||
#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
|
||||
#define EIGEN_HAS_INDEX_LIST
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
@@ -246,7 +243,7 @@ struct tuple_coeff {
|
||||
|
||||
template <typename... T>
|
||||
EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const Index i, const IndexTuple<T...>& t) {
|
||||
return ((i == Idx) & is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value) ||
|
||||
return ((i == Idx) && is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value) ||
|
||||
tuple_coeff<Idx-1, ValueT>::value_known_statically(i, t);
|
||||
}
|
||||
|
||||
@@ -308,6 +305,11 @@ struct IndexList : internal::IndexTuple<FirstType, OtherTypes...> {
|
||||
return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, Index>::set(i, *this, value);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr std::size_t size() const {
|
||||
return 1 + sizeof...(OtherTypes);
|
||||
};
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC constexpr IndexList(const internal::IndexTuple<FirstType, OtherTypes...>& other) : internal::IndexTuple<FirstType, OtherTypes...>(other) { }
|
||||
EIGEN_DEVICE_FUNC constexpr IndexList(FirstType& first, OtherTypes... other) : internal::IndexTuple<FirstType, OtherTypes...>(first, other...) { }
|
||||
EIGEN_DEVICE_FUNC constexpr IndexList() : internal::IndexTuple<FirstType, OtherTypes...>() { }
|
||||
@@ -379,10 +381,10 @@ template<typename FirstType, typename... OtherTypes> struct array_size<const Ind
|
||||
};
|
||||
|
||||
template<typename FirstType, typename... OtherTypes> struct array_size<IndexPairList<FirstType, OtherTypes...> > {
|
||||
static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value;
|
||||
static const size_t value = 1 + sizeof...(OtherTypes);
|
||||
};
|
||||
template<typename FirstType, typename... OtherTypes> struct array_size<const IndexPairList<FirstType, OtherTypes...> > {
|
||||
static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value;
|
||||
static const size_t value = 1 + sizeof...(OtherTypes);
|
||||
};
|
||||
|
||||
template<Index N, typename FirstType, typename... OtherTypes> EIGEN_DEVICE_FUNC constexpr Index array_get(IndexList<FirstType, OtherTypes...>& a) {
|
||||
@@ -468,7 +470,7 @@ struct index_statically_eq_impl {
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
struct index_statically_eq_impl<IndexList<FirstType, OtherTypes...> > {
|
||||
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
|
||||
(IndexList<FirstType, OtherTypes...>().get(i) == value);
|
||||
}
|
||||
};
|
||||
@@ -476,7 +478,7 @@ struct index_statically_eq_impl<IndexList<FirstType, OtherTypes...> > {
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
struct index_statically_eq_impl<const IndexList<FirstType, OtherTypes...> > {
|
||||
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
|
||||
(IndexList<FirstType, OtherTypes...>().get(i) == value);
|
||||
}
|
||||
};
|
||||
@@ -492,7 +494,7 @@ struct index_statically_ne_impl {
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
struct index_statically_ne_impl<IndexList<FirstType, OtherTypes...> > {
|
||||
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
|
||||
(IndexList<FirstType, OtherTypes...>().get(i) != value);
|
||||
}
|
||||
};
|
||||
@@ -500,7 +502,7 @@ struct index_statically_ne_impl<IndexList<FirstType, OtherTypes...> > {
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
struct index_statically_ne_impl<const IndexList<FirstType, OtherTypes...> > {
|
||||
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
|
||||
(IndexList<FirstType, OtherTypes...>().get(i) != value);
|
||||
}
|
||||
};
|
||||
@@ -516,7 +518,7 @@ struct index_statically_gt_impl {
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
struct index_statically_gt_impl<IndexList<FirstType, OtherTypes...> > {
|
||||
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
|
||||
(IndexList<FirstType, OtherTypes...>().get(i) > value);
|
||||
}
|
||||
};
|
||||
@@ -524,7 +526,7 @@ struct index_statically_gt_impl<IndexList<FirstType, OtherTypes...> > {
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
struct index_statically_gt_impl<const IndexList<FirstType, OtherTypes...> > {
|
||||
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
|
||||
(IndexList<FirstType, OtherTypes...>().get(i) > value);
|
||||
}
|
||||
};
|
||||
@@ -541,7 +543,7 @@ struct index_statically_lt_impl {
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
struct index_statically_lt_impl<IndexList<FirstType, OtherTypes...> > {
|
||||
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
|
||||
(IndexList<FirstType, OtherTypes...>().get(i) < value);
|
||||
}
|
||||
};
|
||||
@@ -549,7 +551,7 @@ struct index_statically_lt_impl<IndexList<FirstType, OtherTypes...> > {
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
struct index_statically_lt_impl<const IndexList<FirstType, OtherTypes...> > {
|
||||
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
|
||||
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
|
||||
(IndexList<FirstType, OtherTypes...>().get(i) < value);
|
||||
}
|
||||
};
|
||||
@@ -566,7 +568,7 @@ struct index_pair_first_statically_eq_impl {
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
struct index_pair_first_statically_eq_impl<IndexPairList<FirstType, OtherTypes...> > {
|
||||
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
|
||||
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &
|
||||
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &&
|
||||
(IndexPairList<FirstType, OtherTypes...>().operator[](i).first == value);
|
||||
}
|
||||
};
|
||||
@@ -574,7 +576,7 @@ struct index_pair_first_statically_eq_impl<IndexPairList<FirstType, OtherTypes..
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
struct index_pair_first_statically_eq_impl<const IndexPairList<FirstType, OtherTypes...> > {
|
||||
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
|
||||
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &
|
||||
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &&
|
||||
(IndexPairList<FirstType, OtherTypes...>().operator[](i).first == value);
|
||||
}
|
||||
};
|
||||
@@ -591,7 +593,7 @@ struct index_pair_second_statically_eq_impl {
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
struct index_pair_second_statically_eq_impl<IndexPairList<FirstType, OtherTypes...> > {
|
||||
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
|
||||
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &
|
||||
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &&
|
||||
(IndexPairList<FirstType, OtherTypes...>().operator[](i).second == value);
|
||||
}
|
||||
};
|
||||
@@ -599,7 +601,7 @@ struct index_pair_second_statically_eq_impl<IndexPairList<FirstType, OtherTypes.
|
||||
template <typename FirstType, typename... OtherTypes>
|
||||
struct index_pair_second_statically_eq_impl<const IndexPairList<FirstType, OtherTypes...> > {
|
||||
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
|
||||
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &
|
||||
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &&
|
||||
(IndexPairList<FirstType, OtherTypes...>().operator[](i).second == value);
|
||||
}
|
||||
};
|
||||
@@ -608,81 +610,6 @@ struct index_pair_second_statically_eq_impl<const IndexPairList<FirstType, Other
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
#else
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
template <typename T>
|
||||
struct index_known_statically_impl {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const Index) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct all_indices_known_statically_impl {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct indices_statically_known_to_increase_impl {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct index_statically_eq_impl {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct index_statically_ne_impl {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct index_statically_gt_impl {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct index_statically_lt_impl {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Tx>
|
||||
struct index_pair_first_statically_eq_impl {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Tx>
|
||||
struct index_pair_second_statically_eq_impl {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // end namespace internal
} // end namespace Eigen

#endif


namespace Eigen {
namespace internal {

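IndexList mixes compile-time and runtime indices, which lets reductions resolve sizes statically; a brief sketch:

    // First axis fixed at compile time, second supplied at runtime.
    Eigen::IndexList<Eigen::type2index<0>, int> dims;
    dims.set(1, 2);

    Eigen::Tensor<float, 3> t(4, 5, 6);
    t.setRandom();
    Eigen::Tensor<float, 1> summed = t.sum(dims);   // reduces axes 0 and 2, keeps axis 1
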
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorInflation
|
||||
@@ -28,9 +30,9 @@ struct traits<TensorInflationOp<Strides, XprType> > : public traits<XprType>
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -66,7 +68,7 @@ class TensorInflationOp : public TensorBase<TensorInflationOp<Strides, XprType>,
|
||||
const Strides& strides() const { return m_strides; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
@@ -80,21 +82,21 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
|
||||
{
|
||||
typedef TensorInflationOp<Strides, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -202,7 +204,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
values[i] = coeff(index+i);
|
||||
|
||||
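TensorInflationOp is exposed through TensorBase::inflate(); a sketch of a stride-3 inflation of a 1-D tensor:

    Eigen::Tensor<float, 1> v(4);
    v.setValues({1.f, 2.f, 3.f, 4.f});
    Eigen::array<Eigen::Index, 1> strides = {{3}};
    // Zeros are inserted between entries: output size is 3*(4-1)+1 = 10.
    Eigen::Tensor<float, 1> inflated = v.inflate(strides);
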
@@ -10,10 +10,10 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H
#define EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H

#if EIGEN_HAS_VARIADIC_TEMPLATES

#include <initializer_list>

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class TensorInitializer
@@ -77,6 +77,4 @@ void initialize_tensor(TensorEvaluator<Derived, DefaultDevice>& tensor,
} // namespace internal
} // namespace Eigen

#endif // EIGEN_HAS_VARIADIC_TEMPLATES

#endif // EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H

@@ -11,6 +11,8 @@
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
|
||||
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \internal
|
||||
@@ -28,12 +30,10 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
namespace {
|
||||
|
||||
// Note: result is undefined if val == 0
|
||||
template <typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
typename internal::enable_if<sizeof(T)==4,int>::type count_leading_zeros(const T val)
|
||||
std::enable_if_t<sizeof(T)==4,int> count_leading_zeros(const T val)
|
||||
{
|
||||
#ifdef EIGEN_GPU_COMPILE_PHASE
|
||||
return __clz(val);
|
||||
@@ -51,7 +51,7 @@ namespace {
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
typename internal::enable_if<sizeof(T)==8,int>::type count_leading_zeros(const T val)
|
||||
std::enable_if_t<sizeof(T)==8,int> count_leading_zeros(const T val)
|
||||
{
|
||||
#ifdef EIGEN_GPU_COMPILE_PHASE
|
||||
return __clzll(val);
|
||||
@@ -79,13 +79,13 @@ namespace {
|
||||
|
||||
template <typename T>
|
||||
struct UnsignedTraits {
|
||||
typedef typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type type;
|
||||
typedef std::conditional_t<sizeof(T) == 8, uint64_t, uint32_t> type;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct DividerTraits {
|
||||
typedef typename UnsignedTraits<T>::type type;
|
||||
static const int N = sizeof(T) * 8;
|
||||
static constexpr int N = sizeof(T) * 8;
|
||||
};
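// Editor's example (standalone, host-only; not from the commit): the two
// count_leading_zeros overloads above pick an implementation by sizeof(T) through
// std::enable_if_t. The same dispatch written with GCC/Clang builtins (the GPU and
// MSVC branches of the real code are omitted here):
#include <cassert>
#include <cstdint>
#include <type_traits>

template <typename T>
std::enable_if_t<sizeof(T) == 4, int> clz(const T val) {
  return __builtin_clz(static_cast<uint32_t>(val));    // result undefined for val == 0
}

template <typename T>
std::enable_if_t<sizeof(T) == 8, int> clz(const T val) {
  return __builtin_clzll(static_cast<uint64_t>(val));  // result undefined for val == 0
}

int main() {
  assert(clz(uint32_t{1}) == 31);
  assert(clz(uint64_t{1}) == 63);
}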
template <typename T>
|
||||
@@ -135,8 +135,6 @@ namespace {
|
||||
#endif
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
template <typename T, bool div_gt_one = false>
|
||||
struct TensorIntDivisor {
|
||||
@@ -252,7 +250,7 @@ private:
|
||||
|
||||
|
||||
template <typename T, bool div_gt_one>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
|
||||
return divisor.divide(numerator);
|
||||
}
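// Editor's usage sketch (assumptions: the public Tensor module header is on the include
// path and TensorIntDivisor's constructor takes the runtime divisor, as the class above
// suggests; this is not a test from the commit). The point of the class is to prepare a
// divisor once and replace many divisions with multiplies/shifts via the operator/ above:
#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  const int d = 7;
  Eigen::internal::TensorIntDivisor<int> fast_div(d);  // precompute the magic constants
  for (int n = 0; n < 1000; ++n) {
    assert(n / fast_div == n / d);                     // same quotient, cheaper division
  }
}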
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorLayoutSwap
|
||||
@@ -43,9 +45,9 @@ struct traits<TensorLayoutSwapOp<XprType> > : public traits<XprType>
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = traits<XprType>::NumDimensions;
|
||||
static const int Layout = (traits<XprType>::Layout == ColMajor) ? RowMajor : ColMajor;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = traits<XprType>::NumDimensions;
|
||||
static constexpr int Layout = (traits<XprType>::Layout == ColMajor) ? RowMajor : ColMajor;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -72,7 +74,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
|
||||
typedef TensorBase<TensorLayoutSwapOp<XprType>, WriteAccessors> Base;
|
||||
typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Scalar Scalar;
|
||||
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
|
||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
|
||||
typedef typename Eigen::internal::nested<TensorLayoutSwapOp>::type Nested;
|
||||
typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::StorageKind StorageKind;
|
||||
typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Index Index;
|
||||
@@ -81,7 +83,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
|
||||
: m_xpr(expr) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorLayoutSwapOp)
|
||||
@@ -96,15 +98,15 @@ struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
|
||||
{
|
||||
typedef TensorLayoutSwapOp<ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
|
||||
static constexpr int Layout = (TensorEvaluator<ArgType, Device>::Layout == static_cast<int>(ColMajor)) ? RowMajor : ColMajor;
|
||||
enum {
|
||||
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
|
||||
Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
|
||||
};
|
||||
@@ -178,12 +180,12 @@ template<typename ArgType, typename Device>
|
||||
typedef TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> Base;
|
||||
typedef TensorLayoutSwapOp<ArgType> XprType;
|
||||
|
||||
static constexpr int Layout = (TensorEvaluator<ArgType, Device>::Layout == static_cast<int>(ColMajor)) ? RowMajor : ColMajor;
|
||||
enum {
|
||||
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
|
||||
Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
|
||||
CoordAccess = false // to be implemented
|
||||
};
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
/** use this macro in sfinae selection in templated functions
|
||||
*
|
||||
* template<typename T,
|
||||
* typename std::enable_if< isBanana<T>::value , int >::type = 0
|
||||
* std::enable_if_t< isBanana<T>::value , int > = 0
|
||||
* >
|
||||
* void foo(){}
|
||||
*
|
||||
@@ -26,22 +26,8 @@
|
||||
* void foo(){}
|
||||
*/
|
||||
|
||||
// SFINAE requires variadic templates
|
||||
#if !defined(EIGEN_GPUCC)
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
// SFINAE doesn't work for gcc <= 4.7
|
||||
#ifdef EIGEN_COMP_GNUC
|
||||
#if EIGEN_GNUC_AT_LEAST(4,8)
|
||||
#define EIGEN_HAS_SFINAE
|
||||
#endif
|
||||
#else
|
||||
#define EIGEN_HAS_SFINAE
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \
|
||||
typename internal::enable_if< ( __condition__ ) , int >::type = 0
|
||||
std::enable_if_t< ( __condition__ ) , int > = 0
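// Editor's example (standalone; hypothetical trait choices, mirroring the isBanana
// comment above): the pattern the EIGEN_SFINAE_ENABLE_IF macro expands to -- a defaulted
// non-type template parameter whose type only exists when the condition holds, so the
// overload silently drops out of overload resolution otherwise:
#include <iostream>
#include <type_traits>

template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
void describe(T) { std::cout << "integral\n"; }

template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
void describe(T) { std::cout << "floating point\n"; }

int main() {
  describe(42);    // selects the integral overload
  describe(3.14);  // selects the floating-point overload
}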
// Define a macro to use a reference on the host but a value on the device
|
||||
#if defined(SYCL_DEVICE_ONLY)
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_MAP_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_MAP_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// FIXME use proper doxygen documentation (e.g. \tparam MakePointer_)
|
||||
@@ -32,7 +34,7 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
|
||||
typedef TensorMap<PlainObjectType, Options_, MakePointer_> Self;
|
||||
typedef TensorBase<TensorMap<PlainObjectType, Options_, MakePointer_> > Base;
|
||||
#ifdef EIGEN_USE_SYCL
|
||||
typedef typename Eigen::internal::remove_reference<typename Eigen::internal::nested<Self>::type>::type Nested;
|
||||
typedef std::remove_reference_t<typename Eigen::internal::nested<Self>::type> Nested;
|
||||
#else
|
||||
typedef typename Eigen::internal::nested<Self>::type Nested;
|
||||
#endif
|
||||
@@ -49,29 +51,29 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
|
||||
// example in TensorMap<Tensor<const Scalar, ...>> expression. This type of
|
||||
// expression should be illegal, but adding this restriction is not possible
|
||||
// in practice (see https://bitbucket.org/eigen/eigen/pull-requests/488).
|
||||
typedef typename internal::conditional<
|
||||
typedef std::conditional_t<
|
||||
bool(internal::is_lvalue<PlainObjectType>::value),
|
||||
PointerType, // use simple pointer in lvalue expressions
|
||||
PointerConstType // use const pointer in rvalue expressions
|
||||
>::type StoragePointerType;
|
||||
> StoragePointerType;
|
||||
|
||||
// If TensorMap was constructed over rvalue expression (e.g. const Tensor),
|
||||
// we should return a reference to const from operator() (and others), even
|
||||
// if TensorMap itself is not const.
|
||||
typedef typename internal::conditional<
|
||||
typedef std::conditional_t<
|
||||
bool(internal::is_lvalue<PlainObjectType>::value),
|
||||
Scalar&,
|
||||
const Scalar&
|
||||
>::type StorageRefType;
|
||||
> StorageRefType;
|
||||
|
||||
static const int Options = Options_;
|
||||
static constexpr int Options = Options_;
|
||||
|
||||
static const Index NumIndices = PlainObjectType::NumIndices;
|
||||
static constexpr Index NumIndices = PlainObjectType::NumIndices;
|
||||
typedef typename PlainObjectType::Dimensions Dimensions;
|
||||
|
||||
static constexpr int Layout = PlainObjectType::Layout;
|
||||
enum {
|
||||
IsAligned = ((int(Options_)&Aligned)==Aligned),
|
||||
Layout = PlainObjectType::Layout,
|
||||
CoordAccess = true,
|
||||
RawAccess = true
|
||||
};
|
||||
@@ -82,35 +84,11 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
|
||||
EIGEN_STATIC_ASSERT((0 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) {
|
||||
// The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
|
||||
EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(firstDimension) {
|
||||
// The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
|
||||
EIGEN_STATIC_ASSERT((1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index dim1, Index dim2) : m_data(dataPtr), m_dimensions(dim1, dim2) {
|
||||
EIGEN_STATIC_ASSERT(2 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index dim1, Index dim2, Index dim3) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3) {
|
||||
EIGEN_STATIC_ASSERT(3 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4) {
|
||||
EIGEN_STATIC_ASSERT(4 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4, dim5) {
|
||||
EIGEN_STATIC_ASSERT(5 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, const array<Index, NumIndices>& dimensions)
|
||||
: m_data(dataPtr), m_dimensions(dimensions)
|
||||
@@ -165,7 +143,6 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
|
||||
return m_data[index];
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE StorageRefType operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
|
||||
{
|
||||
@@ -179,52 +156,6 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
|
||||
return m_data[index];
|
||||
}
|
||||
}
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1) const
|
||||
{
|
||||
if (PlainObjectType::Options&RowMajor) {
|
||||
const Index index = i1 + i0 * m_dimensions[1];
|
||||
return m_data[index];
|
||||
} else {
|
||||
const Index index = i0 + i1 * m_dimensions[0];
|
||||
return m_data[index];
|
||||
}
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2) const
|
||||
{
|
||||
if (PlainObjectType::Options&RowMajor) {
|
||||
const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0);
|
||||
return m_data[index];
|
||||
} else {
|
||||
const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
|
||||
return m_data[index];
|
||||
}
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2, Index i3) const
|
||||
{
|
||||
if (PlainObjectType::Options&RowMajor) {
|
||||
const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
|
||||
return m_data[index];
|
||||
} else {
|
||||
const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
|
||||
return m_data[index];
|
||||
}
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
|
||||
{
|
||||
if (PlainObjectType::Options&RowMajor) {
|
||||
const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
|
||||
return m_data[index];
|
||||
} else {
|
||||
const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
|
||||
return m_data[index];
|
||||
}
|
||||
}
|
||||
#endif
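// Editor's example (standalone; not from the commit): the rank-3 overloads above reduce
// to these two linearization formulas. Written out and cross-checked for a small shape:
#include <cassert>
#include <cstddef>

using Index = std::ptrdiff_t;

Index row_major_index(Index i0, Index i1, Index i2, const Index dims[3]) {
  return i2 + dims[2] * (i1 + dims[1] * i0);   // last index varies fastest
}
Index col_major_index(Index i0, Index i1, Index i2, const Index dims[3]) {
  return i0 + dims[0] * (i1 + dims[1] * i2);   // first index varies fastest
}

int main() {
  const Index dims[3] = {2, 3, 4};
  assert(row_major_index(1, 2, 3, dims) == 23);  // 3 + 4*(2 + 3*1)
  assert(col_major_index(1, 2, 3, dims) == 23);  // 1 + 2*(2 + 3*3)
  // Both orderings cover 0 .. 2*3*4-1 exactly once; only the traversal order differs.
}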
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE StorageRefType operator()(const array<Index, NumIndices>& indices)
|
||||
@@ -253,7 +184,6 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
|
||||
return m_data[index];
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE StorageRefType operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
|
||||
{
|
||||
@@ -268,52 +198,6 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
|
||||
return m_data[index];
|
||||
}
|
||||
}
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1)
|
||||
{
|
||||
if (PlainObjectType::Options&RowMajor) {
|
||||
const Index index = i1 + i0 * m_dimensions[1];
|
||||
return m_data[index];
|
||||
} else {
|
||||
const Index index = i0 + i1 * m_dimensions[0];
|
||||
return m_data[index];
|
||||
}
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2)
|
||||
{
|
||||
if (PlainObjectType::Options&RowMajor) {
|
||||
const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0);
|
||||
return m_data[index];
|
||||
} else {
|
||||
const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
|
||||
return m_data[index];
|
||||
}
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2, Index i3)
|
||||
{
|
||||
if (PlainObjectType::Options&RowMajor) {
|
||||
const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
|
||||
return m_data[index];
|
||||
} else {
|
||||
const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
|
||||
return m_data[index];
|
||||
}
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
|
||||
{
|
||||
if (PlainObjectType::Options&RowMajor) {
|
||||
const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
|
||||
return m_data[index];
|
||||
} else {
|
||||
const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
|
||||
return m_data[index];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorMap)
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_META_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<bool cond> struct Cond {};
|
||||
@@ -28,13 +30,15 @@ const T2& choose(Cond<false>, const T1&, const T2& second) {
|
||||
template <typename T, typename X, typename Y>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T divup(const X x, const Y y) {
|
||||
return static_cast<T>((x + y - 1) / y);
|
||||
// Note: This form is used because it cannot overflow.
|
||||
return static_cast<T>(x == 0 ? 0 : (x - 1) / y + 1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T divup(const T x, const T y) {
|
||||
return static_cast<T>((x + y - 1) / y);
|
||||
// Note: This form is used because it cannot overflow.
|
||||
return static_cast<T>(x == 0 ? 0 : (x - 1) / y + 1);
|
||||
}
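// Editor's example (standalone): why divup drops (x + y - 1) / y for the form above.
// For x near the maximum representable value, x + y - 1 wraps around for unsigned types
// (and overflows for signed ones), while (x - 1) / y + 1 never forms a value above x:
#include <cassert>
#include <cstdint>

uint32_t divup_old(uint32_t x, uint32_t y) { return (x + y - 1) / y; }
uint32_t divup_new(uint32_t x, uint32_t y) { return x == 0 ? 0 : (x - 1) / y + 1; }

int main() {
  const uint32_t x = 0xFFFFFFFFu;          // UINT32_MAX
  assert(divup_new(x, 2) == 0x80000000u);  // correct ceil(x / 2)
  assert(divup_old(x, 2) == 0u);           // x + y - 1 wrapped around, so the old form is wrong
  assert(divup_new(0u, 5u) == 0u);
}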
template <size_t n> struct max_n_1 {
|
||||
@@ -52,7 +56,7 @@ struct PacketType : internal::packet_traits<Scalar> {
|
||||
};
|
||||
|
||||
// For CUDA packet types when using a GpuDevice
|
||||
#if defined(EIGEN_USE_GPU) && defined(EIGEN_HAS_GPU_FP16)
|
||||
#if defined(EIGEN_USE_GPU) && defined(EIGEN_HAS_GPU_FP16) && defined(EIGEN_GPU_COMPILE_PHASE)
|
||||
|
||||
typedef ulonglong2 Packet4h2;
|
||||
template<>
|
||||
@@ -118,13 +122,13 @@ struct static_for<Index, end, end, step, StepOp> {
|
||||
|
||||
template <typename OutScalar, typename Device, bool Vectorizable>
|
||||
struct Vectorise {
|
||||
static const int PacketSize = 1;
|
||||
static constexpr int PacketSize = 1;
|
||||
typedef OutScalar PacketReturnType;
|
||||
};
|
||||
|
||||
template <typename OutScalar, typename Device>
|
||||
struct Vectorise<OutScalar, Device, true> {
|
||||
static const int PacketSize = Eigen::PacketType<OutScalar, Device>::size;
|
||||
static constexpr int PacketSize = Eigen::PacketType<OutScalar, Device>::size;
|
||||
typedef typename Eigen::PacketType<OutScalar, Device>::type PacketReturnType;
|
||||
};
|
||||
|
||||
@@ -207,9 +211,11 @@ template<> struct PacketType<const half, const SyclDevice>: PacketType<half, Syc
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Tuple mimics std::pair but works on e.g. nvcc.
|
||||
template <typename U, typename V> struct Tuple {
|
||||
// Pair mimics std::pair but works on e.g. nvcc.
|
||||
template <typename U, typename V> struct Pair {
|
||||
public:
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
|
||||
|
||||
U first;
|
||||
V second;
|
||||
|
||||
@@ -217,13 +223,13 @@ template <typename U, typename V> struct Tuple {
|
||||
typedef V second_type;
|
||||
|
||||
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Tuple() : first(), second() {}
|
||||
Pair() : first(), second() {}
|
||||
|
||||
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Tuple(const U& f, const V& s) : first(f), second(s) {}
|
||||
Pair(const U& f, const V& s) : first(f), second(s) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void swap(Tuple& rhs) {
|
||||
void swap(Pair& rhs) {
|
||||
using numext::swap;
|
||||
swap(first, rhs.first);
|
||||
swap(second, rhs.second);
|
||||
@@ -232,13 +238,13 @@ template <typename U, typename V> struct Tuple {
|
||||
|
||||
template <typename U, typename V>
|
||||
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
bool operator==(const Tuple<U, V>& x, const Tuple<U, V>& y) {
|
||||
bool operator==(const Pair<U, V>& x, const Pair<U, V>& y) {
|
||||
return (x.first == y.first && x.second == y.second);
|
||||
}
|
||||
|
||||
template <typename U, typename V>
|
||||
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
bool operator!=(const Tuple<U, V>& x, const Tuple<U, V>& y) {
|
||||
bool operator!=(const Pair<U, V>& x, const Pair<U, V>& y) {
|
||||
return !(x == y);
|
||||
}
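// Editor's usage sketch (assumes the renamed type stays at Eigen::Pair and is reachable
// through the public Tensor module header, as the hunk suggests):
#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Pair<int, float> a(1, 2.5f);
  Eigen::Pair<int, float> b;               // members are value-initialized
  assert(a != b);
  b.swap(a);                               // member swap shown above
  assert(b.first == 1 && b.second == 2.5f);
  assert((a == Eigen::Pair<int, float>()));
}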
@@ -258,13 +264,12 @@ template <typename Idx> struct IndexPair {
|
||||
};
|
||||
|
||||
|
||||
#ifdef EIGEN_HAS_SFINAE
|
||||
namespace internal {
|
||||
|
||||
template<typename IndexType, typename Index, Index... Is>
|
||||
template<typename IndexType, typename Index, Index First, Index... Is>
|
||||
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
array<Index, sizeof...(Is)> customIndices2Array(IndexType& idx, numeric_list<Index, Is...>) {
|
||||
return { idx[Is]... };
|
||||
array<Index, 1 + sizeof...(Is)> customIndices2Array(IndexType& idx, numeric_list<Index, First, Is...>) {
|
||||
return { idx[First], idx[Is]... };
|
||||
}
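// Editor's standalone analog (hypothetical types; std::index_sequence stands in for
// Eigen's numeric_list): peeling the first index off explicitly, as the change above
// does, makes the produced array length 1 + sizeof...(Is) instead of relying on an
// unconstrained pack:
#include <array>
#include <cassert>
#include <cstddef>
#include <utility>

struct MyIndices {                          // hypothetical custom index object
  int data[3];
  int operator[](std::size_t i) const { return data[i]; }
};

template <typename IndexType, std::size_t First, std::size_t... Is>
std::array<int, 1 + sizeof...(Is)> to_array(const IndexType& idx,
                                            std::index_sequence<First, Is...>) {
  return { idx[First], idx[Is]... };
}

int main() {
  MyIndices idx{{7, 8, 9}};
  auto arr = to_array(idx, std::make_index_sequence<3>{});
  assert(arr.size() == 3 && arr[2] == 9);
}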
template<typename IndexType, typename Index>
|
||||
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
@@ -302,9 +307,6 @@ namespace internal {
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorReshaping
|
||||
@@ -28,9 +30,9 @@ struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprTyp
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = array_size<NewDimensions>::value;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = array_size<NewDimensions>::value;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -56,7 +58,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr
|
||||
public:
|
||||
typedef TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> Base;
|
||||
typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
|
||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
|
||||
typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
|
||||
typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
|
||||
typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;
|
||||
@@ -68,7 +70,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr
|
||||
const NewDimensions& dimensions() const { return m_dims; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp)
|
||||
@@ -92,10 +94,10 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
|
||||
typedef StorageMemory<std::remove_const_t<CoeffReturnType>, Device> ConstCastStorage;
|
||||
|
||||
static const int NumOutputDims = internal::array_size<Dimensions>::value;
|
||||
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumOutputDims = internal::array_size<Dimensions>::value;
|
||||
static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
|
||||
enum ReshapingKind {
|
||||
// We do not use layout information to determine reshaping kind.
|
||||
@@ -107,15 +109,12 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
|
||||
|
||||
// clang-format off
|
||||
static const ReshapingKind kind =
|
||||
#if defined(EIGEN_HAS_INDEX_LIST)
|
||||
(NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/0, /*value=*/1)) ? OneByN
|
||||
: (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/1, /*value=*/1)) ? NByOne
|
||||
: Runtime;
|
||||
#else
|
||||
Runtime;
|
||||
#endif
|
||||
// clang-format on
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
@@ -125,12 +124,11 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
|
||||
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess &&
|
||||
NumInputDims > 0 && NumOutputDims > 0,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
|
||||
};
|
||||
|
||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||
typedef std::remove_const_t<Scalar> ScalarNoConst;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc;
|
||||
@@ -245,12 +243,12 @@ template<typename NewDimensions, typename ArgType, typename Device>
|
||||
typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
|
||||
typedef NewDimensions Dimensions;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
|
||||
};
|
||||
@@ -283,7 +281,7 @@ template<typename NewDimensions, typename ArgType, typename Device>
|
||||
template <typename TensorBlock>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
|
||||
const TensorBlockDesc& desc, const TensorBlock& block) {
|
||||
assert(this->m_impl.data() != NULL);
|
||||
eigen_assert(this->m_impl.data() != NULL);
|
||||
|
||||
typedef typename TensorBlock::XprType TensorBlockExpr;
|
||||
typedef internal::TensorBlockAssignment<
|
||||
@@ -315,9 +313,9 @@ struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<Xp
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = array_size<StartIndices>::value;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = array_size<StartIndices>::value;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -357,7 +355,7 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
|
||||
const Sizes& sizes() const { return m_sizes; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp)
|
||||
@@ -369,8 +367,9 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
|
||||
};
|
||||
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Fixme: figure out the exact threshold
|
||||
namespace {
|
||||
template <typename Index, typename Device, bool BlockAccess> struct MemcpyTriggerForSlicing {
|
||||
EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
|
||||
EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const {
|
||||
@@ -400,14 +399,14 @@ template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index
|
||||
};
|
||||
#endif
|
||||
|
||||
}
|
||||
} // namespace internal
|
||||
|
||||
// Eval as rvalue
|
||||
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
|
||||
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
|
||||
{
|
||||
typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
|
||||
static const int NumDims = internal::array_size<Sizes>::value;
|
||||
static constexpr int NumDims = internal::array_size<Sizes>::value;
|
||||
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
@@ -415,9 +414,10 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
typedef Sizes Dimensions;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
|
||||
typedef StorageMemory<std::remove_const_t<CoeffReturnType>, Device> ConstCastStorage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
// Alignment can't be guaranteed at compile time since it depends on the
|
||||
// slice offsets and sizes.
|
||||
@@ -425,14 +425,13 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess &&
|
||||
// FIXME: Temporary workaround for bug in slicing of bool tensors.
|
||||
!internal::is_same<typename internal::remove_const<Scalar>::type, bool>::value,
|
||||
!internal::is_same<std::remove_const_t<Scalar>, bool>::value,
|
||||
PreferBlockAccess = true,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false,
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||
typedef std::remove_const_t<Scalar> ScalarNoConst;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
|
||||
@@ -492,7 +491,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
||||
|
||||
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
|
||||
m_impl.evalSubExprsIfNeeded(NULL);
|
||||
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
|
||||
if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization
|
||||
&& data && m_impl.data()) {
|
||||
Index contiguous_values = 1;
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
@@ -511,7 +510,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
||||
}
|
||||
}
|
||||
// Use memcpy if it's going to be faster than using the regular evaluation.
|
||||
const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
|
||||
const internal::MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
|
||||
if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
|
||||
EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
|
||||
for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
|
||||
@@ -588,7 +587,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
||||
return rslt;
|
||||
}
|
||||
else {
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[packetSize];
|
||||
values[0] = m_impl.coeff(inputIndices[0]);
|
||||
values[packetSize-1] = m_impl.coeff(inputIndices[1]);
|
||||
EIGEN_UNROLL_LOOP
|
||||
@@ -705,7 +704,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
|
||||
{
|
||||
typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
|
||||
typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
|
||||
static const int NumDims = internal::array_size<Sizes>::value;
|
||||
static constexpr int NumDims = internal::array_size<Sizes>::value;
|
||||
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
@@ -713,17 +712,17 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
typedef Sizes Dimensions;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
||||
PreferBlockAccess = true,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false,
|
||||
RawAccess = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
|
||||
};
|
||||
|
||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||
typedef std::remove_const_t<Scalar> ScalarNoConst;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
|
||||
@@ -811,9 +810,9 @@ struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprTyp
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = array_size<StartIndices>::value;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = array_size<StartIndices>::value;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -857,7 +856,7 @@ class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartI
|
||||
const StartIndices& strides() const { return m_strides; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp)
|
||||
@@ -874,7 +873,7 @@ template<typename StartIndices, typename StopIndices, typename Strides, typename
|
||||
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
|
||||
{
|
||||
typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
|
||||
static const int NumDims = internal::array_size<Strides>::value;
|
||||
static constexpr int NumDims = internal::array_size<Strides>::value;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
@@ -883,6 +882,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
typedef Strides Dimensions;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
// Alignment can't be guaranteed at compile time since it depends on the
|
||||
// slice offsets and sizes.
|
||||
@@ -890,7 +890,6 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
|
||||
PacketAccess = false,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
@@ -1060,14 +1059,14 @@ struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Stride
|
||||
{
|
||||
typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
|
||||
typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
|
||||
static const int NumDims = internal::array_size<Strides>::value;
|
||||
static constexpr int NumDims = internal::array_size<Strides>::value;
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = false,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorPadding
|
||||
@@ -28,9 +30,9 @@ struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprT
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -70,7 +72,7 @@ class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, Xpr
|
||||
Scalar padding_value() const { return m_padding_value; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
@@ -86,26 +88,26 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
{
|
||||
typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumDims = internal::array_size<PaddingDimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<PaddingDimensions>::value;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = true,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
|
||||
PreferBlockAccess = true,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = true,
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||
typedef std::remove_const_t<Scalar> ScalarNoConst;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
|
||||
@@ -510,35 +512,20 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
|
||||
Index index, int dim_index) const {
|
||||
#if defined(EIGEN_HAS_INDEX_LIST)
|
||||
return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
|
||||
index < m_padding[dim_index].first) ||
|
||||
(!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
|
||||
index >= m_dimensions[dim_index] - m_padding[dim_index].second);
|
||||
#else
|
||||
return (index < m_padding[dim_index].first) ||
|
||||
(index >= m_dimensions[dim_index] - m_padding[dim_index].second);
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
|
||||
int dim_index) const {
|
||||
#if defined(EIGEN_HAS_INDEX_LIST)
|
||||
return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
|
||||
#else
|
||||
EIGEN_UNUSED_VARIABLE(dim_index);
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
|
||||
int dim_index) const {
|
||||
#if defined(EIGEN_HAS_INDEX_LIST)
|
||||
return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
|
||||
#else
|
||||
EIGEN_UNUSED_VARIABLE(dim_index);
|
||||
return false;
|
||||
#endif
|
||||
}
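// Editor's example (standalone restatement of the runtime branch above): an output index
// along one dimension lies in the padding iff it is before the left pad or at/after
// padded_size - right_pad:
#include <cassert>

bool is_padding(long index, long left_pad, long right_pad, long padded_size) {
  return index < left_pad || index >= padded_size - right_pad;
}

int main() {
  const long padded = 4 + 2 + 1;           // input extent 4, left pad 2, right pad 1
  assert(is_padding(0, 2, 1, padded));     // left padding
  assert(!is_padding(2, 2, 1, padded));    // first real element
  assert(!is_padding(5, 2, 1, padded));    // last real element
  assert(is_padding(6, 2, 1, padded));     // right padding
}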
@@ -564,7 +551,6 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
const Index initialIndex = index;
|
||||
@@ -622,7 +608,6 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
const Index initialIndex = index;
|
||||
@@ -680,7 +665,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
|
||||
{
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
values[i] = coeff(index+i);
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_PATCH_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_PATCH_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorPatch
|
||||
@@ -28,9 +30,9 @@ struct traits<TensorPatchOp<PatchDim, XprType> > : public traits<XprType>
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions + 1;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions + 1;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -68,7 +70,7 @@ class TensorPatchOp : public TensorBase<TensorPatchOp<PatchDim, XprType>, ReadOn
|
||||
const PatchDim& patch_dims() const { return m_patch_dims; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
@@ -83,22 +85,21 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
|
||||
{
|
||||
typedef TensorPatchOp<PatchDim, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value + 1;
|
||||
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value + 1;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false,
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -195,7 +196,6 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0;
|
||||
|
||||
@@ -11,12 +11,12 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
namespace {
|
||||
|
||||
EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t get_random_seed() {
|
||||
#if defined(EIGEN_GPU_COMPILE_PHASE)
|
||||
// We don't support 3d kernels since we currently only use 1 and
|
||||
// 2d kernels.
|
||||
@@ -29,7 +29,7 @@ EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
|
||||
#endif
|
||||
}
|
||||
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state, uint64_t stream) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state, uint64_t stream) {
|
||||
// TODO: Unify with the implementation in the non blocking thread pool.
|
||||
uint64_t current = *state;
|
||||
// Update the internal state
|
||||
@@ -38,14 +38,11 @@ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint6
|
||||
return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
|
||||
}
|
||||
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) {
|
||||
seed = seed ? seed : get_random_seed();
|
||||
return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
|
||||
}
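// Editor's sketch (standalone, simplified; not from the commit): how a generator of the
// PCG-XSH-RS shape above can be seeded and turned into uniform floats. The state-advance
// step is not visible in this hunk, so a plain LCG step with the multiplier shown above
// is assumed here, and the float conversion is a simple scaling (Eigen's
// RandomToTypeUniform is more careful about the mantissa bits):
#include <cstdint>
#include <cstdio>

uint64_t pcg_seed_state(uint64_t seed) {
  return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
}

unsigned pcg_next(uint64_t* state) {
  const uint64_t current = *state;
  *state = current * 6364136223846793005ULL + 1442695040888963407ULL;  // assumed advance
  return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
}

int main() {
  uint64_t state = pcg_seed_state(/*seed=*/42);
  for (int i = 0; i < 3; ++i) {
    const float u = pcg_next(&state) * (1.0f / 4294967296.0f);  // uniform in [0, 1)
    std::printf("%f\n", u);
  }
}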
} // namespace
|
||||
|
||||
|
||||
template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
T RandomToTypeUniform(uint64_t* state, uint64_t stream) {
|
||||
unsigned rnd = PCG_XSH_RS_generator(state, stream);
|
||||
@@ -123,7 +120,7 @@ std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state,
|
||||
|
||||
template <typename T> class UniformRandomGenerator {
|
||||
public:
|
||||
static const bool PacketAccess = true;
|
||||
static constexpr bool PacketAccess = true;
|
||||
|
||||
// Uses the given "seed" if non-zero, otherwise uses a random seed.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
|
||||
@@ -131,7 +128,7 @@ template <typename T> class UniformRandomGenerator {
|
||||
m_state = PCG_XSH_RS_state(seed);
|
||||
#ifdef EIGEN_USE_SYCL
|
||||
// In SYCL it is not possible to build PCG_XSH_RS_state in one step.
|
||||
// Therefor, we need two step to initializate the m_state.
|
||||
// Therefore, we need two steps to initializate the m_state.
|
||||
// IN SYCL, the constructor of the functor is s called on the CPU
|
||||
// and we get the clock seed here from the CPU. However, This seed is
|
||||
//the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function.
|
||||
@@ -140,7 +137,7 @@ template <typename T> class UniformRandomGenerator {
|
||||
// but for SYCL ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread and each thread adds
|
||||
// the (global_thread_id* 6364136223846793005ULL) for itself only once, in order to complete the construction
|
||||
// similar to CUDA Therefore, the thread Id injection is not available at this stage.
|
||||
//However when the operator() is called the thread ID will be avilable. So inside the opeator,
|
||||
//However when the operator() is called the thread ID will be available. So inside the opeator,
|
||||
// we add the thrreadID, BlockId,... (which is equivalent of i)
|
||||
//to the seed and construct the unique m_state per thead similar to cuda.
|
||||
m_exec_once =false;
|
||||
@@ -237,20 +234,20 @@ std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state,
|
||||
|
||||
template <typename T> class NormalRandomGenerator {
|
||||
public:
|
||||
static const bool PacketAccess = true;
|
||||
static constexpr bool PacketAccess = true;
|
||||
|
||||
// Uses the given "seed" if non-zero, otherwise uses a random seed.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) {
|
||||
m_state = PCG_XSH_RS_state(seed);
|
||||
#ifdef EIGEN_USE_SYCL
|
||||
// In SYCL it is not possible to build PCG_XSH_RS_state in one step.
|
||||
// Therefor, we need two steps to initializate the m_state.
|
||||
// Therefore, we need two steps to initializate the m_state.
|
||||
// IN SYCL, the constructor of the functor is s called on the CPU
|
||||
// and we get the clock seed here from the CPU. However, This seed is
|
||||
//the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function.
|
||||
// and only available on the Operator() function (which is called on the GPU).
|
||||
// Therefore, the thread Id injection is not available at this stage. However when the operator()
|
||||
//is called the thread ID will be avilable. So inside the opeator,
|
||||
//is called the thread ID will be available. So inside the operator,
|
||||
// we add the thrreadID, BlockId,... (which is equivalent of i)
|
||||
//to the seed and construct the unique m_state per thead similar to cuda.
|
||||
m_exec_once =false;
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
@@ -42,8 +43,8 @@ namespace internal {
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
|
||||
template <class T> struct MakePointer {
|
||||
@@ -107,7 +108,6 @@ struct preserve_inner_most_dims {
|
||||
static const bool value = false;
|
||||
};
|
||||
|
||||
#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template <typename ReducedDims, int NumTensorDims>
|
||||
struct are_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{
|
||||
static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>();
|
||||
@@ -136,7 +136,6 @@ struct preserve_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{
|
||||
static const bool tmp2 = index_statically_lt<ReducedDims>(array_size<ReducedDims>::value - 1, NumTensorDims - 1);
|
||||
static const bool value = tmp1 & tmp2;
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
template <int DimIndex, typename Self, typename Op>
|
||||
@@ -166,8 +165,12 @@ struct GenericDimReducer<-1, Self, Op> {
|
||||
};
|
||||
|
||||
template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess),
|
||||
bool UseTreeReduction = (!Self::ReducerTraits::IsStateful &&
|
||||
!Self::ReducerTraits::IsExactlyAssociative)>
|
||||
bool UseTreeReduction = (!Self::ReducerTraits::IsStateful &&
|
||||
!Self::ReducerTraits::IsExactlyAssociative &&
|
||||
// GPU threads can quickly run out of stack space
|
||||
// for moderately sized inputs.
|
||||
!Self::RunningOnGPU
|
||||
)>
|
||||
struct InnerMostDimReducer {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
|
||||
typename Self::CoeffReturnType accum = reducer.initialize();
|
||||
@@ -180,42 +183,77 @@ struct InnerMostDimReducer {
|
||||
|
||||
template <typename Self, typename Op>
|
||||
struct InnerMostDimReducer<Self, Op, true, false> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
|
||||
const typename Self::Index packetSize = internal::unpacket_traits<typename Self::PacketReturnType>::size;
|
||||
const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize;
|
||||
typename Self::PacketReturnType paccum = reducer.template initializePacket<typename Self::PacketReturnType>();
|
||||
for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) {
|
||||
reducer.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex + j), &paccum);
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer0) {
|
||||
using Index = typename Self::Index;
|
||||
constexpr Index packetSize = internal::unpacket_traits<typename Self::PacketReturnType>::size;
|
||||
Index start = 0;
|
||||
typename Self::PacketReturnType paccum0 = reducer0.template initializePacket<typename Self::PacketReturnType>();
|
||||
if (!Self::ReducerTraits::IsStateful && numValuesToReduce >= 4*packetSize) {
|
||||
const Index VectorizedSize4 = (numValuesToReduce / (4*packetSize)) * (4*packetSize);
|
||||
typename Self::PacketReturnType paccum1 = reducer0.template initializePacket<typename Self::PacketReturnType>();
|
||||
typename Self::PacketReturnType paccum2 = reducer0.template initializePacket<typename Self::PacketReturnType>();
|
||||
typename Self::PacketReturnType paccum3 = reducer0.template initializePacket<typename Self::PacketReturnType>();
|
||||
const Index offset0 = firstIndex;
|
||||
const Index offset1 = firstIndex + packetSize;
|
||||
const Index offset2 = firstIndex + 2*packetSize;
|
||||
const Index offset3 = firstIndex + 3*packetSize;
|
||||
for (Index j = 0; j < VectorizedSize4; j += 4*packetSize) {
|
||||
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(offset0 + j), &paccum0);
|
||||
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(offset1 + j), &paccum1);
|
||||
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(offset2 + j), &paccum2);
|
||||
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(offset3 + j), &paccum3);
|
||||
}
|
||||
reducer0.reducePacket(paccum1, &paccum0);
|
||||
reducer0.reducePacket(paccum2, &paccum0);
|
||||
reducer0.reducePacket(paccum3, &paccum0);
|
||||
start = VectorizedSize4;
|
||||
}
|
||||
typename Self::CoeffReturnType accum = reducer.initialize();
|
||||
for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) {
|
||||
reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
|
||||
if (start <= (numValuesToReduce - packetSize)) {
|
||||
const Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize;
|
||||
for (Index j = start; j < VectorizedSize; j += packetSize) {
|
||||
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex + j), &paccum0);
|
||||
}
|
||||
start = VectorizedSize;
|
||||
}
|
||||
return reducer.finalizeBoth(accum, paccum);
|
||||
typename Self::CoeffReturnType accum = reducer0.initialize();
|
||||
for (Index j = start; j < numValuesToReduce; ++j) {
|
||||
reducer0.reduce(self.m_impl.coeff(firstIndex + j), &accum);
|
||||
}
|
||||
return reducer0.finalizeBoth(accum, paccum0);
|
||||
}
|
||||
};
|
||||
|
||||
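The rewritten specialization above keeps four independent packet accumulators (paccum0..paccum3) so successive reducePacket calls do not serialize on a single register. A standalone sketch of the same multi-accumulator idea, with plain floats standing in for packets:

// Standalone sketch of the four-accumulator unrolling used above: keeping
// independent partial sums breaks the dependency chain between iterations,
// which lets the CPU overlap the additions. The tail loop handles whatever
// does not fit the 4x unroll.
#include <cstddef>
#include <iostream>

float unrolled_sum(const float* data, std::size_t n) {
  float acc0 = 0.f, acc1 = 0.f, acc2 = 0.f, acc3 = 0.f;
  std::size_t i = 0;
  for (; i + 4 <= n; i += 4) {                  // main unrolled loop
    acc0 += data[i + 0];
    acc1 += data[i + 1];
    acc2 += data[i + 2];
    acc3 += data[i + 3];
  }
  float acc = (acc0 + acc1) + (acc2 + acc3);    // merge the partial sums
  for (; i < n; ++i) acc += data[i];            // scalar tail
  return acc;
}

int main() {
  float v[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  std::cout << unrolled_sum(v, 10) << "\n";     // prints 55
}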
#if !defined(EIGEN_HIPCC)
|
||||
static const int kLeafSize = 1024;
|
||||
|
||||
#if !defined(EIGEN_HIPCC)
|
||||
|
||||
// The following implements tree-based reduction, which improves the accuracy
|
||||
// of sum and mean reductions, since each of the n inputs only participates in
|
||||
// O(log n) additions.
|
||||
template <typename T>
|
||||
EIGEN_DEVICE_FUNC inline Index LeafSize() { return 1024; }
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline Index LeafSize<half>() { return 200; }
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline Index LeafSize<bfloat16>() { return 128; }
|
||||
|
||||
template <typename Self, typename Op>
|
||||
struct InnerMostDimReducer<Self, Op, false, true> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
|
||||
reduce(const Self& self, typename Self::Index firstIndex,
|
||||
typename Self::Index numValuesToReduce, Op& reducer) {
|
||||
const Index kLeafSize = LeafSize<typename Self::CoeffReturnType>();
|
||||
typename Self::CoeffReturnType accum = reducer.initialize();
|
||||
if (numValuesToReduce > kLeafSize) {
|
||||
const typename Self::Index half = numValuesToReduce / 2;
|
||||
// Recursively reduce the two halves.
|
||||
reducer.reduce(reduce(self, firstIndex, half, reducer), &accum);
|
||||
reducer.reduce(
|
||||
reduce(self, firstIndex + half, numValuesToReduce - half, reducer),
|
||||
&accum);
|
||||
return reducer.finalize(accum);
|
||||
} else {
|
||||
for (typename Self::Index j = 0; j < numValuesToReduce; ++j) {
|
||||
reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
|
||||
}
|
||||
return InnerMostDimReducer<Self, Op, false, false>::reduce(self, firstIndex, numValuesToReduce, reducer);
|
||||
}
|
||||
return reducer.finalize(accum);
|
||||
}
|
||||
};
|
||||
|
||||
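The comment above is the rationale for the recursive split in this specialization: pairwise (tree) reduction keeps operands at comparable magnitude, and the per-type LeafSize values keep the recursion shallow for low-precision scalars. A small self-contained illustration (not Eigen code) of the accuracy difference for float sums:

// Toy illustration of why the recursive split helps accuracy: the naive
// accumulator stops growing once it reaches 2^24 = 16777216 (adding 1.0f no
// longer changes it), while pairwise summation adds numbers of similar
// magnitude and stays essentially exact. Allocates ~80 MB for the demo.
#include <cstddef>
#include <iostream>
#include <vector>

float naive_sum(const float* x, std::size_t n) {
  float acc = 0.f;
  for (std::size_t i = 0; i < n; ++i) acc += x[i];
  return acc;
}

float pairwise_sum(const float* x, std::size_t n) {
  if (n <= 8) return naive_sum(x, n);           // leaf: short runs stay sequential
  const std::size_t half = n / 2;
  return pairwise_sum(x, half) + pairwise_sum(x + half, n - half);
}

int main() {
  std::vector<float> x(20000000, 1.0f);         // exact sum is 20000000
  std::cout << "naive:    " << naive_sum(x.data(), x.size()) << "\n";
  std::cout << "pairwise: " << pairwise_sum(x.data(), x.size()) << "\n";
}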
@@ -224,6 +262,7 @@ struct InnerMostDimReducer<Self, Op, true, true> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
|
||||
reduce(const Self& self, typename Self::Index firstIndex,
|
||||
typename Self::Index numValuesToReduce, Op& reducer) {
|
||||
const Index kLeafSize = LeafSize<typename Self::CoeffReturnType>();
|
||||
const typename Self::Index packetSize =
|
||||
internal::unpacket_traits<typename Self::PacketReturnType>::size;
|
||||
typename Self::CoeffReturnType accum = reducer.initialize();
|
||||
@@ -242,36 +281,12 @@ struct InnerMostDimReducer<Self, Op, true, true> {
|
||||
}
|
||||
return reducer.finalize(accum);
|
||||
} else {
|
||||
const typename Self::Index UnrollSize =
|
||||
(numValuesToReduce / (2*packetSize)) * 2*packetSize;
|
||||
const typename Self::Index VectorizedSize =
|
||||
(numValuesToReduce / packetSize) * packetSize;
|
||||
typename Self::PacketReturnType paccum =
|
||||
reducer.template initializePacket<typename Self::PacketReturnType>();
|
||||
typename Self::PacketReturnType paccum2 =
|
||||
reducer.template initializePacket<typename Self::PacketReturnType>();
|
||||
for (typename Self::Index j = 0; j < UnrollSize; j += packetSize * 2) {
|
||||
reducer.reducePacket(
|
||||
self.m_impl.template packet<Unaligned>(firstIndex + j), &paccum);
|
||||
reducer.reducePacket(
|
||||
self.m_impl.template packet<Unaligned>(firstIndex + j + packetSize),
|
||||
&paccum2);
|
||||
}
|
||||
for (typename Self::Index j = UnrollSize; j < VectorizedSize; j+= packetSize) {
|
||||
reducer.reducePacket(self.m_impl.template packet<Unaligned>(
|
||||
firstIndex + j), &paccum);
|
||||
}
|
||||
reducer.reducePacket(paccum2, &paccum);
|
||||
for (typename Self::Index j = VectorizedSize; j < numValuesToReduce;
|
||||
++j) {
|
||||
reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
|
||||
}
|
||||
return reducer.finalizeBoth(accum, paccum);
|
||||
return InnerMostDimReducer<Self, Op, true, false>::reduce(self, firstIndex, numValuesToReduce, reducer);
|
||||
}
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess)>
|
||||
struct InnerMostDimPreserver {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) {
|
||||
@@ -292,10 +307,37 @@ struct InnerMostDimPreserver<DimIndex, Self, Op, true> {
|
||||
|
||||
template <typename Self, typename Op>
|
||||
struct InnerMostDimPreserver<0, Self, Op, true> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
|
||||
for (typename Self::Index j = 0; j < self.m_reducedDims[0]; ++j) {
|
||||
const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0];
|
||||
reducer.reducePacket(self.m_impl.template packet<Unaligned>(input), accum);
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer0, typename Self::PacketReturnType* accum0) {
|
||||
using Index = typename Self::Index;
|
||||
const Index stride = self.m_reducedStrides[0];
|
||||
const Index size = self.m_reducedDims[0];
|
||||
if (!Self::ReducerTraits::IsStateful && size >= 16) {
|
||||
const Index unrolled_size4 = (size / 4) * 4;
|
||||
typename Self::PacketReturnType accum1 = reducer0.template initializePacket<typename Self::PacketReturnType>();
|
||||
typename Self::PacketReturnType accum2 = reducer0.template initializePacket<typename Self::PacketReturnType>();
|
||||
typename Self::PacketReturnType accum3 = reducer0.template initializePacket<typename Self::PacketReturnType>();
|
||||
for (Index j = 0; j < unrolled_size4; j += 4) {
|
||||
const Index input0 = firstIndex + j * stride;
|
||||
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(input0), accum0);
|
||||
const Index input1 = firstIndex + (j+1) * stride;
|
||||
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(input1), &accum1);
|
||||
const Index input2 = firstIndex + (j+2) * stride;
|
||||
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(input2), &accum2);
|
||||
const Index input3 = firstIndex + (j+3) * stride;
|
||||
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(input3), &accum3);
|
||||
}
|
||||
reducer0.reducePacket(accum1, accum0);
|
||||
reducer0.reducePacket(accum2, accum0);
|
||||
reducer0.reducePacket(accum3, accum0);
|
||||
for (Index j = unrolled_size4; j < size; ++j) {
|
||||
Index input = firstIndex + j * stride;
|
||||
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(input), accum0);
|
||||
}
|
||||
} else {
|
||||
for (Index j = 0; j < size; ++j) {
|
||||
Index input = firstIndex + j * stride;
|
||||
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(input), accum0);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -309,7 +351,7 @@ struct InnerMostDimPreserver<-1, Self, Op, true> {
|
||||
// Default full reducer
|
||||
template <typename Self, typename Op, typename Device, bool Vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess)>
|
||||
struct FullReducer {
|
||||
static const bool HasOptimizedImplementation = false;
|
||||
static constexpr bool HasOptimizedImplementation = false;
|
||||
|
||||
static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::EvaluatorPointerType output) {
|
||||
const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions());
|
||||
@@ -334,8 +376,8 @@ struct FullReducerShard {
|
||||
// Multithreaded full reducer
|
||||
template <typename Self, typename Op, bool Vectorizable>
|
||||
struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
|
||||
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful;
|
||||
static const Index PacketSize =
|
||||
static constexpr bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful;
|
||||
static constexpr Index PacketSize =
|
||||
unpacket_traits<typename Self::PacketReturnType>::size;
|
||||
|
||||
// launch one reducer per thread and accumulate the result.
|
||||
@@ -351,15 +393,14 @@ struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
|
||||
self.m_impl.costPerCoeff(Vectorizable) +
|
||||
TensorOpCost(0, 0, internal::functor_traits<Op>::Cost, Vectorizable,
|
||||
PacketSize);
|
||||
const int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
|
||||
const Index num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
|
||||
num_coeffs, cost, device.numThreads());
|
||||
if (num_threads == 1) {
|
||||
*output =
|
||||
InnerMostDimReducer<Self, Op, Vectorizable>::reduce(self, 0, num_coeffs, reducer);
|
||||
return;
|
||||
}
|
||||
const Index blocksize =
|
||||
std::floor<Index>(static_cast<float>(num_coeffs) / num_threads);
|
||||
const Index blocksize = num_coeffs / num_threads;
|
||||
const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
|
||||
eigen_assert(num_coeffs >= numblocks * blocksize);
|
||||
|
||||
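The hunk above also swaps the float-based std::floor for plain integer division when sizing the per-thread blocks; whatever is left after numblocks full blocks is reduced by the calling thread. A sketch of that partition arithmetic, with made-up numbers:

// Hedged sketch of the block partitioning above: integer division gives the
// per-thread block size, numblocks full blocks are dispatched, and the
// leftover coefficients (num_coeffs - numblocks * blocksize) are reduced
// inline. The values below are illustrative only.
#include <iostream>

int main() {
  const long num_coeffs = 1000003;     // total elements to reduce
  const long num_threads = 8;

  const long blocksize = num_coeffs / num_threads;                    // 125000
  const long numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;  // 8
  const long remainder = num_coeffs - numblocks * blocksize;          // 3

  for (long b = 0; b < numblocks; ++b) {
    const long first = b * blocksize;  // each shard reduces [first, first + blocksize)
    std::cout << "block " << b << " starts at " << first << "\n";
  }
  std::cout << "tail of " << remainder << " elements reduced inline\n";
}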
@@ -393,7 +434,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
|
||||
// Default inner reducer
|
||||
template <typename Self, typename Op, typename Device>
|
||||
struct InnerReducer {
|
||||
static const bool HasOptimizedImplementation = false;
|
||||
static constexpr bool HasOptimizedImplementation = false;
|
||||
|
||||
EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
|
||||
eigen_assert(false && "Not implemented");
|
||||
@@ -404,7 +445,7 @@ struct InnerReducer {
|
||||
// Default outer reducer
|
||||
template <typename Self, typename Op, typename Device>
|
||||
struct OuterReducer {
|
||||
static const bool HasOptimizedImplementation = false;
|
||||
static constexpr bool HasOptimizedImplementation = false;
|
||||
|
||||
EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
|
||||
eigen_assert(false && "Not implemented");
|
||||
@@ -416,7 +457,7 @@ struct OuterReducer {
|
||||
// Default Generic reducer
|
||||
template <typename Self, typename Op, typename Device>
|
||||
struct GenericReducer {
|
||||
static const bool HasOptimizedImplementation = false;
|
||||
static constexpr bool HasOptimizedImplementation = false;
|
||||
|
||||
EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
|
||||
eigen_assert(false && "Not implemented");
|
||||
@@ -458,9 +499,9 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void OuterReductionKernel(R, const S, I_
|
||||
template <typename Op, typename CoeffReturnType>
|
||||
struct ReductionReturnType {
|
||||
#if defined(EIGEN_USE_SYCL)
|
||||
typedef typename remove_const<decltype(std::declval<Op>().initialize())>::type type;
|
||||
typedef std::remove_const_t<decltype(std::declval<Op>().initialize())> type;
|
||||
#else
|
||||
typedef typename remove_const<CoeffReturnType>::type type;
|
||||
typedef std::remove_const_t<CoeffReturnType> type;
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -472,7 +513,7 @@ class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType,
|
||||
public:
|
||||
typedef typename Eigen::internal::traits<TensorReductionOp>::Scalar Scalar;
|
||||
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
|
||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
|
||||
typedef typename Eigen::internal::nested<TensorReductionOp>::type Nested;
|
||||
typedef typename Eigen::internal::traits<TensorReductionOp>::StorageKind StorageKind;
|
||||
typedef typename Eigen::internal::traits<TensorReductionOp>::Index Index;
|
||||
@@ -510,44 +551,56 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
|
||||
typedef typename XprType::Index Index;
|
||||
typedef ArgType ChildType;
|
||||
typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
|
||||
static const int NumInputDims = internal::array_size<InputDimensions>::value;
|
||||
static const int NumReducedDims = internal::array_size<Dims>::value;
|
||||
static const int NumOutputDims = NumInputDims - NumReducedDims;
|
||||
typedef typename internal::conditional<NumOutputDims==0, Sizes<>, DSizes<Index, NumOutputDims> >::type Dimensions;
|
||||
static constexpr int NumInputDims = internal::array_size<InputDimensions>::value;
|
||||
static constexpr int NumReducedDims = internal::array_size<Dims>::value;
|
||||
static constexpr int NumOutputDims = NumInputDims - NumReducedDims;
|
||||
typedef std::conditional_t<NumOutputDims==0, Sizes<>, DSizes<Index, NumOutputDims> > Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Self;
|
||||
static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
|
||||
static constexpr bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
|
||||
typedef typename internal::ReductionReturnType<Op, typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const Index PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr Index PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
|
||||
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
// Subset of strides of the input tensor for the non-reduced dimensions.
|
||||
// Subset of strides of the input tensor for the non-reduced dimensions.
|
||||
// Indexed by output dimensions.
|
||||
static const int NumPreservedStrides = max_n_1<NumOutputDims>::size;
|
||||
static constexpr int NumPreservedStrides = max_n_1<NumOutputDims>::size;
|
||||
|
||||
// For full reductions
|
||||
#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC))
|
||||
static constexpr bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value;
|
||||
static constexpr bool RunningOnSycl = false;
|
||||
#elif defined(EIGEN_USE_SYCL)
|
||||
static constexpr bool RunningOnSycl = internal::is_same<internal::remove_all_t<Device>, Eigen::SyclDevice>::value;
|
||||
static constexpr bool RunningOnGPU = false;
|
||||
#else
|
||||
static constexpr bool RunningOnGPU = false;
|
||||
static constexpr bool RunningOnSycl = false;
|
||||
#endif
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = Self::InputPacketAccess && ReducerTraits::PacketAccess,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = true,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||
typedef std::remove_const_t<Scalar> ScalarNoConst;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockNotImplemented TensorBlock;
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
||||
static const bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value;
|
||||
static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value;
|
||||
static const bool RunningFullReduction = (NumOutputDims==0);
|
||||
static constexpr bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value;
|
||||
static constexpr bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value;
|
||||
static constexpr bool RunningFullReduction = (NumOutputDims==0);
|
||||
|
||||
EIGEN_STRONG_INLINE TensorReductionEvaluatorBase(const XprType& op, const Device& device)
|
||||
: m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device)
|
||||
@@ -578,7 +631,7 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
|
||||
m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
|
||||
}
|
||||
} else {
|
||||
m_outputStrides[NumOutputDims - 1] = 1;
|
||||
m_outputStrides[static_cast<size_t>(NumOutputDims - 1)] = 1;
|
||||
for (int i = NumOutputDims - 2; i >= 0; --i) {
|
||||
m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
|
||||
m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
|
||||
@@ -625,7 +678,7 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
|
||||
? internal::array_prod(input_dims)
|
||||
: (static_cast<int>(Layout) == static_cast<int>(ColMajor))
|
||||
? m_preservedStrides[0]
|
||||
: m_preservedStrides[NumOutputDims - 1];
|
||||
: m_preservedStrides[static_cast<size_t>(NumOutputDims - 1)];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||
@@ -784,14 +837,13 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions())));
|
||||
|
||||
if (RunningOnGPU && m_result) {
|
||||
return internal::pload<PacketReturnType>(m_result + index);
|
||||
}
|
||||
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
if (ReducingInnerMostDims) {
|
||||
const Index num_values_to_reduce =
|
||||
(static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
|
||||
@@ -950,17 +1002,6 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
|
||||
// Operation to apply for computing the reduction.
|
||||
Op m_reducer;
|
||||
|
||||
// For full reductions
|
||||
#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC))
|
||||
static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value;
|
||||
static const bool RunningOnSycl = false;
|
||||
#elif defined(EIGEN_USE_SYCL)
|
||||
static const bool RunningOnSycl = internal::is_same<typename internal::remove_all<Device>::type, Eigen::SyclDevice>::value;
|
||||
static const bool RunningOnGPU = false;
|
||||
#else
|
||||
static const bool RunningOnGPU = false;
|
||||
static const bool RunningOnSycl = false;
|
||||
#endif
|
||||
EvaluatorPointerType m_result;
|
||||
|
||||
const Device EIGEN_DEVICE_REF m_device;
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#warning "Deprecated header file, please either include the main Eigen/CXX11/Tensor header or the respective TensorReductionGpu.h file"
|
||||
#endif
|
||||
|
||||
#include "TensorReductionGpu.h"
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
@@ -98,6 +100,7 @@ __device__ inline void atomicReduce(half2* output, half2 accum, R& reducer) {
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef EIGEN_GPU_COMPILE_PHASE
|
||||
// reduction should be associative since reduction is not atomic in wide vector but atomic in half2 operations
|
||||
template <typename R>
|
||||
__device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum, R& reducer) {
|
||||
@@ -107,6 +110,7 @@ __device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum, R& reduc
|
||||
atomicReduce(houtput+i,*(haccum+i),reducer);
|
||||
}
|
||||
}
|
||||
#endif // EIGEN_GPU_COMPILE_PHASE
|
||||
#endif // EIGEN_HAS_GPU_FP16
|
||||
|
||||
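The atomicReduce overloads above fold a wide half packet lane by lane into half2 atomics. For reducers with no dedicated hardware atomic, this style of routine is typically built on a compare-and-swap retry loop; a CPU-side analogue using std::atomic (not Eigen's GPU code) looks like this:

// CPU-side analogue of an atomicReduce built on a compare-and-swap retry
// loop: read the current value, combine it with the incoming accumulator,
// and retry if another thread changed the slot in between.
#include <atomic>
#include <iostream>

template <typename Reducer>
void atomic_reduce(std::atomic<float>* output, float accum, Reducer reduce) {
  float oldval = output->load(std::memory_order_relaxed);
  float newval = reduce(oldval, accum);
  // compare_exchange_weak reloads oldval on failure, so we just recompute.
  while (!output->compare_exchange_weak(oldval, newval)) {
    newval = reduce(oldval, accum);
  }
}

int main() {
  std::atomic<float> total{0.f};
  auto sum = [](float a, float b) { return a + b; };
  atomic_reduce(&total, 3.f, sum);
  atomic_reduce(&total, 4.f, sum);
  std::cout << total.load() << "\n";  // prints 7
}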
template <>
|
||||
@@ -213,8 +217,8 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernel(Reducer reducer
|
||||
#ifdef EIGEN_HAS_GPU_FP16
|
||||
template <typename Self,
|
||||
typename Reducer, typename Index>
|
||||
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
|
||||
packet_traits<Eigen::half>::type* scratch) {
|
||||
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFloat(
|
||||
Reducer reducer, const Self input, Index num_coeffs, half* scratch) {
|
||||
eigen_assert(blockDim.x == 1);
|
||||
eigen_assert(gridDim.x == 1);
|
||||
typedef packet_traits<Eigen::half>::type packet_type;
|
||||
@@ -224,15 +228,16 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFlo
|
||||
half2* h2scratch = reinterpret_cast<half2*>(scratch);
|
||||
for (Index i = num_coeffs - packet_remainder; i + 2 <= num_coeffs; i += 2) {
|
||||
*h2scratch =
|
||||
__halves2half2(input.m_impl.coeff(i), input.m_impl.coeff(i + 1));
|
||||
__halves2half2(input.coeff(i), input.coeff(i + 1));
|
||||
h2scratch++;
|
||||
}
|
||||
if ((num_coeffs & 1) != 0) {
|
||||
half lastCoeff = input.m_impl.coeff(num_coeffs - 1);
|
||||
half lastCoeff = input.coeff(num_coeffs - 1);
|
||||
*h2scratch = __halves2half2(lastCoeff, reducer.initialize());
|
||||
}
|
||||
} else {
|
||||
*scratch = reducer.template initializePacket<packet_type>();
|
||||
packet_type reduce = reducer.template initializePacket<packet_type>();
|
||||
internal::pstoreu(scratch, reduce);
|
||||
}
|
||||
}
|
||||
|
||||
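The init kernel above packs coefficients two at a time with __halves2half2 and, for an odd count, fills the last slot with reducer.initialize() so the padding cannot perturb the result. A scalar sketch of that identity-padding trick:

// Scalar sketch (no GPU intrinsics) of identity padding: when the element
// count is odd, the last pair is completed with the reducer's identity value
// so the pairwise pass still produces the correct result.
#include <cstddef>
#include <iostream>

int main() {
  const float data[5] = {1.f, 2.f, 3.f, 4.f, 5.f};
  const std::size_t n = 5;
  const float identity = 0.f;                    // identity of the sum reducer

  float pair_sums = 0.f;
  for (std::size_t i = 0; i + 2 <= n; i += 2) {
    pair_sums += data[i] + data[i + 1];          // full pairs
  }
  if (n % 2 != 0) {
    pair_sums += data[n - 1] + identity;         // odd tail padded with identity
  }
  std::cout << pair_sums << "\n";                // prints 15
}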
@@ -258,8 +263,9 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernelHalfFloat(Reduce
|
||||
|
||||
template <int BlockSize, int NumPerThread, typename Self,
|
||||
typename Reducer, typename Index>
|
||||
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
|
||||
half* output, packet_traits<Eigen::half>::type* scratch) {
|
||||
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(
|
||||
Reducer reducer, const Self input, Index num_coeffs,
|
||||
half* output, half* scratch) {
|
||||
typedef typename packet_traits<Eigen::half>::type PacketType;
|
||||
const int packet_width = unpacket_traits<PacketType>::size;
|
||||
eigen_assert(NumPerThread % packet_width == 0);
|
||||
@@ -273,19 +279,20 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reduce
|
||||
int rem = num_coeffs % packet_width;
|
||||
if (rem != 0) {
|
||||
half2* p_scratch = reinterpret_cast<half2*>(scratch);
|
||||
*scratch = reducer.template initializePacket<PacketType>();
|
||||
pstoreu(scratch, reducer.template initializePacket<PacketType>());
|
||||
for (int i = 0; i < rem / 2; i++) {
|
||||
*p_scratch = __halves2half2(
|
||||
input.m_impl.coeff(num_coeffs - packet_width + 2 * i),
|
||||
input.m_impl.coeff(num_coeffs - packet_width + 2 * i + 1));
|
||||
input.coeff(num_coeffs - packet_width + 2 * i),
|
||||
input.coeff(num_coeffs - packet_width + 2 * i + 1));
|
||||
p_scratch++;
|
||||
}
|
||||
if ((num_coeffs & 1) != 0) {
|
||||
half last = input.m_impl.coeff(num_coeffs - 1);
|
||||
half last = input.coeff(num_coeffs - 1);
|
||||
*p_scratch = __halves2half2(last, reducer.initialize());
|
||||
}
|
||||
} else {
|
||||
*scratch = reducer.template initializePacket<PacketType>();
|
||||
PacketType reduce = reducer.template initializePacket<PacketType>();
|
||||
pstoreu(scratch, reduce);
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
@@ -298,7 +305,7 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reduce
|
||||
for (Index i = 0; i < max_iter; i += BlockSize) {
|
||||
const Index index = first_index + packet_width * i;
|
||||
eigen_assert(index + packet_width < num_coeffs);
|
||||
PacketType val = input.m_impl.template packet<Unaligned>(index);
|
||||
PacketType val = input.template packet<Unaligned>(index);
|
||||
reducer.reducePacket(val, &accum);
|
||||
}
|
||||
|
||||
@@ -337,7 +344,7 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reduce
|
||||
}
|
||||
|
||||
if ((threadIdx.x & (warpSize - 1)) == 0) {
|
||||
atomicReduce(scratch, accum, reducer);
|
||||
atomicReduce(reinterpret_cast<PacketType*>(scratch), accum, reducer);
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
@@ -357,17 +364,21 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reduce
|
||||
}
|
||||
|
||||
template <typename Op>
|
||||
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionCleanupKernelHalfFloat(Op reducer, half* output, packet_traits<Eigen::half>::type* scratch) {
|
||||
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionCleanupKernelHalfFloat(Op reducer, half* output, half* scratch) {
|
||||
eigen_assert(threadIdx.x == 1);
|
||||
half2* pscratch = reinterpret_cast<half2*>(scratch);
|
||||
half tmp = __float2half(0.f);
|
||||
typedef packet_traits<Eigen::half>::type packet_type;
|
||||
for (int i = 0; i < unpacket_traits<packet_type>::size; i += 2) {
|
||||
reducer.reduce(__low2half(*pscratch), &tmp);
|
||||
reducer.reduce(__high2half(*pscratch), &tmp);
|
||||
pscratch++;
|
||||
if (unpacket_traits<packet_type>::size == 1) {
|
||||
*output = *scratch;
|
||||
} else {
|
||||
half2* pscratch = reinterpret_cast<half2*>(scratch);
|
||||
half tmp = __float2half(0.f);
|
||||
for (int i = 0; i < unpacket_traits<packet_type>::size; i += 2) {
|
||||
reducer.reduce(__low2half(*pscratch), &tmp);
|
||||
reducer.reduce(__high2half(*pscratch), &tmp);
|
||||
pscratch++;
|
||||
}
|
||||
*output = tmp;
|
||||
}
|
||||
*output = tmp;
|
||||
}
|
||||
|
||||
#endif // EIGEN_HAS_GPU_FP16
|
||||
@@ -383,10 +394,10 @@ struct FullReductionLauncher {
|
||||
template <typename Self, typename Op, typename OutputType, bool PacketAccess>
|
||||
struct FullReductionLauncher<
|
||||
Self, Op, OutputType, PacketAccess,
|
||||
typename internal::enable_if<
|
||||
std::enable_if_t<
|
||||
internal::is_same<float, OutputType>::value ||
|
||||
internal::is_same<double, OutputType>::value,
|
||||
void>::type> {
|
||||
void>> {
|
||||
static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) {
|
||||
|
||||
typedef typename Self::Index Index;
|
||||
@@ -416,13 +427,11 @@ template <typename Self, typename Op>
|
||||
struct FullReductionLauncher<Self, Op, Eigen::half, true> {
|
||||
static void run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs) {
|
||||
typedef typename Self::Index Index;
|
||||
typedef typename packet_traits<Eigen::half>::type PacketType;
|
||||
|
||||
const int block_size = 256;
|
||||
const int num_per_thread = 128;
|
||||
const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
|
||||
PacketType* scratch = static_cast<PacketType*>(device.scratchpad());
|
||||
// half2* scratch = static_cast<half2*>(device.scratchpad());
|
||||
half* scratch = static_cast<half*>(device.scratchpad());
|
||||
|
||||
if (num_blocks > 1) {
|
||||
// We initialize the output and the scratchpad outside the reduction kernel when we can't be sure that there
|
||||
@@ -449,12 +458,12 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
|
||||
// so reduce the scope of the optimized version of the code to the simple cases
|
||||
// of doubles, floats and half floats
|
||||
#ifdef EIGEN_HAS_GPU_FP16
|
||||
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||
static constexpr bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||
internal::is_same<typename Self::CoeffReturnType, double>::value ||
|
||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
||||
#else // EIGEN_HAS_GPU_FP16
|
||||
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||
static constexpr bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||
internal::is_same<typename Self::CoeffReturnType, double>::value);
|
||||
#endif // EIGEN_HAS_GPU_FP16
|
||||
@@ -755,10 +764,10 @@ struct InnerReductionLauncher {
|
||||
template <typename Self, typename Op, typename OutputType, bool PacketAccess>
|
||||
struct InnerReductionLauncher<
|
||||
Self, Op, OutputType, PacketAccess,
|
||||
typename internal::enable_if<
|
||||
std::enable_if_t<
|
||||
internal::is_same<float, OutputType>::value ||
|
||||
internal::is_same<double, OutputType>::value,
|
||||
void>::type> {
|
||||
void>> {
|
||||
static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
|
||||
typedef typename Self::Index Index;
|
||||
|
||||
@@ -838,12 +847,12 @@ struct InnerReducer<Self, Op, GpuDevice> {
|
||||
// so reduce the scope of the optimized version of the code to the simple case
|
||||
// of floats and half floats.
|
||||
#ifdef EIGEN_HAS_GPU_FP16
|
||||
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||
static constexpr bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||
internal::is_same<typename Self::CoeffReturnType, double>::value ||
|
||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
||||
#else // EIGEN_HAS_GPU_FP16
|
||||
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||
static constexpr bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||
internal::is_same<typename Self::CoeffReturnType, double>::value);
|
||||
#endif // EIGEN_HAS_GPU_FP16
|
||||
@@ -900,7 +909,7 @@ struct OuterReducer<Self, Op, GpuDevice> {
|
||||
// Unfortunately nvidia doesn't support well exotic types such as complex,
|
||||
// so reduce the scope of the optimized version of the code to the simple case
|
||||
// of floats.
|
||||
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||
static constexpr bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||
internal::is_same<typename Self::CoeffReturnType, double>::value);
|
||||
template <typename Device, typename OutputType>
|
||||
|
||||
@@ -27,6 +27,8 @@
|
||||
|
||||
#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
|
||||
#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
namespace TensorSycl {
|
||||
namespace internal {
|
||||
@@ -125,9 +127,8 @@ class FullReductionKernelFunctor {
|
||||
typedef typename OpDef::type Op;
|
||||
typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType;
|
||||
typedef typename Evaluator::PacketReturnType PacketReturnType;
|
||||
typedef
|
||||
typename ::Eigen::internal::conditional<(Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess),
|
||||
PacketReturnType, CoeffReturnType>::type OutType;
|
||||
typedef std::conditional_t<(Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess),
|
||||
PacketReturnType, CoeffReturnType> OutType;
|
||||
typedef cl::sycl::accessor<OutType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
|
||||
LocalAccessor;
|
||||
LocalAccessor scratch;
|
||||
@@ -143,7 +144,7 @@ class FullReductionKernelFunctor {
|
||||
void operator()(cl::sycl::nd_item<1> itemID) { compute_reduction(itemID); }
|
||||
|
||||
template <bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<Vect>::type compute_reduction(
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<Vect> compute_reduction(
|
||||
const cl::sycl::nd_item<1> &itemID) {
|
||||
auto output_ptr = final_output.get_pointer();
|
||||
Index VectorizedRange = (rng / Evaluator::PacketSize) * Evaluator::PacketSize;
|
||||
@@ -182,7 +183,7 @@ class FullReductionKernelFunctor {
|
||||
}
|
||||
|
||||
template <bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<!Vect>::type compute_reduction(
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!Vect> compute_reduction(
|
||||
const cl::sycl::nd_item<1> &itemID) {
|
||||
auto output_ptr = final_output.get_pointer();
|
||||
Index globalid = itemID.get_global_id(0);
|
||||
@@ -481,7 +482,7 @@ struct FullReducer<Self, Op, Eigen::SyclDevice, Vectorizable> {
|
||||
static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true;
|
||||
static EIGEN_CONSTEXPR int PacketSize = Self::PacketAccess ? Self::PacketSize : 1;
|
||||
static void run(const Self &self, Op &reducer, const Eigen::SyclDevice &dev, EvaluatorPointerType data) {
|
||||
typedef typename conditional<Self::PacketAccess, typename Self::PacketReturnType, CoeffReturnType>::type OutType;
|
||||
typedef std::conditional_t<Self::PacketAccess, typename Self::PacketReturnType, CoeffReturnType> OutType;
|
||||
static_assert(!((EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1) &
|
||||
(EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1 - 1)),
|
||||
"The Local thread size must be a power of 2 for the reduction "
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_REF_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_REF_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
@@ -96,13 +98,13 @@ class TensorLazyEvaluatorWritable : public TensorLazyEvaluatorReadOnly<Dimension
|
||||
};
|
||||
|
||||
template <typename Dimensions, typename Expr, typename Device>
|
||||
class TensorLazyEvaluator : public internal::conditional<bool(internal::is_lvalue<Expr>::value),
|
||||
class TensorLazyEvaluator : public std::conditional_t<bool(internal::is_lvalue<Expr>::value),
|
||||
TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
|
||||
TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type {
|
||||
TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> > {
|
||||
public:
|
||||
typedef typename internal::conditional<bool(internal::is_lvalue<Expr>::value),
|
||||
TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
|
||||
TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type Base;
|
||||
typedef std::conditional_t<bool(internal::is_lvalue<Expr>::value),
|
||||
TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
|
||||
TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> > Base;
|
||||
typedef typename Base::Scalar Scalar;
|
||||
|
||||
TensorLazyEvaluator(const Expr& expr, const Device& device) : Base(expr, device) {
|
||||
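TensorLazyEvaluator now inherits directly from a std::conditional_t, so writable expressions pick up the mutating base and everything else the read-only one. A minimal sketch of a conditional base class of this kind, with hypothetical class names:

// Minimal sketch (hypothetical classes, not Eigen's) of a conditional base
// class: the base type is chosen at compile time from a boolean trait, so
// lvalue expressions inherit the writable interface and the rest inherit
// the read-only one.
#include <iostream>
#include <type_traits>

struct ReadOnlyBase { const char* kind() const { return "read-only"; } };
struct WritableBase { const char* kind() const { return "writable"; } };

template <bool IsLvalue>
class LazyEvaluator
    : public std::conditional_t<IsLvalue, WritableBase, ReadOnlyBase> {};

int main() {
  LazyEvaluator<true> writable;
  LazyEvaluator<false> readonly;
  std::cout << writable.kind() << ", " << readonly.kind() << "\n";
}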
@@ -135,15 +137,15 @@ template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef
|
||||
typedef Scalar* PointerType;
|
||||
typedef PointerType PointerArgType;
|
||||
|
||||
static const Index NumIndices = PlainObjectType::NumIndices;
|
||||
static constexpr Index NumIndices = PlainObjectType::NumIndices;
|
||||
typedef typename PlainObjectType::Dimensions Dimensions;
|
||||
|
||||
static constexpr int Layout = PlainObjectType::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = false,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = false,
|
||||
Layout = PlainObjectType::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -172,7 +174,7 @@ template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef
|
||||
unrefEvaluator();
|
||||
}
|
||||
|
||||
TensorRef(const TensorRef& other) : m_evaluator(other.m_evaluator) {
|
||||
TensorRef(const TensorRef& other) : TensorBase<TensorRef<PlainObjectType> >(other), m_evaluator(other.m_evaluator) {
|
||||
eigen_assert(m_evaluator->refCount() > 0);
|
||||
m_evaluator->incrRefCount();
|
||||
}
|
||||
@@ -204,7 +206,6 @@ template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef
|
||||
return m_evaluator->coeff(index);
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar operator()(Index firstIndex, IndexTypes... otherIndices) const
|
||||
{
|
||||
@@ -219,85 +220,6 @@ template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef
|
||||
const array<Index, num_indices> indices{{firstIndex, otherIndices...}};
|
||||
return coeffRef(indices);
|
||||
}
|
||||
#else
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1) const
|
||||
{
|
||||
array<Index, 2> indices;
|
||||
indices[0] = i0;
|
||||
indices[1] = i1;
|
||||
return coeff(indices);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2) const
|
||||
{
|
||||
array<Index, 3> indices;
|
||||
indices[0] = i0;
|
||||
indices[1] = i1;
|
||||
indices[2] = i2;
|
||||
return coeff(indices);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3) const
|
||||
{
|
||||
array<Index, 4> indices;
|
||||
indices[0] = i0;
|
||||
indices[1] = i1;
|
||||
indices[2] = i2;
|
||||
indices[3] = i3;
|
||||
return coeff(indices);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
|
||||
{
|
||||
array<Index, 5> indices;
|
||||
indices[0] = i0;
|
||||
indices[1] = i1;
|
||||
indices[2] = i2;
|
||||
indices[3] = i3;
|
||||
indices[4] = i4;
|
||||
return coeff(indices);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1)
|
||||
{
|
||||
array<Index, 2> indices;
|
||||
indices[0] = i0;
|
||||
indices[1] = i1;
|
||||
return coeffRef(indices);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2)
|
||||
{
|
||||
array<Index, 3> indices;
|
||||
indices[0] = i0;
|
||||
indices[1] = i1;
|
||||
indices[2] = i2;
|
||||
return coeffRef(indices);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
|
||||
{
|
||||
array<Index, 4> indices;
|
||||
indices[0] = i0;
|
||||
indices[1] = i1;
|
||||
indices[2] = i2;
|
||||
indices[3] = i3;
|
||||
return coeffRef(indices);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2, Index i3, Index i4)
|
||||
{
|
||||
array<Index, 5> indices;
|
||||
indices[0] = i0;
|
||||
indices[1] = i1;
|
||||
indices[2] = i2;
|
||||
indices[3] = i3;
|
||||
indices[4] = i4;
|
||||
return coeffRef(indices);
|
||||
}
|
||||
#endif
|
||||
|
||||
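The deleted block above is the old set of hand-written 2- to 5-index operator() and coeffRef overloads; with variadic templates the indices are expanded straight into an array, as the remaining overloads do. A small sketch of that pack-to-array pattern (a hypothetical helper, not TensorRef's API):

// Hedged sketch of the variadic pattern that replaces the deleted fixed-arity
// overloads: the index pack is expanded straight into a std::array, so one
// overload covers every rank.
#include <array>
#include <cstddef>
#include <iostream>

template <typename... IndexTypes>
long flat_index(const std::array<long, sizeof...(IndexTypes) + 1>& strides,
                long first, IndexTypes... rest) {
  const std::array<long, sizeof...(IndexTypes) + 1> indices{{first, rest...}};
  long flat = 0;
  for (std::size_t d = 0; d < indices.size(); ++d) flat += indices[d] * strides[d];
  return flat;
}

int main() {
  const std::array<long, 3> strides{{1, 4, 20}};        // made-up 3-D strides
  std::cout << flat_index(strides, 2L, 1L, 3L) << "\n";  // 2 + 4 + 60 = 66
}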
template <std::size_t NumIndices> EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const Scalar coeff(const array<Index, NumIndices>& indices) const
|
||||
@@ -374,12 +296,12 @@ struct TensorEvaluator<const TensorRef<Derived>, Device>
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorRef<Derived>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = false,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorRef<Derived>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorReverse
|
||||
@@ -28,9 +30,9 @@ struct traits<TensorReverseOp<ReverseDimensions,
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -71,7 +73,7 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
|
||||
const ReverseDimensions& reverse() const { return m_reverse_dims; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReverseOp)
|
||||
@@ -88,21 +90,21 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
|
||||
{
|
||||
typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumDims = internal::array_size<ReverseDimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<ReverseDimensions>::value;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = NumDims > 0,
|
||||
PreferBlockAccess = true,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -213,12 +215,11 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
PacketReturnType packet(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
// TODO(ndjaitly): write a better packing routine that uses
|
||||
// local structure.
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType>
|
||||
values[PacketSize];
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
@@ -413,15 +414,15 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
|
||||
Device> Base;
|
||||
typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumDims = internal::array_size<ReverseDimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<ReverseDimensions>::value;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -431,7 +432,7 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockNotImplemented TensorBlock;
|
||||
@@ -446,7 +447,6 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
|
||||
|
||||
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void writePacket(Index index, const PacketReturnType& x) {
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
// This code is pilfered from TensorMorphing.h
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
@@ -21,9 +23,9 @@ struct traits<TensorScanOp<Op, XprType> >
|
||||
typedef traits<XprType> XprTraits;
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -379,21 +381,21 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
|
||||
typedef typename XprType::Index Index;
|
||||
typedef const ArgType ChildTypeNoConst;
|
||||
typedef const ArgType ChildType;
|
||||
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self;
|
||||
typedef StorageMemory<Scalar, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false,
|
||||
RawAccess = true
|
||||
};
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
* buffer is given as an input and all the threads within a work-group scan and
|
||||
* reduces the boundaries between the blocks (generated from the previous
|
||||
* kernel). and write the data on the temporary buffer. If the second kernel is
|
||||
* required, the third and final kerenl (ScanAdjustmentKernelFunctor) will
|
||||
* required, the third and final kernel (ScanAdjustmentKernelFunctor) will
|
||||
* adjust the final result into the output buffer.
|
||||
* The original algorithm for the parallel prefix sum can be found here:
|
||||
*
|
||||
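The comment block above describes the SYCL scan as up to three kernels: a per-block scan, a scan over the block boundary totals, and an adjustment pass that adds each block's offset back in. A sequential sketch (no SYCL) of that structure:

// Sequential sketch of the three-phase block scan described above: scan each
// block locally, scan the per-block totals, then add each block's offset back
// into its elements. Block size 4 is arbitrary.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> data = {3, 1, 4, 1, 5, 9, 2, 6, 5, 3};
  const std::size_t block = 4;
  const std::size_t nblocks = (data.size() + block - 1) / block;

  // Phase 1: inclusive scan inside every block (one work-group per block).
  std::vector<int> totals(nblocks, 0);
  for (std::size_t b = 0; b < nblocks; ++b) {
    int running = 0;
    for (std::size_t i = b * block; i < std::min(data.size(), (b + 1) * block); ++i) {
      running += data[i];
      data[i] = running;
    }
    totals[b] = running;
  }

  // Phase 2: exclusive scan of the block totals (the boundary kernel).
  int carry = 0;
  for (std::size_t b = 0; b < nblocks; ++b) {
    const int t = totals[b];
    totals[b] = carry;
    carry += t;
  }

  // Phase 3: adjustment pass adds each block's offset to its elements.
  for (std::size_t b = 0; b < nblocks; ++b)
    for (std::size_t i = b * block; i < std::min(data.size(), (b + 1) * block); ++i)
      data[i] += totals[b];

  for (int v : data) std::cout << v << ' ';  // prints the inclusive prefix sums
  std::cout << "\n";
}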
@@ -37,6 +37,8 @@
|
||||
#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_SYCL_SYCL_HPP
|
||||
#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_SYCL_SYCL_HPP
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
namespace TensorSycl {
|
||||
namespace internal {
|
||||
@@ -105,27 +107,27 @@ struct ScanKernelFunctor {
|
||||
inclusive(inclusive_) {}
|
||||
|
||||
template <scan_step sst = stp, typename Input>
|
||||
typename ::Eigen::internal::enable_if<sst == scan_step::first, CoeffReturnType>::type EIGEN_DEVICE_FUNC
|
||||
std::enable_if_t<sst == scan_step::first, CoeffReturnType> EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE
|
||||
read(const Input &inpt, Index global_id) {
|
||||
return inpt.coeff(global_id);
|
||||
}
|
||||
|
||||
template <scan_step sst = stp, typename Input>
|
||||
typename ::Eigen::internal::enable_if<sst != scan_step::first, CoeffReturnType>::type EIGEN_DEVICE_FUNC
|
||||
std::enable_if_t<sst != scan_step::first, CoeffReturnType> EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE
|
||||
read(const Input &inpt, Index global_id) {
|
||||
return inpt[global_id];
|
||||
}
|
||||
|
||||
template <scan_step sst = stp, typename InclusiveOp>
|
||||
typename ::Eigen::internal::enable_if<sst == scan_step::first>::type EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
std::enable_if_t<sst == scan_step::first> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
first_step_inclusive_Operation(InclusiveOp inclusive_op) {
|
||||
inclusive_op();
|
||||
}
|
||||
|
||||
template <scan_step sst = stp, typename InclusiveOp>
|
||||
typename ::Eigen::internal::enable_if<sst != scan_step::first>::type EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
std::enable_if_t<sst != scan_step::first> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
first_step_inclusive_Operation(InclusiveOp) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) {
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorShuffling
|
||||
@@ -28,9 +30,9 @@ struct traits<TensorShufflingOp<Shuffle, XprType> > : public traits<XprType>
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -69,7 +71,7 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
|
||||
const Shuffle& shufflePermutation() const { return m_shuffle; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorShufflingOp)
|
||||
@@ -88,26 +90,26 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
|
||||
typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Self;
|
||||
typedef TensorShufflingOp<Shuffle, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
|
||||
PreferBlockAccess = true,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||
typedef std::remove_const_t<Scalar> ScalarNoConst;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
|
||||
@@ -194,7 +196,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
|
||||
struct PacketLoader {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
static PacketReturnType Run(const Self& self, Index index) {
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
values[i] = self.coeff(index + i);
|
||||
@@ -211,7 +213,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
|
||||
if (self.m_is_identity) {
|
||||
return self.m_impl.template packet<LoadMode>(index);
|
||||
} else {
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
values[i] = self.coeff(index + i);
|
||||
@@ -225,8 +227,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
|
||||
eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
|
||||
return PacketLoader<LoadMode, Self, TensorEvaluator<ArgType, Device>::PacketAccess>::Run(*this, index);
|
||||
}
|
||||
|
||||
@@ -255,7 +256,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
|
||||
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
|
||||
bool root_of_expr_ast = false) const {
|
||||
assert(m_impl.data() != NULL);
|
||||
eigen_assert(m_impl.data() != NULL);
|
||||
|
||||
typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
|
||||
TensorBlockIO;
|
||||
@@ -363,23 +364,23 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
|
||||
|
||||
typedef TensorShufflingOp<Shuffle, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
|
||||
PreferBlockAccess = true,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||
typedef std::remove_const_t<Scalar> ScalarNoConst;
|
||||
|
||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
|
||||
@@ -397,9 +398,7 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
|
||||
template <int StoreMode> EIGEN_STRONG_INLINE
|
||||
void writePacket(Index index, const PacketReturnType& x)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
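
The hunks above modernize the TensorShufflingOp traits and evaluator (constexpr constants, std::remove_const_t, and the shared Layout constant). For orientation, a minimal usage sketch of the public shuffle API that this evaluator implements; it is illustrative only and not part of the commit, and the sizes and permutation are arbitrary.

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 3> input(20, 30, 50);
  input.setRandom();

  // Permute dimensions: output.dimension(i) == input.dimension(shuffle[i]).
  Eigen::array<int, 3> shuffle{{1, 2, 0}};
  Eigen::Tensor<float, 3> output = input.shuffle(shuffle);

  // With this permutation, output(i, j, k) == input(k, i, j).
  return output.dimension(0) == 30 ? 0 : 1;
}
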
@@ -17,6 +17,8 @@
|
||||
#define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \internal
|
||||
@@ -39,10 +41,10 @@ template<typename T, typename FixedDimensions, int Options_>
|
||||
class TensorStorage
|
||||
{
|
||||
private:
|
||||
static const std::size_t Size = FixedDimensions::total_size;
|
||||
static constexpr std::size_t Size = FixedDimensions::total_size;
|
||||
|
||||
// Allocate an array of size at least one to prevent compiler warnings.
|
||||
static const std::size_t MinSize = max_n_1<Size>::size;
|
||||
static constexpr std::size_t MinSize = max_n_1<Size>::size;
|
||||
EIGEN_ALIGN_MAX T m_data[MinSize];
|
||||
|
||||
public:
|
||||
@@ -55,17 +57,14 @@ class TensorStorage
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const T *data() const { return m_data; }
|
||||
|
||||
static EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const FixedDimensions& dimensions()
|
||||
{
|
||||
static const FixedDimensions* singleton_dimensions = new FixedDimensions();
|
||||
return *singleton_dimensions;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const FixedDimensions dimensions() const { return FixedDimensions(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE DenseIndex size() const { return Size; }
|
||||
};
|
||||
|
||||
|
||||
// pure dynamic
|
||||
template<typename T, typename IndexType, int NumIndices_, int Options_>
|
||||
class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_>
|
||||
@@ -86,12 +85,10 @@ class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_>
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size)), m_dimensions(dimensions)
|
||||
{ EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN }
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template <typename... DenseIndex>
|
||||
EIGEN_DEVICE_FUNC TensorStorage(DenseIndex... indices) : m_dimensions(indices...) {
|
||||
m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(m_dimensions));
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC TensorStorage(const Self& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(other.m_dimensions)))
|
||||
@@ -108,7 +105,6 @@ class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_>
|
||||
return *this;
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC TensorStorage(Self&& other) : TensorStorage()
|
||||
{
|
||||
*this = std::move(other);
|
||||
@@ -120,7 +116,6 @@ class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_>
|
||||
numext::swap(m_dimensions, other.m_dimensions);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); }
|
||||
EIGEN_DEVICE_FUNC void swap(Self& other)
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorStriding
|
||||
@@ -28,9 +30,9 @@ struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
};
|
||||
|
||||
@@ -69,7 +71,7 @@ class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
|
||||
const Strides& strides() const { return m_dims; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingOp)
|
||||
@@ -86,21 +88,21 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
|
||||
{
|
||||
typedef TensorStridingOp<Strides, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -193,7 +195,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
|
||||
return rslt;
|
||||
}
|
||||
else {
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
values[0] = m_impl.coeff(inputIndices[0]);
|
||||
values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
|
||||
EIGEN_UNROLL_LOOP
|
||||
@@ -265,14 +267,14 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
|
||||
typedef TensorStridingOp<Strides, ArgType> XprType;
|
||||
typedef TensorEvaluator<const XprType, Device> Base;
|
||||
// typedef typename XprType::Index Index;
|
||||
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
// typedef DSizes<Index, NumDims> Dimensions;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
PreferBlockAccess = false,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -284,7 +286,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
|
||||
{
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_TRACE_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_TRACE_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorTrace
|
||||
@@ -30,9 +32,9 @@ struct traits<TensorTraceOp<Dims, XprType> > : public traits<XprType>
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
};
|
||||
|
||||
template<typename Dims, typename XprType>
|
||||
@@ -69,7 +71,7 @@ class TensorTraceOp : public TensorBase<TensorTraceOp<Dims, XprType> >
|
||||
const Dims& dims() const { return m_dims; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const typename internal::remove_all<typename XprType::Nested>::type& expression() const { return m_xpr; }
|
||||
const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
typename XprType::Nested m_xpr;
|
||||
@@ -82,24 +84,24 @@ template<typename Dims, typename ArgType, typename Device>
|
||||
struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
|
||||
{
|
||||
typedef TensorTraceOp<Dims, ArgType> XprType;
|
||||
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static const int NumReducedDims = internal::array_size<Dims>::value;
|
||||
static const int NumOutputDims = NumInputDims - NumReducedDims;
|
||||
static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumReducedDims = internal::array_size<Dims>::value;
|
||||
static constexpr int NumOutputDims = NumInputDims - NumReducedDims;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef DSizes<Index, NumOutputDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
static constexpr int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false,
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -134,6 +136,7 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(num_distinct_reduce_dims);
|
||||
eigen_assert(num_distinct_reduce_dims == NumReducedDims);
|
||||
|
||||
// Compute the dimensions of the result.
|
||||
@@ -243,11 +246,9 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
|
||||
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
|
||||
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
values[i] = coeff(index + i);
|
||||
}
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
@@ -50,8 +52,8 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
|
||||
typedef Scalar_ Scalar;
|
||||
typedef Dense StorageKind;
|
||||
typedef IndexType_ Index;
|
||||
static const int NumDimensions = NumIndices_;
|
||||
static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
|
||||
static constexpr int NumDimensions = NumIndices_;
|
||||
static constexpr int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
|
||||
enum {
|
||||
Options = Options_,
|
||||
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit)
|
||||
@@ -69,8 +71,8 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
|
||||
typedef Scalar_ Scalar;
|
||||
typedef Dense StorageKind;
|
||||
typedef IndexType_ Index;
|
||||
static const int NumDimensions = array_size<Dimensions>::value;
|
||||
static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
|
||||
static constexpr int NumDimensions = array_size<Dimensions>::value;
|
||||
static constexpr int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
|
||||
enum {
|
||||
Options = Options_,
|
||||
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit)
|
||||
@@ -90,8 +92,8 @@ struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> >
|
||||
typedef typename BaseTraits::Scalar Scalar;
|
||||
typedef typename BaseTraits::StorageKind StorageKind;
|
||||
typedef typename BaseTraits::Index Index;
|
||||
static const int NumDimensions = BaseTraits::NumDimensions;
|
||||
static const int Layout = BaseTraits::Layout;
|
||||
static constexpr int NumDimensions = BaseTraits::NumDimensions;
|
||||
static constexpr int Layout = BaseTraits::Layout;
|
||||
enum {
|
||||
Options = Options_,
|
||||
Flags = BaseTraits::Flags
|
||||
@@ -112,8 +114,8 @@ struct traits<TensorRef<PlainObjectType> >
|
||||
typedef typename BaseTraits::Scalar Scalar;
|
||||
typedef typename BaseTraits::StorageKind StorageKind;
|
||||
typedef typename BaseTraits::Index Index;
|
||||
static const int NumDimensions = BaseTraits::NumDimensions;
|
||||
static const int Layout = BaseTraits::Layout;
|
||||
static constexpr int NumDimensions = BaseTraits::NumDimensions;
|
||||
static constexpr int Layout = BaseTraits::Layout;
|
||||
enum {
|
||||
Options = BaseTraits::Options,
|
||||
Flags = BaseTraits::Flags
|
||||
@@ -122,16 +124,16 @@ struct traits<TensorRef<PlainObjectType> >
|
||||
};
|
||||
|
||||
|
||||
template<typename _Scalar, int NumIndices_, int Options, typename IndexType_>
|
||||
struct eval<Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense>
|
||||
template<typename Scalar_, int NumIndices_, int Options, typename IndexType_>
|
||||
struct eval<Tensor<Scalar_, NumIndices_, Options, IndexType_>, Eigen::Dense>
|
||||
{
|
||||
typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type;
|
||||
typedef const Tensor<Scalar_, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type;
|
||||
};
|
||||
|
||||
template<typename _Scalar, int NumIndices_, int Options, typename IndexType_>
|
||||
struct eval<const Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense>
|
||||
template<typename Scalar_, int NumIndices_, int Options, typename IndexType_>
|
||||
struct eval<const Tensor<Scalar_, NumIndices_, Options, IndexType_>, Eigen::Dense>
|
||||
{
|
||||
typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type;
|
||||
typedef const Tensor<Scalar_, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type;
|
||||
};
|
||||
|
||||
template<typename Scalar_, typename Dimensions, int Options, typename IndexType_>
|
||||
@@ -254,10 +256,10 @@ struct nested<const TensorRef<PlainObjectType> >
|
||||
// the SAME case.
|
||||
// When the stride is 1, we have the simplified case R'=R-K+1, C'=C-K+1, Pr=0,
|
||||
// Pc=0.
|
||||
typedef enum {
|
||||
enum PaddingType {
|
||||
PADDING_VALID = 1,
|
||||
PADDING_SAME = 2
|
||||
} PaddingType;
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
@@ -55,7 +57,7 @@ struct TensorUInt128
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
explicit TensorUInt128(const T& x) : high(0), low(x) {
|
||||
eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest()));
|
||||
eigen_assert((static_cast<std::conditional_t<sizeof(T) == 8, uint64_t, uint32_t>>(x) <= NumTraits<uint64_t>::highest()));
|
||||
eigen_assert(x >= 0);
|
||||
}
|
||||
|
||||
@@ -78,14 +80,14 @@ template <typename HL, typename LL, typename HR, typename LR>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
|
||||
{
|
||||
return (lhs.high == rhs.high) & (lhs.low == rhs.low);
|
||||
return (lhs.high == rhs.high) && (lhs.low == rhs.low);
|
||||
}
|
||||
|
||||
template <typename HL, typename LL, typename HR, typename LR>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
|
||||
{
|
||||
return (lhs.high != rhs.high) | (lhs.low != rhs.low);
|
||||
return (lhs.high != rhs.high) || (lhs.low != rhs.low);
|
||||
}
|
||||
|
||||
template <typename HL, typename LL, typename HR, typename LR>
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorVolumePatch
|
||||
@@ -26,14 +28,14 @@ namespace internal {
|
||||
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
|
||||
struct traits<TensorVolumePatchOp<Planes, Rows, Cols, XprType> > : public traits<XprType>
|
||||
{
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
|
||||
typedef traits<XprType> XprTraits;
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions + 1;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
typedef std::remove_reference_t<Nested> Nested_;
|
||||
static constexpr int NumDimensions = XprTraits::NumDimensions + 1;
|
||||
static constexpr int Layout = XprTraits::Layout;
|
||||
typedef typename XprTraits::PointerType PointerType;
|
||||
|
||||
};
|
||||
@@ -135,7 +137,7 @@ class TensorVolumePatchOp : public TensorBase<TensorVolumePatchOp<Planes, Rows,
|
||||
Scalar padding_value() const { return m_padding_value; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
const internal::remove_all_t<typename XprType::Nested>&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
@@ -170,22 +172,22 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
|
||||
{
|
||||
typedef TensorVolumePatchOp<Planes, Rows, Cols, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static const int NumDims = NumInputDims + 1;
|
||||
static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static constexpr int NumDims = NumInputDims + 1;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||
typedef typename Storage::Type EvaluatorPointerType;
|
||||
|
||||
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = false,
|
||||
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = false,
|
||||
RawAccess = false
|
||||
};
|
||||
@@ -419,7 +421,6 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
||||
|
||||
if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 ||
|
||||
@@ -543,7 +544,7 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
|
||||
{
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
values[i] = coeff(index+i);

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H
#define EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

class DynamicSGroup

@@ -0,0 +1,3 @@
#ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE_H
#error "Please include unsupported/Eigen/CXX11/TensorSymmetry instead of including headers inside the src directory directly."
#endif
@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H
#define EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H
|
||||
#define EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
enum {
|
||||
@@ -237,11 +239,11 @@ struct tensor_symmetry_pre_analysis<NumIndices, Gen_, Gens_...>
|
||||
typedef tensor_static_symgroup_if<(sizeof...(Gens_) + 1 <= max_static_generators), NumIndices, Gen_, Gens_...> helper;
|
||||
constexpr static std::size_t possible_size = helper::size;
|
||||
|
||||
typedef typename conditional<
|
||||
typedef std::conditional_t<
|
||||
possible_size == 0 || possible_size >= max_static_elements,
|
||||
DynamicSGroupFromTemplateArgs<Gen_, Gens_...>,
|
||||
typename helper::type
|
||||
>::type root_type;
|
||||
> root_type;
|
||||
};
|
||||
|
||||
template<bool instantiate, std::size_t NumIndices, typename... Gens>
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H
|
||||
#define EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H
|
||||
|
||||
#include "../InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
@@ -126,11 +128,11 @@ template<
|
||||
>
|
||||
struct strip_identities<Equality, id, type_list<t, ts...>>
|
||||
{
|
||||
typedef typename conditional<
|
||||
typedef std::conditional_t<
|
||||
Equality<id, t>::value,
|
||||
typename strip_identities<Equality, id, type_list<ts...>>::type,
|
||||
typename concat<type_list<t>, typename strip_identities<Equality, id, type_list<ts...>>::type>::type
|
||||
>::type type;
|
||||
> type;
|
||||
constexpr static int global_flags = Equality<id, t>::global_flags | strip_identities<Equality, id, type_list<ts...>>::global_flags;
|
||||
};
|
||||
|
||||
@@ -637,21 +639,21 @@ struct enumerate_group_elements_noid<Multiply, Equality, id, type_list<>, initia
|
||||
* \tparam Equality The equality check operation that checks if two group elements
|
||||
* are equal to another.
|
||||
* \tparam id The identity element
|
||||
* \tparam _generators A list of (possibly redundant) generators of the group
|
||||
* \tparam Generators_ A list of (possibly redundant) generators of the group
|
||||
*/
|
||||
template<
|
||||
template<typename, typename> class Multiply,
|
||||
template<typename, typename> class Equality,
|
||||
typename id,
|
||||
typename _generators
|
||||
typename Generators_
|
||||
>
|
||||
struct enumerate_group_elements
|
||||
: public enumerate_group_elements_noid<
|
||||
Multiply,
|
||||
Equality,
|
||||
id,
|
||||
typename strip_identities<Equality, id, _generators>::type,
|
||||
strip_identities<Equality, id, _generators>::global_flags
|
||||
typename strip_identities<Equality, id, Generators_>::type,
|
||||
strip_identities<Equality, id, Generators_>::global_flags
|
||||
>
|
||||
{
|
||||
};

@@ -13,6 +13,8 @@
#ifndef EIGEN_CXX11_THREADPOOL_BARRIER_H
#define EIGEN_CXX11_THREADPOOL_BARRIER_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

class Barrier {

@@ -7,8 +7,10 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_
#define EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_
#ifndef EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H
#define EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

@@ -85,7 +87,7 @@ class EventCount {
CheckState(state, true);
uint64_t newstate;
if ((state & kSignalMask) != 0) {
// Consume the signal and return immidiately.
// Consume the signal and return immediately.
newstate = state - kWaiterInc - kSignalInc;
} else {
// Remove this thread from pre-wait counter and add to the waiter stack.
@@ -112,7 +114,7 @@ class EventCount {
CheckState(state, true);
uint64_t newstate = state - kWaiterInc;
// We don't know if the thread was also notified or not,
// so we should not consume a signal unconditionaly.
// so we should not consume a signal unconditionally.
// Only if number of waiters is equal to number of signals,
// we know that the thread was notified and we must take away the signal.
if (((state & kWaiterMask) >> kWaiterShift) ==
@@ -246,4 +248,4 @@ class EventCount {

}  // namespace Eigen

#endif  // EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_
#endif  // EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H

@@ -0,0 +1,3 @@
#ifndef EIGEN_CXX11_THREADPOOL_MODULE_H
#error "Please include unsupported/Eigen/CXX11/ThreadPool instead of including headers inside the src directory directly."
#endif
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H
|
||||
#define EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template <typename Environment>
|
||||
|
||||
@@ -7,8 +7,10 @@
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
|
||||
#define EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
|
||||
#ifndef EIGEN_CXX11_THREADPOOL_RUNQUEUE_H
|
||||
#define EIGEN_CXX11_THREADPOOL_RUNQUEUE_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
@@ -233,4 +235,4 @@ class RunQueue {
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#endif // EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
|
||||
#endif // EIGEN_CXX11_THREADPOOL_RUNQUEUE_H
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H
|
||||
#define EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
struct StlThreadEnvironment {
|
||||
|
||||
@@ -18,10 +18,7 @@
|
||||
|
||||
#else
|
||||
|
||||
#if EIGEN_MAX_CPP_VER >= 11 && \
|
||||
((EIGEN_COMP_GNUC && EIGEN_GNUC_AT_LEAST(4, 8)) || \
|
||||
__has_feature(cxx_thread_local) || \
|
||||
(EIGEN_COMP_MSVC >= 1900) )
|
||||
#if ((EIGEN_COMP_GNUC) || __has_feature(cxx_thread_local) || EIGEN_COMP_MSVC )
|
||||
#define EIGEN_THREAD_LOCAL static thread_local
|
||||
#endif
|
||||
|
||||
@@ -62,6 +59,8 @@
|
||||
|
||||
#endif // EIGEN_AVOID_THREAD_LOCAL
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H
|
||||
#define EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// This defines an interface that ThreadPoolDevice can take to use
|
||||
|
||||
@@ -11,10 +11,6 @@
#define EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H

// Try to come up with a portable way to yield
#if EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7)
#define EIGEN_THREAD_YIELD() sched_yield()
#else
#define EIGEN_THREAD_YIELD() std::this_thread::yield()
#endif

#endif  // EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H

@@ -35,7 +35,8 @@ template<typename T, T... nn>
|
||||
struct numeric_list { constexpr static std::size_t count = sizeof...(nn); };
|
||||
|
||||
template<typename T, T n, T... nn>
|
||||
struct numeric_list<T, n, nn...> { static const std::size_t count = sizeof...(nn) + 1; const static T first_value = n; };
|
||||
struct numeric_list<T, n, nn...> { static constexpr std::size_t count = sizeof...(nn) + 1;
|
||||
static constexpr T first_value = n; };
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/* numeric list constructors
|
||||
@@ -81,7 +82,8 @@ template<typename a, typename... as> struct take<0, type_list<a, as...>>
|
||||
template<> struct take<0, type_list<>> { typedef type_list<> type; };
|
||||
|
||||
template<typename T, int n, T a, T... as> struct take<n, numeric_list<T, a, as...>> : concat<numeric_list<T, a>, typename take<n-1, numeric_list<T, as...>>::type> {};
|
||||
template<typename T, int n> struct take<n, numeric_list<T>> { typedef numeric_list<T> type; };
|
||||
// XXX The following breaks in gcc-11, and is invalid anyways.
|
||||
// template<typename T, int n> struct take<n, numeric_list<T>> { typedef numeric_list<T> type; };
|
||||
template<typename T, T a, T... as> struct take<0, numeric_list<T, a, as...>> { typedef numeric_list<T> type; };
|
||||
template<typename T> struct take<0, numeric_list<T>> { typedef numeric_list<T> type; };
|
||||
|
||||
|
||||
@@ -27,18 +27,6 @@
|
||||
#error GNU C++ Compiler (g++) only supports required C++ features since version 4.6.
|
||||
#endif
|
||||
|
||||
/* Check that the compiler at least claims to support C++11. It might not be sufficient
|
||||
* because the compiler may not implement it correctly, but at least we'll know.
|
||||
* On the other hand, visual studio still doesn't claim to support C++11 although it's
|
||||
* compliant enugh for our purpose.
|
||||
*/
|
||||
#if (EIGEN_COMP_CXXVER < 11)
|
||||
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
|
||||
#pragma GCC diagnostic error "-Wfatal-errors"
|
||||
#endif
|
||||
#error This library needs at least a C++11 compliant compiler. If you use g++/clang, please enable the -std=c++11 compiler flag. (-std=c++0x on older versions.)
|
||||
#endif
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
@@ -10,16 +10,43 @@
|
||||
#ifndef EIGEN_EMULATE_ARRAY_H
|
||||
#define EIGEN_EMULATE_ARRAY_H
|
||||
|
||||
|
||||
|
||||
// The array class is only available starting with cxx11. Emulate our own here
|
||||
// if needed. Beware, msvc still doesn't advertise itself as a c++11 compiler!
|
||||
// Moreover, CUDA doesn't support the STL containers, so we use our own instead.
|
||||
#if (__cplusplus <= 199711L && EIGEN_COMP_MSVC < 1900) || defined(EIGEN_GPUCC) || defined(EIGEN_AVOID_STL_ARRAY)
|
||||
// CUDA doesn't support the STL containers, so we use our own instead.
|
||||
#if defined(EIGEN_GPUCC) || defined(EIGEN_AVOID_STL_ARRAY)
|
||||
|
||||
namespace Eigen {
|
||||
template <typename T, size_t n> class array {
|
||||
|
||||
public:
|
||||
typedef T value_type;
|
||||
typedef T* iterator;
|
||||
typedef const T* const_iterator;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE iterator begin() { return values; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const_iterator begin() const { return values; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE iterator end() { return values + n; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const_iterator end() const { return values + n; }
|
||||
|
||||
|
||||
#if !defined(EIGEN_GPUCC)
|
||||
typedef std::reverse_iterator<iterator> reverse_iterator;
|
||||
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE reverse_iterator rbegin() { return reverse_iterator(end());}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE reverse_iterator rend() { return reverse_iterator(begin()); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE T& operator[] (size_t index) { eigen_internal_assert(index < size()); return values[index]; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -40,6 +67,7 @@ template <typename T, size_t n> class array {
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const T& back() const { return values[n-1]; }
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
static std::size_t size() { return n; }
|
||||
|
||||
@@ -122,13 +150,11 @@ template <typename T, size_t n> class array {
|
||||
values[7] = v8;
|
||||
}
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE array(std::initializer_list<T> l) {
|
||||
eigen_assert(l.size() == n);
|
||||
internal::smart_copy(l.begin(), l.end(), values);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
@@ -172,12 +198,10 @@ template <typename T> class array<T, 0> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE array() : dummy() { }
|
||||
|
||||
#if EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) : dummy() {
|
||||
EIGEN_UNUSED_VARIABLE(l);
|
||||
eigen_assert(l.size() == 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
private:
|
||||
T dummy;
|
||||
@@ -226,6 +250,7 @@ template<class T, std::size_t N> struct array_size<const array<T,N>& > {
|
||||
|
||||
// The compiler supports c++11, and we're not targeting cuda: use std::array as Eigen::array
|
||||
#include <array>
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template <typename T, std::size_t N> using array = std::array<T, N>;
|
||||
|
||||
@@ -29,7 +29,7 @@ namespace Eigen {
*/
template <typename T>
class MaxSizeVector {
static const size_t alignment = EIGEN_PLAIN_ENUM_MAX(EIGEN_ALIGNOF(T), sizeof(void*));
static const size_t alignment = internal::plain_enum_max(EIGEN_ALIGNOF(T), sizeof(void*));
public:
// Construct a new MaxSizeVector, reserve n elements.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE

@@ -7,8 +7,8 @@
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_FFT_H
|
||||
#define EIGEN_FFT_H
|
||||
#ifndef EIGEN_FFT_MODULE_H
|
||||
#define EIGEN_FFT_MODULE_H
|
||||
|
||||
#include <complex>
|
||||
#include <vector>
|
||||
@@ -29,10 +29,19 @@
|
||||
* The default implementation is based on kissfft. It is a small, free, and
|
||||
* reasonably efficient default.
|
||||
*
|
||||
* There are currently two implementation backend:
|
||||
* There are currently four implementation backend:
|
||||
*
|
||||
* - kissfft(https://github.com/mborgerding/kissfft) : Simple and not so fast, BSD-3-Clause.
|
||||
* It is a mixed-radix Fast Fourier Transform based up on the principle, "Keep It Simple, Stupid."
|
||||
* Notice that:kissfft fails to handle "atypically-sized" inputs(i.e., sizes with large factors),a workaround is using fftw or pocketfft.
|
||||
* - fftw (http://www.fftw.org) : faster, GPL -- incompatible with Eigen in LGPL form, bigger code size.
|
||||
* - MKL (http://en.wikipedia.org/wiki/Math_Kernel_Library) : fastest, commercial -- may be incompatible with Eigen in GPL form.
|
||||
* - MKL (https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-download.html) : fastest, free -- may be incompatible with Eigen in GPL form.
|
||||
* - pocketfft (https://gitlab.mpcdf.mpg.de/mtr/pocketfft) : faster than kissfft, BSD 3-clause.
|
||||
* It is a heavily modified implementation of FFTPack, with the following advantages:
|
||||
* 1.strictly C++11 compliant
|
||||
* 2.more accurate twiddle factor computation
|
||||
* 3.very fast plan generation
|
||||
* 4.worst case complexity for transform sizes with large prime factors is N*log(N), because Bluestein's algorithm is used for these cases
|
||||
*
|
||||
* \section FFTDesign Design
|
||||
*
|
||||
@@ -79,15 +88,21 @@
|
||||
template <typename T> struct default_fft_impl : public internal::fftw_impl<T> {};
|
||||
}
|
||||
#elif defined EIGEN_MKL_DEFAULT
|
||||
// TODO
|
||||
// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form
|
||||
// intel Math Kernel Library: fastest, free -- may be incompatible with Eigen in GPL form
|
||||
# include "src/FFT/ei_imklfft_impl.h"
|
||||
namespace Eigen {
|
||||
template <typename T> struct default_fft_impl : public internal::imklfft_impl {};
|
||||
template <typename T> struct default_fft_impl : public internal::imklfft::imklfft_impl<T> {};
|
||||
}
|
||||
#else
|
||||
#elif defined EIGEN_POCKETFFT_DEFAULT
|
||||
// internal::pocketfft_impl: a heavily modified implementation of FFTPack, with many advantages.
|
||||
# include<pocketfft_hdronly.h>
|
||||
# include"src/FFT/ei_pocketfft_impl.h"
|
||||
namespace Eigen {
|
||||
template <typename T>
|
||||
struct default_fft_impl : public internal::pocketfft_impl<T> {};
|
||||
}
|
||||
#else
|
||||
// internal::kissfft_impl: small, free, reasonably efficient default, derived from kissfft
|
||||
//
|
||||
# include "src/FFT/ei_kissfft_impl.h"
|
||||
namespace Eigen {
|
||||
template <typename T>
|
||||
@@ -195,19 +210,19 @@ class FFT
|
||||
m_impl.fwd(dst,src,static_cast<int>(nfft));
|
||||
}
|
||||
|
||||
/*
|
||||
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
|
||||
inline
|
||||
void fwd2(Complex * dst, const Complex * src, int n0,int n1)
|
||||
{
|
||||
m_impl.fwd2(dst,src,n0,n1);
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
|
||||
template <typename _Input>
|
||||
template <typename Input_>
|
||||
inline
|
||||
void fwd( std::vector<Complex> & dst, const std::vector<_Input> & src)
|
||||
void fwd( std::vector<Complex> & dst, const std::vector<Input_> & src)
|
||||
{
|
||||
if ( NumTraits<_Input>::IsComplex == 0 && HasFlag(HalfSpectrum) )
|
||||
if ( NumTraits<Input_>::IsComplex == 0 && HasFlag(HalfSpectrum) )
|
||||
dst.resize( (src.size()>>1)+1); // half the bins + Nyquist bin
|
||||
else
|
||||
dst.resize(src.size());
|
||||
@@ -343,19 +358,18 @@ class FFT
|
||||
}
|
||||
}
|
||||
|
||||
template <typename _Output>
|
||||
template <typename Output_>
|
||||
inline
|
||||
void inv( std::vector<_Output> & dst, const std::vector<Complex> & src,Index nfft=-1)
|
||||
void inv( std::vector<Output_> & dst, const std::vector<Complex> & src,Index nfft=-1)
|
||||
{
|
||||
if (nfft<1)
|
||||
nfft = ( NumTraits<_Output>::IsComplex == 0 && HasFlag(HalfSpectrum) ) ? 2*(src.size()-1) : src.size();
|
||||
nfft = ( NumTraits<Output_>::IsComplex == 0 && HasFlag(HalfSpectrum) ) ? 2*(src.size()-1) : src.size();
|
||||
dst.resize( nfft );
|
||||
inv( &dst[0],&src[0],nfft);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
// TODO: multi-dimensional FFTs
|
||||
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
|
||||
inline
|
||||
void inv2(Complex * dst, const Complex * src, int n0,int n1)
|
||||
{
|
||||
@@ -363,7 +377,8 @@ class FFT
|
||||
if ( HasFlag( Unscaled ) == false)
|
||||
scale(dst,1./(n0*n1),n0*n1);
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
|
||||
|
||||
inline
|
||||
impl_type & impl() {return m_impl;}
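
The FFT module above selects one of four backends at compile time and exposes the std::vector overloads of fwd()/inv() shown in the hunks. A minimal round-trip sketch, illustrative only and not part of this commit (signal length and contents are arbitrary):

#include <unsupported/Eigen/FFT>
#include <complex>
#include <vector>

int main() {
  Eigen::FFT<float> fft;
  std::vector<float> timevec(64, 1.0f);            // real-valued input signal
  std::vector<std::complex<float> > freqvec;

  fft.fwd(freqvec, timevec);   // real-to-complex forward transform
  fft.inv(timevec, freqvec);   // complex-to-real inverse transform
  return freqvec.empty() ? 1 : 0;
}
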
@@ -16,15 +16,61 @@
|
||||
|
||||
|
||||
/**
|
||||
* \defgroup IterativeLinearSolvers_Module Iterative solvers module
|
||||
* \defgroup IterativeLinearSolvers_Module Iterative Solvers module
|
||||
* This module aims to provide various iterative linear and non linear solver algorithms.
|
||||
* It currently provides:
|
||||
* - a constrained conjugate gradient
|
||||
* - a Householder GMRES implementation
|
||||
* - an IDR(s) implementation
|
||||
* - a BiCGSTAB(L) implementation
|
||||
* - a DGMRES implementation
|
||||
* - a MINRES implementation
|
||||
* - a IDRSTABL implementation
|
||||
*
|
||||
* Choosing the best solver for solving \c A \c x = \c b depends a lot on the preconditioner chosen as well as the properties of \c A. The following flowchart might help you.
|
||||
* \dot width=50%
|
||||
* digraph g {
|
||||
* node [ fontname=Arial, fontsize=11];
|
||||
* edge [ fontname=Helvetica, fontsize=10 ];
|
||||
* A1[label="hermitian",shape="box"];
|
||||
* A2[label="positive definite",shape="box"];
|
||||
* CG[shape="plaintext"];
|
||||
* A3[label="ill conditioned",shape="box"];
|
||||
* A4[label="good preconditioner",shape="box"];
|
||||
* A5[label="flexible preconditioner",shape="box"];
|
||||
* A6[label="strongly indefinite",shape="box"];
|
||||
* A8[label="large imaginary eigenvalue",shape="box"];
|
||||
* A7[label="large imaginary eigenvalue",shape="box"];
|
||||
*
|
||||
* SYMMLQ[shape="plaintext"];
|
||||
* MINRES[shape="plaintext"];
|
||||
* GCR[shape="plaintext"];
|
||||
* GMRES[shape="plaintext"];
|
||||
* IDRSTABL[shape="plaintext"];
|
||||
* IDRS[shape="plaintext"];
|
||||
* BICGSTABL[shape="plaintext"];
|
||||
* BICGSTAB[shape="plaintext"];
|
||||
*
|
||||
* A1 -> A2 [label="yes"];
|
||||
* A2 -> CG [label="yes"];
|
||||
* A2 -> A3 [label="no"];
|
||||
* A3 -> SYMMLQ [label="yes"];
|
||||
* A3 -> MINRES [label="no"];
|
||||
*
|
||||
* A1 -> A4 [label="no"];
|
||||
* A4 -> A5 [label="yes"];
|
||||
* A5 -> GCR [label="yes"];
|
||||
* A5 -> GMRES [label="no"];
|
||||
*
|
||||
* A4 -> A6 [label="no"];
|
||||
* A6 -> A8 [label="yes"];
|
||||
* A6 -> A7 [label="no"];
|
||||
* A7 -> BICGSTABL [label="yes"];
|
||||
* A7 -> BICGSTAB [label="no"];
|
||||
* A8 -> IDRSTABL [label="yes"];
|
||||
* A8 -> IDRS [label="no"];
|
||||
* }
|
||||
* \enddot
|
||||
* \code
|
||||
* #include <unsupported/Eigen/IterativeSolvers>
|
||||
* \endcode
|
||||
@@ -41,9 +87,10 @@
|
||||
#include "src/IterativeSolvers/IncompleteLU.h"
|
||||
#include "src/IterativeSolvers/GMRES.h"
|
||||
#include "src/IterativeSolvers/DGMRES.h"
|
||||
//#include "src/IterativeSolvers/SSORPreconditioner.h"
|
||||
#include "src/IterativeSolvers/MINRES.h"
|
||||
#include "src/IterativeSolvers/IDRS.h"
|
||||
#include "src/IterativeSolvers/BiCGSTABL.h"
|
||||
#include "src/IterativeSolvers/IDRSTABL.h"
|
||||
|
||||
#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
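
The module documentation added above lists the available Krylov solvers and sketches how to choose between them. All of them follow the usual Eigen iterative-solver interface; a minimal sketch with GMRES, illustrative only and not part of this commit (the identity system is a placeholder):

#include <unsupported/Eigen/IterativeSolvers>
#include <Eigen/SparseCore>
#include <Eigen/Dense>

int main() {
  typedef Eigen::SparseMatrix<double> SpMat;
  SpMat A(100, 100);
  A.setIdentity();                                  // placeholder system matrix
  Eigen::VectorXd b = Eigen::VectorXd::Ones(100);

  Eigen::GMRES<SpMat> solver(A);                    // default diagonal preconditioner
  Eigen::VectorXd x = solver.solve(b);
  return (solver.info() == Eigen::Success && x.size() == 100) ? 0 : 1;
}
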
@@ -10,10 +10,7 @@
#define EIGEN_KRONECKER_PRODUCT_MODULE_H

#include "../../Eigen/Core"

#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"

#include "../../Eigen/src/SparseCore/SparseUtil.h"
#include "../../Eigen/SparseCore"

namespace Eigen {

@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_LEVENBERGMARQUARDT_MODULE
#define EIGEN_LEVENBERGMARQUARDT_MODULE
#ifndef EIGEN_LEVENBERGMARQUARDT_MODULE_H
#define EIGEN_LEVENBERGMARQUARDT_MODULE_H

// #include <vector>

@@ -46,4 +46,4 @@

#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"

#endif // EIGEN_LEVENBERGMARQUARDT_MODULE
#endif // EIGEN_LEVENBERGMARQUARDT_MODULE_H

@@ -8,8 +8,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_MATRIX_FUNCTIONS
#define EIGEN_MATRIX_FUNCTIONS
#ifndef EIGEN_MATRIX_FUNCTIONS_MODULE_H
#define EIGEN_MATRIX_FUNCTIONS_MODULE_H

#include <cfloat>
#include <list>

@@ -500,5 +500,4 @@ Output: \verbinclude MatrixSquareRoot.out

*/

#endif // EIGEN_MATRIX_FUNCTIONS

#endif // EIGEN_MATRIX_FUNCTIONS_MODULE_H

Some files were not shown because too many files have changed in this diff.