ADD: added other eigen lib
libs/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h (new file, 421 lines)
@@ -0,0 +1,421 @@
#ifndef EIGEN_ACCELERATESUPPORT_H
#define EIGEN_ACCELERATESUPPORT_H

#include <Accelerate/Accelerate.h>

#include <Eigen/Sparse>

namespace Eigen {

template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
class AccelerateImpl;

/** \ingroup AccelerateSupport_Module
 * \class AccelerateLLT
 * \brief A direct Cholesky (LLT) factorization and solver based on Accelerate
 *
 * \warning Only single and double precision real scalar types are supported by Accelerate
 *
 * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
 * \tparam UpLo_ additional information about the matrix structure. Default is Lower.
 *
 * \sa \ref TutorialSparseSolverConcept, class AccelerateLLT
 */
template <typename MatrixType, int UpLo = Lower>
using AccelerateLLT = AccelerateImpl<MatrixType, UpLo | Symmetric, SparseFactorizationCholesky, true>;

/** \ingroup AccelerateSupport_Module
 * \class AccelerateLDLT
 * \brief The default Cholesky (LDLT) factorization and solver based on Accelerate
 *
 * \warning Only single and double precision real scalar types are supported by Accelerate
 *
 * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
 * \tparam UpLo_ additional information about the matrix structure. Default is Lower.
 *
 * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLT
 */
template <typename MatrixType, int UpLo = Lower>
using AccelerateLDLT = AccelerateImpl<MatrixType, UpLo | Symmetric, SparseFactorizationLDLT, true>;

/** \ingroup AccelerateSupport_Module
 * \class AccelerateLDLTUnpivoted
 * \brief A direct Cholesky-like LDL^T factorization and solver based on Accelerate with only 1x1 pivots and no pivoting
 *
 * \warning Only single and double precision real scalar types are supported by Accelerate
 *
 * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
 * \tparam UpLo_ additional information about the matrix structure. Default is Lower.
 *
 * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLTUnpivoted
 */
template <typename MatrixType, int UpLo = Lower>
using AccelerateLDLTUnpivoted = AccelerateImpl<MatrixType, UpLo | Symmetric, SparseFactorizationLDLTUnpivoted, true>;

/** \ingroup AccelerateSupport_Module
 * \class AccelerateLDLTSBK
 * \brief A direct Cholesky (LDLT) factorization and solver based on Accelerate with Supernode Bunch-Kaufman and static pivoting
 *
 * \warning Only single and double precision real scalar types are supported by Accelerate
 *
 * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
 * \tparam UpLo_ additional information about the matrix structure. Default is Lower.
 *
 * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLTSBK
 */
template <typename MatrixType, int UpLo = Lower>
using AccelerateLDLTSBK = AccelerateImpl<MatrixType, UpLo | Symmetric, SparseFactorizationLDLTSBK, true>;

/** \ingroup AccelerateSupport_Module
 * \class AccelerateLDLTTPP
 * \brief A direct Cholesky (LDLT) factorization and solver based on Accelerate with full threshold partial pivoting
 *
 * \warning Only single and double precision real scalar types are supported by Accelerate
 *
 * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
 * \tparam UpLo_ additional information about the matrix structure. Default is Lower.
 *
 * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLTTPP
 */
template <typename MatrixType, int UpLo = Lower>
using AccelerateLDLTTPP = AccelerateImpl<MatrixType, UpLo | Symmetric, SparseFactorizationLDLTTPP, true>;

/** \ingroup AccelerateSupport_Module
 * \class AccelerateQR
 * \brief A QR factorization and solver based on Accelerate
 *
 * \warning Only single and double precision real scalar types are supported by Accelerate
 *
 * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
 *
 * \sa \ref TutorialSparseSolverConcept, class AccelerateQR
 */
template <typename MatrixType>
using AccelerateQR = AccelerateImpl<MatrixType, 0, SparseFactorizationQR, false>;

/** \ingroup AccelerateSupport_Module
 * \class AccelerateCholeskyAtA
 * \brief A QR factorization and solver based on Accelerate without storing Q (equivalent to A^TA = R^T R)
 *
 * \warning Only single and double precision real scalar types are supported by Accelerate
 *
 * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
 *
 * \sa \ref TutorialSparseSolverConcept, class AccelerateCholeskyAtA
 */
template <typename MatrixType>
using AccelerateCholeskyAtA = AccelerateImpl<MatrixType, 0, SparseFactorizationCholeskyAtA, false>;
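
A minimal usage sketch of the aliases above (our illustration, not part of the commit; it assumes an Apple platform where the Accelerate framework is available and <Eigen/AccelerateSupport> can be included):

#include <Eigen/AccelerateSupport>
#include <Eigen/Sparse>

int main() {
  // Build a small sparse SPD matrix; only the lower triangle needs to be
  // filled because AccelerateLLT defaults to UpLo = Lower.
  Eigen::SparseMatrix<double> A(2, 2);
  A.insert(0, 0) = 4.0;
  A.insert(1, 0) = 1.0;
  A.insert(1, 1) = 3.0;
  A.makeCompressed();

  Eigen::VectorXd b(2);
  b << 1.0, 2.0;

  Eigen::AccelerateLLT<Eigen::SparseMatrix<double> > llt(A);
  if (llt.info() != Eigen::Success) return 1;
  Eigen::VectorXd x = llt.solve(b);  // x now solves A * x = b
  return 0;
}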

namespace internal {
template <typename T>
struct AccelFactorizationDeleter {
  void operator()(T* sym) {
    if (sym) {
      SparseCleanup(*sym);
      delete sym;
      sym = nullptr;
    }
  }
};

template <typename DenseVecT, typename DenseMatT, typename SparseMatT, typename NumFactT>
struct SparseTypesTraitBase {
  typedef DenseVecT AccelDenseVector;
  typedef DenseMatT AccelDenseMatrix;
  typedef SparseMatT AccelSparseMatrix;

  typedef SparseOpaqueSymbolicFactorization SymbolicFactorization;
  typedef NumFactT NumericFactorization;

  typedef AccelFactorizationDeleter<SymbolicFactorization> SymbolicFactorizationDeleter;
  typedef AccelFactorizationDeleter<NumericFactorization> NumericFactorizationDeleter;
};

template <typename Scalar>
struct SparseTypesTrait {};

template <>
struct SparseTypesTrait<double> : SparseTypesTraitBase<DenseVector_Double, DenseMatrix_Double, SparseMatrix_Double,
                                                       SparseOpaqueFactorization_Double> {};

template <>
struct SparseTypesTrait<float>
    : SparseTypesTraitBase<DenseVector_Float, DenseMatrix_Float, SparseMatrix_Float, SparseOpaqueFactorization_Float> {
};

}  // end namespace internal
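
The deleter pattern above, shown in isolation (a sketch using our own names; release() stands in for Accelerate's SparseCleanup): std::unique_ptr invokes the functor instead of a plain delete, so the library-side resource is released before the owning heap object is freed.

#include <memory>

struct Handle { int id; };  // stand-in for an opaque factorization object
inline void release(Handle&) { /* library-side cleanup would happen here */ }

struct HandleDeleter {
  void operator()(Handle* h) {
    if (h) {
      release(*h);  // first release the library-side resource
      delete h;     // then free the heap object itself
    }
  }
};

using HandlePtr = std::unique_ptr<Handle, HandleDeleter>;

int main() {
  HandlePtr p(new Handle{1});
  return 0;  // HandleDeleter runs automatically here
}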

template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
class AccelerateImpl : public SparseSolverBase<AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_> > {
 protected:
  using Base = SparseSolverBase<AccelerateImpl>;
  using Base::derived;
  using Base::m_isInitialized;

 public:
  using Base::_solve_impl;

  typedef MatrixType_ MatrixType;
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::StorageIndex StorageIndex;
  enum { ColsAtCompileTime = Dynamic, MaxColsAtCompileTime = Dynamic };
  enum { UpLo = UpLo_ };

  using AccelDenseVector = typename internal::SparseTypesTrait<Scalar>::AccelDenseVector;
  using AccelDenseMatrix = typename internal::SparseTypesTrait<Scalar>::AccelDenseMatrix;
  using AccelSparseMatrix = typename internal::SparseTypesTrait<Scalar>::AccelSparseMatrix;
  using SymbolicFactorization = typename internal::SparseTypesTrait<Scalar>::SymbolicFactorization;
  using NumericFactorization = typename internal::SparseTypesTrait<Scalar>::NumericFactorization;
  using SymbolicFactorizationDeleter = typename internal::SparseTypesTrait<Scalar>::SymbolicFactorizationDeleter;
  using NumericFactorizationDeleter = typename internal::SparseTypesTrait<Scalar>::NumericFactorizationDeleter;

  AccelerateImpl() {
    m_isInitialized = false;

    auto check_flag_set = [](int value, int flag) { return ((value & flag) == flag); };

    if (check_flag_set(UpLo_, Symmetric)) {
      m_sparseKind = SparseSymmetric;
      m_triType = (UpLo_ & Lower) ? SparseLowerTriangle : SparseUpperTriangle;
    } else if (check_flag_set(UpLo_, UnitLower)) {
      m_sparseKind = SparseUnitTriangular;
      m_triType = SparseLowerTriangle;
    } else if (check_flag_set(UpLo_, UnitUpper)) {
      m_sparseKind = SparseUnitTriangular;
      m_triType = SparseUpperTriangle;
    } else if (check_flag_set(UpLo_, StrictlyLower)) {
      m_sparseKind = SparseTriangular;
      m_triType = SparseLowerTriangle;
    } else if (check_flag_set(UpLo_, StrictlyUpper)) {
      m_sparseKind = SparseTriangular;
      m_triType = SparseUpperTriangle;
    } else if (check_flag_set(UpLo_, Lower)) {
      m_sparseKind = SparseTriangular;
      m_triType = SparseLowerTriangle;
    } else if (check_flag_set(UpLo_, Upper)) {
      m_sparseKind = SparseTriangular;
      m_triType = SparseUpperTriangle;
    } else {
      m_sparseKind = SparseOrdinary;
      m_triType = (UpLo_ & Lower) ? SparseLowerTriangle : SparseUpperTriangle;
    }

    m_order = SparseOrderDefault;
  }

  explicit AccelerateImpl(const MatrixType& matrix) : AccelerateImpl() { compute(matrix); }

  ~AccelerateImpl() {}

  inline Index cols() const { return m_nCols; }
  inline Index rows() const { return m_nRows; }

  ComputationInfo info() const {
    eigen_assert(m_isInitialized && "Decomposition is not initialized.");
    return m_info;
  }

  void analyzePattern(const MatrixType& matrix);

  void factorize(const MatrixType& matrix);

  void compute(const MatrixType& matrix);

  template <typename Rhs, typename Dest>
  void _solve_impl(const MatrixBase<Rhs>& b, MatrixBase<Dest>& dest) const;

  /** Sets the ordering algorithm to use. */
  void setOrder(SparseOrder_t order) { m_order = order; }

 private:
  template <typename T>
  void buildAccelSparseMatrix(const SparseMatrix<T>& a, AccelSparseMatrix& A, std::vector<long>& columnStarts) {
    const Index nColumnsStarts = a.cols() + 1;

    columnStarts.resize(nColumnsStarts);

    for (Index i = 0; i < nColumnsStarts; i++) columnStarts[i] = a.outerIndexPtr()[i];

    SparseAttributes_t attributes{};
    attributes.transpose = false;
    attributes.triangle = m_triType;
    attributes.kind = m_sparseKind;

    SparseMatrixStructure structure{};
    structure.attributes = attributes;
    structure.rowCount = static_cast<int>(a.rows());
    structure.columnCount = static_cast<int>(a.cols());
    structure.blockSize = 1;
    structure.columnStarts = columnStarts.data();
    structure.rowIndices = const_cast<int*>(a.innerIndexPtr());

    A.structure = structure;
    A.data = const_cast<T*>(a.valuePtr());
  }

  void doAnalysis(AccelSparseMatrix& A) {
    m_numericFactorization.reset(nullptr);

    SparseSymbolicFactorOptions opts{};
    opts.control = SparseDefaultControl;
    opts.orderMethod = m_order;
    opts.order = nullptr;
    opts.ignoreRowsAndColumns = nullptr;
    opts.malloc = malloc;
    opts.free = free;
    opts.reportError = nullptr;

    m_symbolicFactorization.reset(new SymbolicFactorization(SparseFactor(Solver_, A.structure, opts)));

    SparseStatus_t status = m_symbolicFactorization->status;

    updateInfoStatus(status);

    if (status != SparseStatusOK) m_symbolicFactorization.reset(nullptr);
  }

  void doFactorization(AccelSparseMatrix& A) {
    SparseStatus_t status = SparseStatusReleased;

    if (m_symbolicFactorization) {
      m_numericFactorization.reset(new NumericFactorization(SparseFactor(*m_symbolicFactorization, A)));

      status = m_numericFactorization->status;

      if (status != SparseStatusOK) m_numericFactorization.reset(nullptr);
    }

    updateInfoStatus(status);
  }

 protected:
  void updateInfoStatus(SparseStatus_t status) const {
    switch (status) {
      case SparseStatusOK:
        m_info = Success;
        break;
      case SparseFactorizationFailed:
      case SparseMatrixIsSingular:
        m_info = NumericalIssue;
        break;
      case SparseInternalError:
      case SparseParameterError:
      case SparseStatusReleased:
      default:
        m_info = InvalidInput;
        break;
    }
  }

  mutable ComputationInfo m_info;
  Index m_nRows, m_nCols;
  std::unique_ptr<SymbolicFactorization, SymbolicFactorizationDeleter> m_symbolicFactorization;
  std::unique_ptr<NumericFactorization, NumericFactorizationDeleter> m_numericFactorization;
  SparseKind_t m_sparseKind;
  SparseTriangle_t m_triType;
  SparseOrder_t m_order;
};
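
The flag test used by the constructor above, in isolation (our illustration): a flag counts as "set" only when every one of its bits is present in the value, which is why composite modes such as (UpLo | Symmetric) are matched before plain Lower or Upper.

#include <cassert>

int main() {
  auto check_flag_set = [](int value, int flag) { return (value & flag) == flag; };
  assert(check_flag_set(0b11, 0b01));   // 0b11 contains every bit of 0b01
  assert(!check_flag_set(0b10, 0b11));  // 0b10 is missing bit 0b01
  return 0;
}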

/** Computes the symbolic and numeric decomposition of matrix \a a */
template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
void AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::compute(const MatrixType& a) {
  if (EnforceSquare_) eigen_assert(a.rows() == a.cols());

  m_nRows = a.rows();
  m_nCols = a.cols();

  AccelSparseMatrix A{};
  std::vector<long> columnStarts;

  buildAccelSparseMatrix(a, A, columnStarts);

  doAnalysis(A);

  if (m_symbolicFactorization) doFactorization(A);

  m_isInitialized = true;
}

/** Performs a symbolic decomposition on the sparsity pattern of matrix \a a.
 *
 * This function is particularly useful when solving for several problems having the same structure.
 *
 * \sa factorize()
 */
template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
void AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::analyzePattern(const MatrixType& a) {
  if (EnforceSquare_) eigen_assert(a.rows() == a.cols());

  m_nRows = a.rows();
  m_nCols = a.cols();

  AccelSparseMatrix A{};
  std::vector<long> columnStarts;

  buildAccelSparseMatrix(a, A, columnStarts);

  doAnalysis(A);

  m_isInitialized = true;
}

/** Performs a numeric decomposition of matrix \a a.
 *
 * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.
 *
 * \sa analyzePattern()
 */
template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
void AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::factorize(const MatrixType& a) {
  eigen_assert(m_symbolicFactorization && "You must first call analyzePattern()");
  eigen_assert(m_nRows == a.rows() && m_nCols == a.cols());

  if (EnforceSquare_) eigen_assert(a.rows() == a.cols());

  AccelSparseMatrix A{};
  std::vector<long> columnStarts;

  buildAccelSparseMatrix(a, A, columnStarts);

  doFactorization(A);
}
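
The analyzePattern()/factorize() split supports the usual Eigen pattern of reusing one symbolic analysis across several matrices with identical sparsity; a sketch (our names, same platform assumptions as above):

#include <Eigen/AccelerateSupport>
#include <Eigen/Sparse>

// A1 and A2 must share the same sparsity pattern.
Eigen::VectorXd solveBoth(const Eigen::SparseMatrix<double>& A1,
                          const Eigen::SparseMatrix<double>& A2,
                          const Eigen::VectorXd& b) {
  Eigen::AccelerateLDLT<Eigen::SparseMatrix<double> > solver;
  solver.analyzePattern(A1);             // symbolic step, done once
  solver.factorize(A1);                  // numeric step for A1
  Eigen::VectorXd x1 = solver.solve(b);
  solver.factorize(A2);                  // reuse the analysis for A2
  return x1 + solver.solve(b);
}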

template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
template <typename Rhs, typename Dest>
void AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::_solve_impl(const MatrixBase<Rhs>& b,
                                                                              MatrixBase<Dest>& x) const {
  if (!m_numericFactorization) {
    m_info = InvalidInput;
    return;
  }

  eigen_assert(m_nRows == b.rows());
  eigen_assert(((b.cols() == 1) || b.outerStride() == b.rows()));

  SparseStatus_t status = SparseStatusOK;

  Scalar* b_ptr = const_cast<Scalar*>(b.derived().data());
  Scalar* x_ptr = const_cast<Scalar*>(x.derived().data());

  AccelDenseMatrix xmat{};
  xmat.attributes = SparseAttributes_t();
  xmat.columnCount = static_cast<int>(x.cols());
  xmat.rowCount = static_cast<int>(x.rows());
  xmat.columnStride = xmat.rowCount;
  xmat.data = x_ptr;

  AccelDenseMatrix bmat{};
  bmat.attributes = SparseAttributes_t();
  bmat.columnCount = static_cast<int>(b.cols());
  bmat.rowCount = static_cast<int>(b.rows());
  bmat.columnStride = bmat.rowCount;
  bmat.data = b_ptr;

  SparseSolve(*m_numericFactorization, bmat, xmat);

  updateInfoStatus(status);
}

} // end namespace Eigen

#endif // EIGEN_ACCELERATESUPPORT_H

libs/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h (new file, 3 lines; path inferred from the include-guard below)
@@ -0,0 +1,3 @@
#ifndef EIGEN_ACCELERATESUPPORT_MODULE_H
#error "Please include Eigen/AccelerateSupport instead of including headers inside the src directory directly."
#endif
libs/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h (new file, 3 lines)
@@ -0,0 +1,3 @@
#ifndef EIGEN_CHOLESKY_MODULE_H
#error "Please include Eigen/Cholesky instead of including headers inside the src directory directly."
#endif
libs/eigen/Eigen/src/Cholesky/LDLT.h (path inferred from the include-guard below)
@@ -13,11 +13,13 @@
 #ifndef EIGEN_LDLT_H
 #define EIGEN_LDLT_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal {
-template<typename _MatrixType, int _UpLo> struct traits<LDLT<_MatrixType, _UpLo> >
- : traits<_MatrixType>
+template<typename MatrixType_, int UpLo_> struct traits<LDLT<MatrixType_, UpLo_> >
+ : traits<MatrixType_>
 {
   typedef MatrixXpr XprKind;
   typedef SolverStorage StorageKind;
@@ -37,8 +39,8 @@ namespace internal {
 *
 * \brief Robust Cholesky decomposition of a matrix with pivoting
 *
- * \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
- * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
+ * \tparam MatrixType_ the type of the matrix of which to compute the LDL^T Cholesky decomposition
+ * \tparam UpLo_ the triangular part that will be used for the decomposition: Lower (default) or Upper.
 * The other triangular part won't be read.
 *
 * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
@@ -56,11 +58,11 @@
 *
 * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT
 */
-template<typename _MatrixType, int _UpLo> class LDLT
- : public SolverBase<LDLT<_MatrixType, _UpLo> >
+template<typename MatrixType_, int UpLo_> class LDLT
+ : public SolverBase<LDLT<MatrixType_, UpLo_> >
 {
  public:
-  typedef _MatrixType MatrixType;
+  typedef MatrixType_ MatrixType;
   typedef SolverBase<LDLT> Base;
   friend class SolverBase<LDLT>;

@@ -68,7 +70,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
   enum {
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    UpLo = _UpLo
+    UpLo = UpLo_
   };
   typedef Matrix<Scalar, RowsAtCompileTime, 1, 0, MaxRowsAtCompileTime, 1> TmpMatrixType;

@@ -244,7 +246,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
   * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
   * \code x = decomposition.adjoint().solve(b) \endcode
   */
-  const LDLT& adjoint() const { return *this; };
+  const LDLT& adjoint() const { return *this; }

  EIGEN_DEVICE_FUNC inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }
  EIGEN_DEVICE_FUNC inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }
@@ -270,10 +272,7 @@ template<typename _MatrixType, int _UpLo> class LDLT

  protected:

-  static void check_template_parameters()
-  {
-    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
-  }
+  EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)

  /** \internal
   * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
@@ -441,7 +440,7 @@ template<> struct ldlt_inplace<Lower>
       // Update the terms of L
       Index rs = size-j-1;
       w.tail(rs) -= wj * mat.col(j).tail(rs);
-      if(gamma != 0)
+      if(!numext::is_exactly_zero(gamma))
         mat.col(j).tail(rs) += (sigma*numext::conj(wj)/gamma)*w.tail(rs);
     }
     return true;
@@ -494,12 +493,10 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>

 /** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix
  */
-template<typename MatrixType, int _UpLo>
+template<typename MatrixType, int UpLo_>
 template<typename InputType>
-LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
+LDLT<MatrixType,UpLo_>& LDLT<MatrixType,UpLo_>::compute(const EigenBase<InputType>& a)
 {
-  check_template_parameters();
-
   eigen_assert(a.rows()==a.cols());
   const Index size = a.rows();

@@ -510,7 +507,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const EigenBase<InputTyp
   // TODO move this code to SelfAdjointView
   for (Index col = 0; col < size; ++col) {
     RealScalar abs_col_sum;
-    if (_UpLo == Lower)
+    if (UpLo_ == Lower)
       abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
     else
       abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
@@ -534,9 +531,9 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const EigenBase<InputTyp
 * \param sigma a scalar, +1 for updates and -1 for "downdates," which correspond to removing previously-added column vectors. Optional; default value is +1.
 * \sa setZero()
 */
-template<typename MatrixType, int _UpLo>
+template<typename MatrixType, int UpLo_>
 template<typename Derived>
-LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename LDLT<MatrixType,_UpLo>::RealScalar& sigma)
+LDLT<MatrixType,UpLo_>& LDLT<MatrixType,UpLo_>::rankUpdate(const MatrixBase<Derived>& w, const typename LDLT<MatrixType,UpLo_>::RealScalar& sigma)
 {
   typedef typename TranspositionType::StorageIndex IndexType;
   const Index size = w.rows();
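
For reference, rankUpdate() is used like this on an existing factorization (our sketch): it turns a factorization of A into one of A + sigma * w * w^T without recomputing from scratch.

#include <Eigen/Dense>

void addRankOne(Eigen::LDLT<Eigen::MatrixXd>& ldlt, const Eigen::VectorXd& w) {
  ldlt.rankUpdate(w, +1.0);  // sigma = +1 updates; sigma = -1 "downdates"
}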
@@ -562,16 +559,16 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
 }

 #ifndef EIGEN_PARSED_BY_DOXYGEN
-template<typename _MatrixType, int _UpLo>
+template<typename MatrixType_, int UpLo_>
 template<typename RhsType, typename DstType>
-void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
+void LDLT<MatrixType_,UpLo_>::_solve_impl(const RhsType &rhs, DstType &dst) const
 {
   _solve_impl_transposed<true>(rhs, dst);
 }

-template<typename _MatrixType,int _UpLo>
+template<typename MatrixType_,int UpLo_>
 template<bool Conjugate, typename RhsType, typename DstType>
-void LDLT<_MatrixType,_UpLo>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const
+void LDLT<MatrixType_,UpLo_>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const
 {
   // dst = P b
   dst = m_transpositions * rhs;
@@ -624,9 +621,9 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl_transposed(const RhsType &rhs, DstType
 *
 * \sa LDLT::solve(), MatrixBase::ldlt()
 */
-template<typename MatrixType,int _UpLo>
+template<typename MatrixType,int UpLo_>
 template<typename Derived>
-bool LDLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
+bool LDLT<MatrixType,UpLo_>::solveInPlace(MatrixBase<Derived> &bAndX) const
 {
   eigen_assert(m_isInitialized && "LDLT is not initialized.");
   eigen_assert(m_matrix.rows() == bAndX.rows());
@@ -639,8 +636,8 @@ bool LDLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
 /** \returns the matrix represented by the decomposition,
  * i.e., it returns the product: P^T L D L^* P.
  * This function is provided for debug purpose. */
-template<typename MatrixType, int _UpLo>
-MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
+template<typename MatrixType, int UpLo_>
+MatrixType LDLT<MatrixType,UpLo_>::reconstructedMatrix() const
 {
   eigen_assert(m_isInitialized && "LDLT is not initialized.");
   const Index size = m_matrix.rows();

libs/eigen/Eigen/src/Cholesky/LLT.h (path inferred from the include-guard below)
@@ -10,12 +10,14 @@
 #ifndef EIGEN_LLT_H
 #define EIGEN_LLT_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal{

-template<typename _MatrixType, int _UpLo> struct traits<LLT<_MatrixType, _UpLo> >
- : traits<_MatrixType>
+template<typename MatrixType_, int UpLo_> struct traits<LLT<MatrixType_, UpLo_> >
+ : traits<MatrixType_>
 {
   typedef MatrixXpr XprKind;
   typedef SolverStorage StorageKind;
@@ -32,8 +34,8 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
 *
 * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
 *
- * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
- * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
+ * \tparam MatrixType_ the type of the matrix of which we are computing the LL^T Cholesky decomposition
+ * \tparam UpLo_ the triangular part that will be used for the decomposition: Lower (default) or Upper.
 * The other triangular part won't be read.
 *
 * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
@@ -58,16 +60,16 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
 *
 * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
 *
- * Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered.
+ * Note that during the decomposition, only the lower (or upper, as defined by UpLo_) triangular part of A is considered.
 * Therefore, the strict lower part does not have to store correct values.
 *
 * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
 */
-template<typename _MatrixType, int _UpLo> class LLT
- : public SolverBase<LLT<_MatrixType, _UpLo> >
+template<typename MatrixType_, int UpLo_> class LLT
+ : public SolverBase<LLT<MatrixType_, UpLo_> >
 {
  public:
-  typedef _MatrixType MatrixType;
+  typedef MatrixType_ MatrixType;
   typedef SolverBase<LLT> Base;
   friend class SolverBase<LLT>;

@@ -79,7 +81,7 @@ template<typename _MatrixType, int _UpLo> class LLT
   enum {
     PacketSize = internal::packet_traits<Scalar>::size,
     AlignmentMask = int(PacketSize)-1,
-    UpLo = _UpLo
+    UpLo = UpLo_
   };

   typedef internal::LLT_Traits<MatrixType,UpLo> Traits;
@@ -199,7 +201,7 @@ template<typename _MatrixType, int _UpLo> class LLT
   * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
   * \code x = decomposition.adjoint().solve(b) \endcode
   */
-  const LLT& adjoint() const EIGEN_NOEXCEPT { return *this; };
+  const LLT& adjoint() const EIGEN_NOEXCEPT { return *this; }

  inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }
  inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }
@@ -217,10 +219,7 @@ template<typename _MatrixType, int _UpLo> class LLT

  protected:

-  static void check_template_parameters()
-  {
-    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
-  }
+  EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)

  /** \internal
   * Used to compute and store L
@@ -243,7 +242,7 @@ static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const
   typedef typename MatrixType::Scalar Scalar;
   typedef typename MatrixType::RealScalar RealScalar;
   typedef typename MatrixType::ColXpr ColXpr;
-  typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;
+  typedef internal::remove_all_t<ColXpr> ColXprCleaned;
   typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;
   typedef Matrix<Scalar,Dynamic,1> TempVectorType;
   typedef typename TempVectorType::SegmentReturnType TempVecSegment;
@@ -298,7 +297,7 @@ static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const
       if(rs)
       {
         temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs);
-        if(gamma != 0)
+        if(!numext::is_exactly_zero(gamma))
           mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*numext::conj(wj)/gamma)*temp.tail(rs);
       }
     }
@@ -427,12 +426,10 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
 * Example: \include TutorialLinAlgComputeTwice.cpp
 * Output: \verbinclude TutorialLinAlgComputeTwice.out
 */
-template<typename MatrixType, int _UpLo>
+template<typename MatrixType, int UpLo_>
 template<typename InputType>
-LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
+LLT<MatrixType,UpLo_>& LLT<MatrixType,UpLo_>::compute(const EigenBase<InputType>& a)
 {
-  check_template_parameters();
-
   eigen_assert(a.rows()==a.cols());
   const Index size = a.rows();
   m_matrix.resize(size, size);
@@ -444,7 +441,7 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>
   // TODO move this code to SelfAdjointView
   for (Index col = 0; col < size; ++col) {
     RealScalar abs_col_sum;
-    if (_UpLo == Lower)
+    if (UpLo_ == Lower)
       abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
     else
       abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
@@ -464,9 +461,9 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>
 * then after it we have LL^* = A + sigma * v v^* where \a v must be a vector
 * of same dimension.
 */
-template<typename _MatrixType, int _UpLo>
+template<typename MatrixType_, int UpLo_>
 template<typename VectorType>
-LLT<_MatrixType,_UpLo> & LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma)
+LLT<MatrixType_,UpLo_> & LLT<MatrixType_,UpLo_>::rankUpdate(const VectorType& v, const RealScalar& sigma)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType);
   eigen_assert(v.size()==m_matrix.cols());
@@ -480,16 +477,16 @@ LLT<_MatrixType,_UpLo> & LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v,
 }

 #ifndef EIGEN_PARSED_BY_DOXYGEN
-template<typename _MatrixType,int _UpLo>
+template<typename MatrixType_,int UpLo_>
 template<typename RhsType, typename DstType>
-void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
+void LLT<MatrixType_,UpLo_>::_solve_impl(const RhsType &rhs, DstType &dst) const
 {
   _solve_impl_transposed<true>(rhs, dst);
 }

-template<typename _MatrixType,int _UpLo>
+template<typename MatrixType_,int UpLo_>
 template<bool Conjugate, typename RhsType, typename DstType>
-void LLT<_MatrixType,_UpLo>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const
+void LLT<MatrixType_,UpLo_>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const
 {
   dst = rhs;

@@ -511,9 +508,9 @@ void LLT<_MatrixType,_UpLo>::_solve_impl_transposed(const RhsType &rhs, DstType
 *
 * \sa LLT::solve(), MatrixBase::llt()
 */
-template<typename MatrixType, int _UpLo>
+template<typename MatrixType, int UpLo_>
 template<typename Derived>
-void LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const
+void LLT<MatrixType,UpLo_>::solveInPlace(const MatrixBase<Derived> &bAndX) const
 {
   eigen_assert(m_isInitialized && "LLT is not initialized.");
   eigen_assert(m_matrix.rows()==bAndX.rows());
@@ -524,8 +521,8 @@ void LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const
 /** \returns the matrix represented by the decomposition,
  * i.e., it returns the product: L L^*.
  * This function is provided for debug purpose. */
-template<typename MatrixType, int _UpLo>
-MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
+template<typename MatrixType, int UpLo_>
+MatrixType LLT<MatrixType,UpLo_>::reconstructedMatrix() const
 {
   eigen_assert(m_isInitialized && "LLT is not initialized.");
   return matrixL() * matrixL().adjoint().toDenseMatrix();

libs/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h (path inferred from the include-guard below; removed and added lines regrouped from the flattened side-by-side view)
@@ -33,64 +33,86 @@
 #ifndef EIGEN_LLT_LAPACKE_H
 #define EIGEN_LLT_LAPACKE_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal {

-template<typename Scalar> struct lapacke_llt;
-
-#define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \
-template<> struct lapacke_llt<EIGTYPE> \
-{ \
-  template<typename MatrixType> \
-  static inline Index potrf(MatrixType& m, char uplo) \
-  { \
-    lapack_int matrix_order; \
-    lapack_int size, lda, info, StorageOrder; \
-    EIGTYPE* a; \
-    eigen_assert(m.rows()==m.cols()); \
-    /* Set up parameters for ?potrf */ \
-    size = convert_index<lapack_int>(m.rows()); \
-    StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
-    matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
-    a = &(m.coeffRef(0,0)); \
-    lda = convert_index<lapack_int>(m.outerStride()); \
-\
-    info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \
-    info = (info==0) ? -1 : info>0 ? info-1 : size; \
-    return info; \
-  } \
-}; \
-template<> struct llt_inplace<EIGTYPE, Lower> \
-{ \
-  template<typename MatrixType> \
-  static Index blocked(MatrixType& m) \
-  { \
-    return lapacke_llt<EIGTYPE>::potrf(m, 'L'); \
-  } \
-  template<typename MatrixType, typename VectorType> \
-  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
-  { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \
-}; \
-template<> struct llt_inplace<EIGTYPE, Upper> \
-{ \
-  template<typename MatrixType> \
-  static Index blocked(MatrixType& m) \
-  { \
-    return lapacke_llt<EIGTYPE>::potrf(m, 'U'); \
-  } \
-  template<typename MatrixType, typename VectorType> \
-  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
-  { \
-    Transpose<MatrixType> matt(mat); \
-    return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate(), sigma); \
-  } \
-};
-
-EIGEN_LAPACKE_LLT(double, double, d)
-EIGEN_LAPACKE_LLT(float, float, s)
-EIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z)
-EIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c)
+namespace lapacke_helpers {
+// -------------------------------------------------------------------------------------------------------------------
+// Dispatch for rank update handling upper and lower parts
+// -------------------------------------------------------------------------------------------------------------------
+
+template<UpLoType Mode>
+struct rank_update {};
+
+template<>
+struct rank_update<Lower> {
+  template<typename MatrixType, typename VectorType>
+  static Index run(MatrixType &mat, const VectorType &vec, const typename MatrixType::RealScalar &sigma) {
+    return Eigen::internal::llt_rank_update_lower(mat, vec, sigma);
+  }
+};
+
+template<>
+struct rank_update<Upper> {
+  template<typename MatrixType, typename VectorType>
+  static Index run(MatrixType &mat, const VectorType &vec, const typename MatrixType::RealScalar &sigma) {
+    Transpose<MatrixType> matt(mat);
+    return Eigen::internal::llt_rank_update_lower(matt, vec.conjugate(), sigma);
+  }
+};
+
+// -------------------------------------------------------------------------------------------------------------------
+// Generic lapacke llt implementation that hands off to the dispatches
+// -------------------------------------------------------------------------------------------------------------------
+
+template<typename Scalar, UpLoType Mode>
+struct lapacke_llt {
+  template<typename MatrixType>
+  static Index blocked(MatrixType& m)
+  {
+    eigen_assert(m.rows() == m.cols());
+    if(m.rows() == 0) {
+      return -1;
+    }
+    /* Set up parameters for ?potrf */
+    lapack_int size = to_lapack(m.rows());
+    lapack_int matrix_order = lapack_storage_of(m);
+    Scalar* a = &(m.coeffRef(0,0));
+    lapack_int lda = to_lapack(m.outerStride());
+
+    lapack_int info = potrf(matrix_order, translate_mode<Mode>, size, to_lapack(a), lda );
+    info = (info==0) ? -1 : info>0 ? info-1 : size;
+    return info;
+  }
+
+  template<typename MatrixType, typename VectorType>
+  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma)
+  {
+    return rank_update<Mode>::run(mat, vec, sigma);
+  }
+};
+}
+// end namespace lapacke_helpers
+
+/*
+ * Here, we just put the generic implementation from lapacke_llt into a full specialization of the llt_inplace
+ * type. By being a full specialization, the versions defined here thus get precedence over the generic implementation
+ * in LLT.h for double, float and complex double, complex float types.
+ */
+
+#define EIGEN_LAPACKE_LLT(EIGTYPE) \
+template<> struct llt_inplace<EIGTYPE, Lower> : public lapacke_helpers::lapacke_llt<EIGTYPE, Lower> {}; \
+template<> struct llt_inplace<EIGTYPE, Upper> : public lapacke_helpers::lapacke_llt<EIGTYPE, Upper> {};
+
+EIGEN_LAPACKE_LLT(double)
+EIGEN_LAPACKE_LLT(float)
+EIGEN_LAPACKE_LLT(std::complex<double>)
+EIGEN_LAPACKE_LLT(std::complex<float>)
+
+#undef EIGEN_LAPACKE_LLT

 } // end namespace internal

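
The dispatch mechanism the new code relies on, in miniature (our illustration): a full specialization of a class template always beats the primary template, which is how the llt_inplace specializations defined here take precedence over the generic implementation in LLT.h.

#include <iostream>

template <typename Scalar>
struct solver {                 // primary template: generic path
  static const char* name() { return "generic"; }
};

template <>
struct solver<double> {         // full specialization: LAPACK-style fast path
  static const char* name() { return "lapack-backed"; }
};

int main() {
  std::cout << solver<float>::name()  << '\n';  // prints "generic"
  std::cout << solver<double>::name() << '\n';  // prints "lapack-backed"
  return 0;
}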
libs/eigen/Eigen/src/CholmodSupport/CholmodSupport.h (path inferred from the include-guard below)
@@ -10,6 +10,8 @@
 #ifndef EIGEN_CHOLMODSUPPORT_H
 #define EIGEN_CHOLMODSUPPORT_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal {
@@ -54,8 +56,8 @@ template<> struct cholmod_configure_matrix<std::complex<double> > {
 /** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object.
  * Note that the data are shared.
  */
-template<typename _Scalar, int _Options, typename _StorageIndex>
-cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> > mat)
+template<typename Scalar_, int Options_, typename StorageIndex_>
+cholmod_sparse viewAsCholmod(Ref<SparseMatrix<Scalar_,Options_,StorageIndex_> > mat)
 {
   cholmod_sparse res;
   res.nzmax = mat.nonZeros();
@@ -80,11 +82,11 @@ cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> >
   res.dtype = 0;
   res.stype = -1;

-  if (internal::is_same<_StorageIndex,int>::value)
+  if (internal::is_same<StorageIndex_,int>::value)
   {
     res.itype = CHOLMOD_INT;
   }
-  else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value)
+  else if (internal::is_same<StorageIndex_,SuiteSparse_long>::value)
   {
     res.itype = CHOLMOD_LONG;
   }
@@ -94,39 +96,39 @@ cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> >
   }

   // setup res.xtype
-  internal::cholmod_configure_matrix<_Scalar>::run(res);
+  internal::cholmod_configure_matrix<Scalar_>::run(res);

   res.stype = 0;

   return res;
 }

-template<typename _Scalar, int _Options, typename _Index>
-const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat)
+template<typename Scalar_, int Options_, typename Index_>
+const cholmod_sparse viewAsCholmod(const SparseMatrix<Scalar_,Options_,Index_>& mat)
 {
-  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
+  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<Scalar_,Options_,Index_> >(mat.const_cast_derived()));
   return res;
 }

-template<typename _Scalar, int _Options, typename _Index>
-const cholmod_sparse viewAsCholmod(const SparseVector<_Scalar,_Options,_Index>& mat)
+template<typename Scalar_, int Options_, typename Index_>
+const cholmod_sparse viewAsCholmod(const SparseVector<Scalar_,Options_,Index_>& mat)
 {
-  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
+  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<Scalar_,Options_,Index_> >(mat.const_cast_derived()));
   return res;
 }

 /** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix.
  * The data are not copied but shared. */
-template<typename _Scalar, int _Options, typename _Index, unsigned int UpLo>
-cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
+template<typename Scalar_, int Options_, typename Index_, unsigned int UpLo>
+cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<Scalar_,Options_,Index_>, UpLo>& mat)
 {
-  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.matrix().const_cast_derived()));
+  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<Scalar_,Options_,Index_> >(mat.matrix().const_cast_derived()));

   if(UpLo==Upper) res.stype = 1;
   if(UpLo==Lower) res.stype = -1;
   // swap stype for rowmajor matrices (only works for real matrices)
-  EIGEN_STATIC_ASSERT((_Options & RowMajorBit) == 0 || NumTraits<_Scalar>::IsComplex == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
-  if(_Options & RowMajorBit) res.stype *=-1;
+  EIGEN_STATIC_ASSERT((Options_ & RowMajorBit) == 0 || NumTraits<Scalar_>::IsComplex == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
+  if(Options_ & RowMajorBit) res.stype *=-1;

   return res;
 }
@@ -155,9 +157,9 @@ cholmod_dense viewAsCholmod(MatrixBase<Derived>& mat)
 /** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix.
  * The data are not copied but shared. */
 template<typename Scalar, int Flags, typename StorageIndex>
-MappedSparseMatrix<Scalar,Flags,StorageIndex> viewAsEigen(cholmod_sparse& cm)
+Map<SparseMatrix<Scalar,Flags,StorageIndex> > viewAsEigen(cholmod_sparse& cm)
 {
-  return MappedSparseMatrix<Scalar,Flags,StorageIndex>
+  return Map<SparseMatrix<Scalar,Flags,StorageIndex> >
          (cm.nrow, cm.ncol, static_cast<StorageIndex*>(cm.p)[cm.ncol],
           static_cast<StorageIndex*>(cm.p), static_cast<StorageIndex*>(cm.i),static_cast<Scalar*>(cm.x) );
 }
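
Callers are unaffected by the switch from the deprecated MappedSparseMatrix to Map<SparseMatrix>; both wrap existing compressed-column arrays without copying. A standalone sketch of the new return type (our illustration):

#include <Eigen/Sparse>

double sumOfEntries(int rows, int cols, int nnz,
                    int* outerStarts, int* innerIndices, double* values) {
  Eigen::Map<Eigen::SparseMatrix<double, Eigen::ColMajor, int> >
      m(rows, cols, nnz, outerStarts, innerIndices, values);  // shares the arrays
  return m.sum();
}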
@@ -167,11 +169,11 @@ namespace internal {
 // template specializations for int and long that call the correct cholmod method

 #define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \
-    template<typename _StorageIndex> inline ret cm_ ## name (cholmod_common &Common) { return cholmod_ ## name (&Common); } \
+    template<typename StorageIndex_> inline ret cm_ ## name (cholmod_common &Common) { return cholmod_ ## name (&Common); } \
     template<> inline ret cm_ ## name<SuiteSparse_long> (cholmod_common &Common) { return cholmod_l_ ## name (&Common); }

 #define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \
-    template<typename _StorageIndex> inline ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_ ## name (&a1, &Common); } \
+    template<typename StorageIndex_> inline ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_ ## name (&a1, &Common); } \
     template<> inline ret cm_ ## name<SuiteSparse_long> (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); }

 EIGEN_CHOLMOD_SPECIALIZE0(int, start)
@@ -183,14 +185,14 @@ EIGEN_CHOLMOD_SPECIALIZE1(int, free_sparse, cholmod_sparse*, A)

 EIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A)

-template<typename _StorageIndex> inline cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_solve (sys, &L, &B, &Common); }
+template<typename StorageIndex_> inline cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_solve (sys, &L, &B, &Common); }
 template<> inline cholmod_dense* cm_solve<SuiteSparse_long> (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_l_solve (sys, &L, &B, &Common); }

-template<typename _StorageIndex> inline cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve (sys, &L, &B, &Common); }
+template<typename StorageIndex_> inline cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve (sys, &L, &B, &Common); }
 template<> inline cholmod_sparse* cm_spsolve<SuiteSparse_long> (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); }

-template<typename _StorageIndex>
-inline int cm_factorize_p (cholmod_sparse* A, double beta[2], _StorageIndex* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p (A, beta, fset, fsize, L, &Common); }
+template<typename StorageIndex_>
+inline int cm_factorize_p (cholmod_sparse* A, double beta[2], StorageIndex_* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p (A, beta, fset, fsize, L, &Common); }
 template<>
 inline int cm_factorize_p<SuiteSparse_long> (cholmod_sparse* A, double beta[2], SuiteSparse_long* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); }
@@ -210,7 +212,7 @@ enum CholmodMode {
 * \brief The base class for the direct Cholesky factorization of Cholmod
 * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT
 */
-template<typename _MatrixType, int _UpLo, typename Derived>
+template<typename MatrixType_, int UpLo_, typename Derived>
 class CholmodBase : public SparseSolverBase<Derived>
 {
   protected:
@@ -218,8 +220,8 @@ class CholmodBase : public SparseSolverBase<Derived>
     using Base::derived;
     using Base::m_isInitialized;
   public:
-    typedef _MatrixType MatrixType;
-    enum { UpLo = _UpLo };
+    typedef MatrixType_ MatrixType;
+    enum { UpLo = UpLo_ };
     typedef typename MatrixType::Scalar Scalar;
     typedef typename MatrixType::RealScalar RealScalar;
     typedef MatrixType CholMatrixType;
@@ -436,7 +438,7 @@ class CholmodBase : public SparseSolverBase<Derived>
       if (m_cholmodFactor->is_ll)
         logDet *= 2.0;
       return logDet;
-    };
+    }

     template<typename Stream>
     void dumpMemory(Stream& /*s*/)
@@ -461,8 +463,8 @@ class CholmodBase : public SparseSolverBase<Derived>
 * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
 * X and B can be either dense or sparse.
 *
- * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
- * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower
 *               or Upper. Default is Lower.
 *
 * \implsparsesolverconcept
@@ -473,15 +475,15 @@ class CholmodBase : public SparseSolverBase<Derived>
 *
 * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT
 */
-template<typename _MatrixType, int _UpLo = Lower>
-class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> >
+template<typename MatrixType_, int UpLo_ = Lower>
+class CholmodSimplicialLLT : public CholmodBase<MatrixType_, UpLo_, CholmodSimplicialLLT<MatrixType_, UpLo_> >
 {
-    typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT> Base;
+    typedef CholmodBase<MatrixType_, UpLo_, CholmodSimplicialLLT> Base;
     using Base::m_cholmod;

   public:

-    typedef _MatrixType MatrixType;
+    typedef MatrixType_ MatrixType;

     CholmodSimplicialLLT() : Base() { init(); }
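
Typical use of the Cholmod solvers documented above (our sketch; it assumes SuiteSparse/CHOLMOD is installed and linked):

#include <Eigen/CholmodSupport>
#include <Eigen/Sparse>

Eigen::VectorXd solveSpd(const Eigen::SparseMatrix<double>& A,
                         const Eigen::VectorXd& b) {
  Eigen::CholmodSimplicialLLT<Eigen::SparseMatrix<double> > solver(A);
  return solver.solve(b);  // check solver.info() in production code
}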
@@ -512,8 +514,8 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
 * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
 * X and B can be either dense or sparse.
 *
- * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
- * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower
 *               or Upper. Default is Lower.
 *
 * \implsparsesolverconcept
@@ -524,15 +526,15 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
 *
 * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT
 */
-template<typename _MatrixType, int _UpLo = Lower>
-class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> >
+template<typename MatrixType_, int UpLo_ = Lower>
+class CholmodSimplicialLDLT : public CholmodBase<MatrixType_, UpLo_, CholmodSimplicialLDLT<MatrixType_, UpLo_> >
 {
-    typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT> Base;
+    typedef CholmodBase<MatrixType_, UpLo_, CholmodSimplicialLDLT> Base;
     using Base::m_cholmod;

   public:

-    typedef _MatrixType MatrixType;
+    typedef MatrixType_ MatrixType;

     CholmodSimplicialLDLT() : Base() { init(); }
@@ -561,8 +563,8 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
 * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
 * X and B can be either dense or sparse.
 *
- * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
- * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower
 *               or Upper. Default is Lower.
 *
 * \implsparsesolverconcept
@@ -573,15 +575,15 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
 *
 * \sa \ref TutorialSparseSolverConcept
 */
-template<typename _MatrixType, int _UpLo = Lower>
-class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> >
+template<typename MatrixType_, int UpLo_ = Lower>
+class CholmodSupernodalLLT : public CholmodBase<MatrixType_, UpLo_, CholmodSupernodalLLT<MatrixType_, UpLo_> >
 {
-    typedef CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT> Base;
+    typedef CholmodBase<MatrixType_, UpLo_, CholmodSupernodalLLT> Base;
     using Base::m_cholmod;

   public:

-    typedef _MatrixType MatrixType;
+    typedef MatrixType_ MatrixType;

     CholmodSupernodalLLT() : Base() { init(); }
@@ -612,8 +614,8 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
 * On the other hand, it does not provide access to the result of the factorization.
 * The default is to let Cholmod automatically choose between a simplicial and supernodal factorization.
 *
- * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
- * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower
 *               or Upper. Default is Lower.
 *
 * \implsparsesolverconcept
@@ -624,15 +626,15 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
 *
 * \sa \ref TutorialSparseSolverConcept
 */
-template<typename _MatrixType, int _UpLo = Lower>
-class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> >
+template<typename MatrixType_, int UpLo_ = Lower>
+class CholmodDecomposition : public CholmodBase<MatrixType_, UpLo_, CholmodDecomposition<MatrixType_, UpLo_> >
 {
-    typedef CholmodBase<_MatrixType, _UpLo, CholmodDecomposition> Base;
+    typedef CholmodBase<MatrixType_, UpLo_, CholmodDecomposition> Base;
     using Base::m_cholmod;

   public:

-    typedef _MatrixType MatrixType;
+    typedef MatrixType_ MatrixType;

     CholmodDecomposition() : Base() { init(); }

libs/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h (new file, 3 lines; path inferred from the include-guard below)
@@ -0,0 +1,3 @@
#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H
#error "Please include Eigen/CholmodSupport instead of including headers inside the src directory directly."
#endif
libs/eigen/Eigen/src/Core/ArithmeticSequence.h (path inferred from the include-guard below)
@@ -10,69 +10,18 @@
 #ifndef EIGEN_ARITHMETIC_SEQUENCE_H
 #define EIGEN_ARITHMETIC_SEQUENCE_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal {

-#if (!EIGEN_HAS_CXX11) || !((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48)
-template<typename T> struct aseq_negate {};
-
-template<> struct aseq_negate<Index> {
-  typedef Index type;
-};
-
-template<int N> struct aseq_negate<FixedInt<N> > {
-  typedef FixedInt<-N> type;
-};
-
-// Compilation error in the following case:
-template<> struct aseq_negate<FixedInt<DynamicIndex> > {};
-
-template<typename FirstType,typename SizeType,typename IncrType,
-         bool FirstIsSymbolic=symbolic::is_symbolic<FirstType>::value,
-         bool SizeIsSymbolic =symbolic::is_symbolic<SizeType>::value>
-struct aseq_reverse_first_type {
-  typedef Index type;
-};
-
-template<typename FirstType,typename SizeType,typename IncrType>
-struct aseq_reverse_first_type<FirstType,SizeType,IncrType,true,true> {
-  typedef symbolic::AddExpr<FirstType,
-                            symbolic::ProductExpr<symbolic::AddExpr<SizeType,symbolic::ValueExpr<FixedInt<-1> > >,
-                                                  symbolic::ValueExpr<IncrType> >
-                           > type;
-};
-
-template<typename SizeType,typename IncrType,typename EnableIf = void>
-struct aseq_reverse_first_type_aux {
-  typedef Index type;
-};
-
-template<typename SizeType,typename IncrType>
-struct aseq_reverse_first_type_aux<SizeType,IncrType,typename internal::enable_if<bool((SizeType::value+IncrType::value)|0x1)>::type> {
-  typedef FixedInt<(SizeType::value-1)*IncrType::value> type;
-};
-
-template<typename FirstType,typename SizeType,typename IncrType>
-struct aseq_reverse_first_type<FirstType,SizeType,IncrType,true,false> {
-  typedef typename aseq_reverse_first_type_aux<SizeType,IncrType>::type Aux;
-  typedef symbolic::AddExpr<FirstType,symbolic::ValueExpr<Aux> > type;
-};
-
-template<typename FirstType,typename SizeType,typename IncrType>
-struct aseq_reverse_first_type<FirstType,SizeType,IncrType,false,true> {
-  typedef symbolic::AddExpr<symbolic::ProductExpr<symbolic::AddExpr<SizeType,symbolic::ValueExpr<FixedInt<-1> > >,
-                                                  symbolic::ValueExpr<IncrType> >,
-                            symbolic::ValueExpr<> > type;
-};
-#endif
-
 // Helper to cleanup the type of the increment:
 template<typename T> struct cleanup_seq_incr {
   typedef typename cleanup_index_type<T,DynamicIndex>::type type;
 };

-}
+} // namespace internal

 //--------------------------------------------------------------------------------
 // seq(first,last,incr) and seqN(first,size,incr)
@@ -137,21 +86,9 @@ protected:
|
||||
IncrType m_incr;
|
||||
|
||||
public:
|
||||
|
||||
#if EIGEN_HAS_CXX11 && ((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48)
|
||||
auto reverse() const -> decltype(Eigen::seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr)) {
|
||||
return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr);
|
||||
}
|
||||
#else
|
||||
protected:
|
||||
typedef typename internal::aseq_negate<IncrType>::type ReverseIncrType;
|
||||
typedef typename internal::aseq_reverse_first_type<FirstType,SizeType,IncrType>::type ReverseFirstType;
|
||||
public:
|
||||
ArithmeticSequence<ReverseFirstType,SizeType,ReverseIncrType>
|
||||
reverse() const {
|
||||
return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
/** \returns an ArithmeticSequence starting at \a first, of length \a size, and increment \a incr
|
||||
@@ -200,7 +137,6 @@ auto seq(FirstType f, LastType l);
|
||||
|
||||
#else // EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
#if EIGEN_HAS_CXX11
|
||||
template<typename FirstType,typename LastType>
|
||||
auto seq(FirstType f, LastType l) -> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f),
|
||||
( typename internal::cleanup_index_type<LastType>::type(l)
|
||||
@@ -226,101 +162,11 @@ auto seq(FirstType f, LastType l, IncrType incr)
|
||||
CleanedIncrType(incr));
|
||||
}
|
||||
|
||||
#else // EIGEN_HAS_CXX11
|
||||
|
||||
template<typename FirstType,typename LastType>
|
||||
typename internal::enable_if<!(symbolic::is_symbolic<FirstType>::value || symbolic::is_symbolic<LastType>::value),
|
||||
ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,Index> >::type
|
||||
seq(FirstType f, LastType l)
|
||||
{
|
||||
return seqN(typename internal::cleanup_index_type<FirstType>::type(f),
|
||||
Index((typename internal::cleanup_index_type<LastType>::type(l)-typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>())));
|
||||
}
|
||||
|
||||
template<typename FirstTypeDerived,typename LastType>
|
||||
typename internal::enable_if<!symbolic::is_symbolic<LastType>::value,
|
||||
ArithmeticSequence<FirstTypeDerived, symbolic::AddExpr<symbolic::AddExpr<symbolic::NegateExpr<FirstTypeDerived>,symbolic::ValueExpr<> >,
|
||||
symbolic::ValueExpr<internal::FixedInt<1> > > > >::type
|
||||
seq(const symbolic::BaseExpr<FirstTypeDerived> &f, LastType l)
|
||||
{
|
||||
return seqN(f.derived(),(typename internal::cleanup_index_type<LastType>::type(l)-f.derived()+fix<1>()));
|
||||
}
|
||||
|
||||
template<typename FirstType,typename LastTypeDerived>
|
||||
typename internal::enable_if<!symbolic::is_symbolic<FirstType>::value,
|
||||
ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,
|
||||
symbolic::AddExpr<symbolic::AddExpr<LastTypeDerived,symbolic::ValueExpr<> >,
|
||||
symbolic::ValueExpr<internal::FixedInt<1> > > > >::type
|
||||
seq(FirstType f, const symbolic::BaseExpr<LastTypeDerived> &l)
|
||||
{
|
||||
return seqN(typename internal::cleanup_index_type<FirstType>::type(f),(l.derived()-typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>()));
|
||||
}
|
||||
|
||||
template<typename FirstTypeDerived,typename LastTypeDerived>
|
||||
ArithmeticSequence<FirstTypeDerived,
|
||||
symbolic::AddExpr<symbolic::AddExpr<LastTypeDerived,symbolic::NegateExpr<FirstTypeDerived> >,symbolic::ValueExpr<internal::FixedInt<1> > > >
|
||||
seq(const symbolic::BaseExpr<FirstTypeDerived> &f, const symbolic::BaseExpr<LastTypeDerived> &l)
|
||||
{
|
||||
return seqN(f.derived(),(l.derived()-f.derived()+fix<1>()));
|
||||
}
|
||||
|
||||
|
||||
template<typename FirstType,typename LastType, typename IncrType>
|
||||
typename internal::enable_if<!(symbolic::is_symbolic<FirstType>::value || symbolic::is_symbolic<LastType>::value),
|
||||
ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,Index,typename internal::cleanup_seq_incr<IncrType>::type> >::type
|
||||
seq(FirstType f, LastType l, IncrType incr)
|
||||
{
|
||||
typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType;
|
||||
return seqN(typename internal::cleanup_index_type<FirstType>::type(f),
|
||||
Index((typename internal::cleanup_index_type<LastType>::type(l)-typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr)), incr);
|
||||
}
|
||||
|
||||
template<typename FirstTypeDerived,typename LastType, typename IncrType>
|
||||
typename internal::enable_if<!symbolic::is_symbolic<LastType>::value,
|
||||
ArithmeticSequence<FirstTypeDerived,
|
||||
symbolic::QuotientExpr<symbolic::AddExpr<symbolic::AddExpr<symbolic::NegateExpr<FirstTypeDerived>,
|
||||
symbolic::ValueExpr<> >,
|
||||
symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >,
|
||||
symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >,
|
||||
typename internal::cleanup_seq_incr<IncrType>::type> >::type
|
||||
seq(const symbolic::BaseExpr<FirstTypeDerived> &f, LastType l, IncrType incr)
|
||||
{
|
||||
typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType;
|
||||
return seqN(f.derived(),(typename internal::cleanup_index_type<LastType>::type(l)-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr);
|
||||
}
|
||||
|
||||
template<typename FirstType,typename LastTypeDerived, typename IncrType>
|
||||
typename internal::enable_if<!symbolic::is_symbolic<FirstType>::value,
|
||||
ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,
|
||||
symbolic::QuotientExpr<symbolic::AddExpr<symbolic::AddExpr<LastTypeDerived,symbolic::ValueExpr<> >,
|
||||
symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >,
|
||||
symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >,
|
||||
typename internal::cleanup_seq_incr<IncrType>::type> >::type
|
||||
seq(FirstType f, const symbolic::BaseExpr<LastTypeDerived> &l, IncrType incr)
|
||||
{
|
||||
typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType;
|
||||
return seqN(typename internal::cleanup_index_type<FirstType>::type(f),
|
||||
(l.derived()-typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr), incr);
|
||||
}
|
||||
|
||||
template<typename FirstTypeDerived,typename LastTypeDerived, typename IncrType>
|
||||
ArithmeticSequence<FirstTypeDerived,
|
||||
symbolic::QuotientExpr<symbolic::AddExpr<symbolic::AddExpr<LastTypeDerived,
|
||||
symbolic::NegateExpr<FirstTypeDerived> >,
|
||||
symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >,
|
||||
symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >,
|
||||
typename internal::cleanup_seq_incr<IncrType>::type>
|
||||
seq(const symbolic::BaseExpr<FirstTypeDerived> &f, const symbolic::BaseExpr<LastTypeDerived> &l, IncrType incr)
|
||||
{
|
||||
typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType;
|
||||
return seqN(f.derived(),(l.derived()-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr);
|
||||
}
|
||||
#endif // EIGEN_HAS_CXX11
|
||||
|
||||
#endif // EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
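// Usage sketch (illustrative): the seq/seqN factories defined above build
// ArithmeticSequence objects that can be used to index dense expressions.
//
//   Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(10, 0.0, 9.0);
//   auto a = v(Eigen::seq(2, 8));      // elements 2..8 inclusive
//   auto b = v(Eigen::seqN(1, 4, 2));  // 4 elements starting at 1, stride 2: 1,3,5,7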
namespace placeholders {

#if EIGEN_HAS_CXX11 || defined(EIGEN_PARSED_BY_DOXYGEN)
/** \cpp11
* \returns a symbolic ArithmeticSequence representing the last \a size elements with increment \a incr.
*
@@ -329,9 +175,9 @@ seq(const symbolic::BaseExpr<FirstTypeDerived> &f, const symbolic::BaseExpr<Last
* \sa lastN(SizeType), seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */
template<typename SizeType,typename IncrType>
auto lastN(SizeType size, IncrType incr)
-> decltype(seqN(Eigen::last-(size-fix<1>())*incr, size, incr))
-> decltype(seqN(Eigen::placeholders::last-(size-fix<1>())*incr, size, incr))
{
return seqN(Eigen::last-(size-fix<1>())*incr, size, incr);
return seqN(Eigen::placeholders::last-(size-fix<1>())*incr, size, incr);
}

/** \cpp11
@@ -342,18 +188,19 @@ auto lastN(SizeType size, IncrType incr)
* \sa lastN(SizeType,IncrType, seqN(FirstType,SizeType), seq(FirstType,LastType) */
template<typename SizeType>
auto lastN(SizeType size)
-> decltype(seqN(Eigen::last+fix<1>()-size, size))
-> decltype(seqN(Eigen::placeholders::last+fix<1>()-size, size))
{
return seqN(Eigen::last+fix<1>()-size, size);
return seqN(Eigen::placeholders::last+fix<1>()-size, size);
}
#endif

} // namespace placeholders

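// Usage sketch (illustrative): the placeholders refer to the end of an
// expression symbolically, so its size never has to be spelled out.
//
//   Eigen::VectorXd v(10);
//   auto t = v(Eigen::placeholders::lastN(3));  // last three coefficients
//   double x = v(Eigen::placeholders::last);    // same as v(v.size()-1)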
namespace internal {

// Convert a symbolic span into a usable one (i.e., remove last/end "keywords")
template<typename T>
struct make_size_type {
typedef typename internal::conditional<symbolic::is_symbolic<T>::value, Index, T>::type type;
typedef std::conditional_t<symbolic::is_symbolic<T>::value, Index, T> type;
};

template<typename FirstType,typename SizeType,typename IncrType,int XprSize>
@@ -387,25 +234,23 @@ struct get_compile_time_incr<ArithmeticSequence<FirstType,SizeType,IncrType> > {
* \code using namespace Eigen::indexing; \endcode
* is equivalent to:
* \code
using Eigen::all;
using Eigen::fix;
using Eigen::seq;
using Eigen::seqN;
using Eigen::lastN; // c++11 only
using Eigen::last;
using Eigen::lastp1;
using Eigen::fix;
using Eigen::placeholders::all;
using Eigen::placeholders::last;
using Eigen::placeholders::lastN; // c++11 only
using Eigen::placeholders::lastp1;
\endcode
*/
namespace indexing {
using Eigen::all;
using Eigen::fix;
using Eigen::seq;
using Eigen::seqN;
#if EIGEN_HAS_CXX11
using Eigen::lastN;
#endif
using Eigen::last;
using Eigen::lastp1;
using Eigen::fix;
using Eigen::placeholders::all;
using Eigen::placeholders::last;
using Eigen::placeholders::lastN;
using Eigen::placeholders::lastp1;
}

} // end namespace Eigen

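// Usage sketch (illustrative): importing the indexing namespace declared above
// keeps call sites short.
//
//   using namespace Eigen::indexing;
//   Eigen::MatrixXd M(4, 4);
//   auto topRight = M(seq(0, 1), lastN(2));  // 2x2 block in the top-right corner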
@@ -10,14 +10,16 @@
#ifndef EIGEN_ARRAY_H
#define EIGEN_ARRAY_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
template<typename Scalar_, int Rows_, int Cols_, int Options_, int MaxRows_, int MaxCols_>
struct traits<Array<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> > : traits<Matrix<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> >
{
typedef ArrayXpr XprKind;
typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
typedef ArrayBase<Array<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> > XprBase;
};
}

@@ -41,16 +43,16 @@ struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : tra
*
* \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy
*/
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
template<typename Scalar_, int Rows_, int Cols_, int Options_, int MaxRows_, int MaxCols_>
class Array
: public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
: public PlainObjectBase<Array<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> >
{
public:

typedef PlainObjectBase<Array> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Array)

enum { Options = _Options };
enum { Options = Options_ };
typedef typename Base::PlainObject PlainObject;

protected:
@@ -131,7 +133,6 @@ class Array
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array() : Base()
{
Base::_check_template_params();
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}

@@ -142,17 +143,14 @@ class Array
Array(internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert())
{
Base::_check_template_params();
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#endif

#if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
: Base(std::move(other))
{
Base::_check_template_params();
}
EIGEN_DEVICE_FUNC
Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
@@ -160,9 +158,7 @@ class Array
Base::operator=(std::move(other));
return *this;
}
#endif

#if EIGEN_HAS_CXX11
/** \copydoc PlainObjectBase(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)
*
* Example: \include Array_variadic_ctor_cxx11.cpp
@@ -197,16 +193,15 @@ class Array
*
* \sa Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const std::initializer_list<std::initializer_list<Scalar>>& list) : Base(list) {}
#endif // end EIGEN_HAS_CXX11
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(
const std::initializer_list<std::initializer_list<Scalar>>& list)
: Base(list) {}

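// Usage sketch (illustrative) of the constructors above:
//
//   Eigen::Array<double, 2, 2> a{{1.0, 2.0}, {3.0, 4.0}};  // nested initializer lists
//   Eigen::Array3d v(1.0, 2.0, 3.0);                       // fixed-size 3-scalar constructor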
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE explicit Array(const T& x)
{
Base::_check_template_params();
Base::template _init1<T>(x);
}

@@ -214,7 +209,6 @@ class Array
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
{
Base::_check_template_params();
this->template _init2<T0,T1>(val0, val1);
}

@@ -249,7 +243,6 @@ class Array
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
{
Base::_check_template_params();
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3)
m_storage.data()[0] = val0;
m_storage.data()[1] = val1;
@@ -261,7 +254,6 @@ class Array
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
{
Base::_check_template_params();
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4)
m_storage.data()[0] = val0;
m_storage.data()[1] = val1;
@@ -283,8 +275,8 @@ class Array
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
PrivateType>::type = PrivateType())
std::enable_if_t<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
PrivateType> = PrivateType())
: Base(other.derived())
{ }

@@ -359,8 +351,6 @@ EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
#undef EIGEN_MAKE_ARRAY_TYPEDEFS
#undef EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS

#if EIGEN_HAS_CXX11

#define EIGEN_MAKE_ARRAY_TYPEDEFS(Size, SizeSuffix) \
/** \ingroup arraytypedefs */ \
/** \brief \cpp11 */ \
@@ -392,8 +382,6 @@ EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(4)
#undef EIGEN_MAKE_ARRAY_TYPEDEFS
#undef EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS

#endif // EIGEN_HAS_CXX11

#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
using Eigen::Vector##SizeSuffix##TypeSuffix; \

@@ -10,6 +10,8 @@
#ifndef EIGEN_ARRAYBASE_H
#define EIGEN_ARRAYBASE_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

template<typename ExpressionType> class MatrixWrapper;
@@ -21,7 +23,7 @@ template<typename ExpressionType> class MatrixWrapper;
*
* An array is similar to a dense vector or matrix. While matrices are mathematical
* objects with well defined linear algebra operators, an array is just a collection
* of scalar values arranged in a one or two dimensionnal fashion. As the main consequence,
* of scalar values arranged in a one or two dimensional fashion. As the main consequence,
* all operations applied to an array are performed coefficient wise. Furthermore,
* arrays support scalar math functions of the c++ standard library (e.g., std::sin(x)), and convenient
* constructors allowing to easily write generic code working for both scalar values

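// Usage sketch (illustrative) of the coefficient-wise semantics described above:
//
//   Eigen::ArrayXd x = Eigen::ArrayXd::LinSpaced(5, 0.0, 1.0);
//   Eigen::ArrayXd y = x * x + 1.0;  // coefficient-wise product and scalar shift
//   Eigen::ArrayXd s = x.sin();      // coefficient-wise standard math function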
@@ -10,6 +10,8 @@
#ifndef EIGEN_ARRAYWRAPPER_H
#define EIGEN_ARRAYWRAPPER_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class ArrayWrapper
@@ -26,12 +28,12 @@ namespace Eigen {
namespace internal {
template<typename ExpressionType>
struct traits<ArrayWrapper<ExpressionType> >
: public traits<typename remove_all<typename ExpressionType::Nested>::type >
: public traits<remove_all_t<typename ExpressionType::Nested> >
{
typedef ArrayXpr XprKind;
// Let's remove NestByRefBit
enum {
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
Flags0 = traits<remove_all_t<typename ExpressionType::Nested> >::Flags,
LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
};
@@ -45,13 +47,13 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
typedef ArrayBase<ArrayWrapper> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
typedef internal::remove_all_t<ExpressionType> NestedExpression;

typedef typename internal::conditional<
typedef std::conditional_t<
internal::is_lvalue<ExpressionType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
> ScalarWithConstIfNotLvalue;

typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;

@@ -91,7 +93,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
inline void evalTo(Dest& dst) const { dst = m_expression; }

EIGEN_DEVICE_FUNC
const typename internal::remove_all<NestedExpressionType>::type&
const internal::remove_all_t<NestedExpressionType>&
nestedExpression() const
{
return m_expression;
@@ -124,12 +126,12 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
namespace internal {
template<typename ExpressionType>
struct traits<MatrixWrapper<ExpressionType> >
: public traits<typename remove_all<typename ExpressionType::Nested>::type >
: public traits<remove_all_t<typename ExpressionType::Nested> >
{
typedef MatrixXpr XprKind;
// Let's remove NestByRefBit
enum {
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
Flags0 = traits<remove_all_t<typename ExpressionType::Nested> >::Flags,
LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
};
@@ -143,13 +145,13 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
typedef MatrixBase<MatrixWrapper<ExpressionType> > Base;
EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
typedef internal::remove_all_t<ExpressionType> NestedExpression;

typedef typename internal::conditional<
internal::is_lvalue<ExpressionType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
typedef std::conditional_t<
internal::is_lvalue<ExpressionType>::value,
Scalar,
const Scalar
> ScalarWithConstIfNotLvalue;

typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;

@@ -185,7 +187,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
}

EIGEN_DEVICE_FUNC
const typename internal::remove_all<NestedExpressionType>::type&
const internal::remove_all_t<NestedExpressionType>&
nestedExpression() const
{
return m_expression;

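// Usage sketch (illustrative): ArrayWrapper/MatrixWrapper are what .array() and
// .matrix() return, letting one expression switch semantics without a copy.
//
//   Eigen::MatrixXd m = Eigen::MatrixXd::Random(3, 3);
//   Eigen::MatrixXd c = (m.array() * m.array()).matrix();  // element-wise square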
@@ -12,6 +12,8 @@
#ifndef EIGEN_ASSIGN_H
#define EIGEN_ASSIGN_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

template<typename Derived>

@@ -12,6 +12,8 @@
#ifndef EIGEN_ASSIGN_EVALUATOR_H
#define EIGEN_ASSIGN_EVALUATOR_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

// This implementation is based on Assign.h
@@ -40,7 +42,7 @@ public:
DstAlignment = DstEvaluator::Alignment,
SrcAlignment = SrcEvaluator::Alignment,
DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
JointAlignment = plain_enum_min(DstAlignment, SrcAlignment)
};

private:
@@ -51,8 +53,8 @@ private:
InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
: int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
: int(Dst::MaxRowsAtCompileTime),
RestrictedInnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(InnerSize,MaxPacketSize),
RestrictedLinearSize = EIGEN_SIZE_MIN_PREFER_FIXED(Dst::SizeAtCompileTime,MaxPacketSize),
RestrictedInnerSize = min_size_prefer_fixed(InnerSize, MaxPacketSize),
RestrictedLinearSize = min_size_prefer_fixed(Dst::SizeAtCompileTime, MaxPacketSize),
OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
MaxSizeAtCompileTime = Dst::SizeAtCompileTime
};
@@ -111,7 +113,7 @@ public:
|| int(Traversal) == SliceVectorizedTraversal
};

typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
typedef std::conditional_t<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType> PacketType;

private:
enum {
@@ -216,7 +218,7 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&) { }
};

template<typename Kernel, int Index_, int Stop>
@@ -285,7 +287,7 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&) { }
};

template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
@@ -325,10 +327,9 @@ struct dense_assignment_loop;
template<typename Kernel, int Unrolling>
struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>
{
EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& /*kernel*/)
EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE EIGEN_CONSTEXPR run(Kernel& /*kernel*/)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0,
EIGEN_STATIC_ASSERT(int(Kernel::DstEvaluatorType::XprType::SizeAtCompileTime) == 0,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
}
};
@@ -386,7 +387,7 @@ struct unaligned_dense_assignment_loop
{
// if IsAligned = true, then do nothing
template <typename Kernel>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&, Index, Index) {}
};

template <>
@@ -402,7 +403,7 @@ struct unaligned_dense_assignment_loop<false>
Index end)
#else
template <typename Kernel>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel,
Index start,
Index end)
#endif
@@ -415,7 +416,7 @@ struct unaligned_dense_assignment_loop<false>
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
{
const Index size = kernel.size();
typedef typename Kernel::Scalar Scalar;
@@ -443,7 +444,7 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
@@ -469,7 +470,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
DstAlignment = Kernel::AssignmentTraits::DstAlignment
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
{
const Index innerSize = kernel.innerSize();
const Index outerSize = kernel.outerSize();
@@ -511,7 +512,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
{
const Index size = kernel.size();
for(Index i = 0; i < size; ++i)
@@ -522,7 +523,7 @@ struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
@@ -536,7 +537,7 @@ struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
{
typedef typename Kernel::Scalar Scalar;
typedef typename Kernel::PacketType PacketType;
@@ -584,7 +585,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
@@ -766,7 +767,7 @@ void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::a
}

template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
{
typedef evaluator<DstXprType> DstEvaluatorType;
typedef evaluator<SrcXprType> SrcEvaluatorType;
@@ -844,8 +845,8 @@ void call_assignment(const Dst& dst, const Src& src)

// Deal with "assume-aliasing"
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
void call_assignment(Dst& dst, const Src& src, const Func& func, std::enable_if_t< evaluator_assume_aliasing<Src>::value, void*> = 0)
{
typename plain_matrix_type<Src>::type tmp(src);
call_assignment_no_alias(dst, tmp, func);
@@ -853,7 +854,7 @@ void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable

template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
void call_assignment(Dst& dst, const Src& src, const Func& func, std::enable_if_t<!evaluator_assume_aliasing<Src>::value, void*> = 0)
{
call_assignment_no_alias(dst, src, func);
}
@@ -861,7 +862,7 @@ void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable
// by-pass "assume-aliasing"
// When there is no aliasing, we require that 'dst' has been properly resized
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
{
call_assignment_no_alias(dst.expression(), src, func);
@@ -869,7 +870,7 @@ void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func&


template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
{
enum {
@@ -878,8 +879,8 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
) && int(Dst::SizeAtCompileTime) != 1
};

typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst> ActualDstTypeCleaned;
typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst&> ActualDstType;
ActualDstType actualDst(dst);

// TODO check whether this is the right place to perform these checks:
@@ -911,14 +912,14 @@ void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const
}

template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
void call_assignment_no_alias(Dst& dst, const Src& src)
{
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}

template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
// TODO check whether this is the right place to perform these checks:
@@ -929,7 +930,7 @@ void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func&
Assignment<Dst,Src,Func>::run(dst, src, func);
}
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());

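// Usage sketch (illustrative): the "assume-aliasing" overloads above are why
// `c = a * b;` is safe even when c aliases a or b, and NoAlias is the opt-out.
//
//   Eigen::MatrixXd a(3, 3), b(3, 3), c(3, 3);
//   c = a * b;            // aliasing assumed: product evaluated into a temporary
//   c.noalias() = a * b;  // NoAlias overload: writes directly, no temporary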
libs/eigen/Eigen/src/Core/Assign_MKL.h
Executable file → Normal file
@@ -34,6 +34,8 @@
#ifndef EIGEN_ASSIGN_VML_H
#define EIGEN_ASSIGN_VML_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -82,7 +84,7 @@ class vml_assign_traits
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
template< typename DstXprType, typename SrcXprNested> \
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
Dense2Dense, std::enable_if_t<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>> { \
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
resize_if_allowed(dst, src, func); \
@@ -142,7 +144,7 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
template< typename DstXprType, typename SrcXprNested, typename Plain> \
struct Assignment<DstXprType, CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >, assign_op<EIGENTYPE,EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
Dense2Dense, std::enable_if_t<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>> { \
typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \

@@ -10,6 +10,8 @@
#ifndef EIGEN_BANDMATRIX_H
#define EIGEN_BANDMATRIX_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -41,7 +43,7 @@ class BandMatrixBase : public EigenBase<Derived>
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic))
? 1 + Supers + Subs
: Dynamic,
SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime)
SizeAtCompileTime = min_size_prefer_dynamic(RowsAtCompileTime,ColsAtCompileTime)
};

public:
@@ -96,13 +98,13 @@ class BandMatrixBase : public EigenBase<Derived>
DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic)
? Dynamic
: (ActualIndex<0
? EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime, RowsAtCompileTime + ActualIndex)
: EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime - ActualIndex))
? min_size_prefer_dynamic(ColsAtCompileTime, RowsAtCompileTime + ActualIndex)
: min_size_prefer_dynamic(RowsAtCompileTime, ColsAtCompileTime - ActualIndex))
};
typedef Block<CoefficientsType,1, DiagonalSize> BuildType;
typedef typename internal::conditional<Conjugate,
typedef std::conditional_t<Conjugate,
CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>,BuildType >,
BuildType>::type Type;
BuildType> Type;
};

/** \returns a vector expression of the \a N -th sub or super diagonal */
@@ -161,12 +163,12 @@ class BandMatrixBase : public EigenBase<Derived>
*
* \brief Represents a rectangular matrix with a banded storage
*
* \tparam _Scalar Numeric type, i.e. float, double, int
* \tparam _Rows Number of rows, or \b Dynamic
* \tparam _Cols Number of columns, or \b Dynamic
* \tparam _Supers Number of super diagonal
* \tparam _Subs Number of sub diagonal
* \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
* \tparam Scalar_ Numeric type, i.e. float, double, int
* \tparam Rows_ Number of rows, or \b Dynamic
* \tparam Cols_ Number of columns, or \b Dynamic
* \tparam Supers_ Number of super diagonal
* \tparam Subs_ Number of sub diagonal
* \tparam Options_ A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
* The former controls \ref TopicStorageOrders "storage order", and defaults to
* column-major. The latter controls whether the matrix represents a selfadjoint
* matrix in which case either Supers of Subs have to be null.
@@ -174,29 +176,29 @@ class BandMatrixBase : public EigenBase<Derived>
* \sa class TridiagonalMatrix
*/

template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
struct traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
template<typename Scalar_, int Rows_, int Cols_, int Supers_, int Subs_, int Options_>
struct traits<BandMatrix<Scalar_,Rows_,Cols_,Supers_,Subs_,Options_> >
{
typedef _Scalar Scalar;
typedef Scalar_ Scalar;
typedef Dense StorageKind;
typedef Eigen::Index StorageIndex;
enum {
CoeffReadCost = NumTraits<Scalar>::ReadCost,
RowsAtCompileTime = _Rows,
ColsAtCompileTime = _Cols,
MaxRowsAtCompileTime = _Rows,
MaxColsAtCompileTime = _Cols,
RowsAtCompileTime = Rows_,
ColsAtCompileTime = Cols_,
MaxRowsAtCompileTime = Rows_,
MaxColsAtCompileTime = Cols_,
Flags = LvalueBit,
Supers = _Supers,
Subs = _Subs,
Options = _Options,
Supers = Supers_,
Subs = Subs_,
Options = Options_,
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
};
typedef Matrix<Scalar, DataRowsAtCompileTime, ColsAtCompileTime, int(Options) & int(RowMajor) ? RowMajor : ColMajor> CoefficientsType;
};

template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
template<typename Scalar_, int Rows, int Cols, int Supers, int Subs, int Options>
class BandMatrix : public BandMatrixBase<BandMatrix<Scalar_,Rows,Cols,Supers,Subs,Options> >
{
public:

@@ -233,32 +235,32 @@ class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Sub
internal::variable_if_dynamic<Index, Subs> m_subs;
};

template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
template<typename CoefficientsType_,int Rows_, int Cols_, int Supers_, int Subs_,int Options_>
class BandMatrixWrapper;

template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
struct traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
template<typename CoefficientsType_,int Rows_, int Cols_, int Supers_, int Subs_,int Options_>
struct traits<BandMatrixWrapper<CoefficientsType_,Rows_,Cols_,Supers_,Subs_,Options_> >
{
typedef typename _CoefficientsType::Scalar Scalar;
typedef typename _CoefficientsType::StorageKind StorageKind;
typedef typename _CoefficientsType::StorageIndex StorageIndex;
typedef typename CoefficientsType_::Scalar Scalar;
typedef typename CoefficientsType_::StorageKind StorageKind;
typedef typename CoefficientsType_::StorageIndex StorageIndex;
enum {
CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost,
RowsAtCompileTime = _Rows,
ColsAtCompileTime = _Cols,
MaxRowsAtCompileTime = _Rows,
MaxColsAtCompileTime = _Cols,
CoeffReadCost = internal::traits<CoefficientsType_>::CoeffReadCost,
RowsAtCompileTime = Rows_,
ColsAtCompileTime = Cols_,
MaxRowsAtCompileTime = Rows_,
MaxColsAtCompileTime = Cols_,
Flags = LvalueBit,
Supers = _Supers,
Subs = _Subs,
Options = _Options,
Supers = Supers_,
Subs = Subs_,
Options = Options_,
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
};
typedef _CoefficientsType CoefficientsType;
typedef CoefficientsType_ CoefficientsType;
};

template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
template<typename CoefficientsType_,int Rows_, int Cols_, int Supers_, int Subs_,int Options_>
class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<CoefficientsType_,Rows_,Cols_,Supers_,Subs_,Options_> >
{
public:

@@ -266,12 +268,12 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
typedef typename internal::traits<BandMatrixWrapper>::CoefficientsType CoefficientsType;
typedef typename internal::traits<BandMatrixWrapper>::StorageIndex StorageIndex;

explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=Rows_, Index cols=Cols_, Index supers=Supers_, Index subs=Subs_)
: m_coeffs(coeffs),
m_rows(rows), m_supers(supers), m_subs(subs)
{
EIGEN_UNUSED_VARIABLE(cols);
//internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
// eigen_assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
}

/** \returns the number of columns */
@@ -291,9 +293,9 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
protected:

const CoefficientsType& m_coeffs;
internal::variable_if_dynamic<Index, _Rows> m_rows;
internal::variable_if_dynamic<Index, _Supers> m_supers;
internal::variable_if_dynamic<Index, _Subs> m_subs;
internal::variable_if_dynamic<Index, Rows_> m_rows;
internal::variable_if_dynamic<Index, Supers_> m_supers;
internal::variable_if_dynamic<Index, Subs_> m_subs;
};

/**
@@ -330,16 +332,16 @@ class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint

struct BandShape {};

template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
struct evaluator_traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
: public evaluator_traits_base<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
template<typename Scalar_, int Rows_, int Cols_, int Supers_, int Subs_, int Options_>
struct evaluator_traits<BandMatrix<Scalar_,Rows_,Cols_,Supers_,Subs_,Options_> >
: public evaluator_traits_base<BandMatrix<Scalar_,Rows_,Cols_,Supers_,Subs_,Options_> >
{
typedef BandShape Shape;
};

template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
struct evaluator_traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
: public evaluator_traits_base<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
template<typename CoefficientsType_,int Rows_, int Cols_, int Supers_, int Subs_,int Options_>
struct evaluator_traits<BandMatrixWrapper<CoefficientsType_,Rows_,Cols_,Supers_,Subs_,Options_> >
: public evaluator_traits_base<BandMatrixWrapper<CoefficientsType_,Rows_,Cols_,Supers_,Subs_,Options_> >
{
typedef BandShape Shape;
};

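// Usage sketch (illustrative; BandMatrix lives in Eigen::internal, so this API
// may change without notice):
//
//   Eigen::internal::BandMatrix<float, 6, 6, 2, 1> bm(6, 6, 2, 1);  // 2 supers, 1 sub
//   bm.diagonal().setOnes();      // main diagonal
//   bm.diagonal<1>().setZero();   // first super-diagonal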
@@ -11,6 +11,8 @@
#ifndef EIGEN_BLOCK_H
#define EIGEN_BLOCK_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -21,7 +23,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
typedef typename traits<XprType>::StorageKind StorageKind;
typedef typename traits<XprType>::XprKind XprKind;
typedef typename ref_selector<XprType>::type XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
typedef std::remove_reference_t<XprTypeNested> XprTypeNested_;
enum{
MatrixRows = traits<XprType>::RowsAtCompileTime,
MatrixCols = traits<XprType>::ColsAtCompileTime,
@@ -110,7 +112,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)

typedef typename internal::remove_all<XprType>::type NestedExpression;
typedef internal::remove_all_t<XprType> NestedExpression;

/** Column or Row constructor
*/
@@ -260,19 +262,19 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
}

template<int LoadMode>
inline PacketScalar packet(Index rowId, Index colId) const
EIGEN_DEVICE_FUNC inline PacketScalar packet(Index rowId, Index colId) const
{
return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
}

template<int LoadMode>
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
EIGEN_DEVICE_FUNC inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
{
m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
}

template<int LoadMode>
inline PacketScalar packet(Index index) const
EIGEN_DEVICE_FUNC inline PacketScalar packet(Index index) const
{
return m_xpr.template packet<Unaligned>
(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
@@ -280,7 +282,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
}

template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& val)
EIGEN_DEVICE_FUNC inline void writePacket(Index index, const PacketScalar& val)
{
m_xpr.template writePacket<Unaligned>
(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
@@ -295,7 +297,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
#endif

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
const internal::remove_all_t<XprTypeNested>& nestedExpression() const
{
return m_xpr;
}
@@ -378,7 +380,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const EIGEN_NOEXCEPT
const internal::remove_all_t<XprTypeNested>& nestedExpression() const EIGEN_NOEXCEPT
{
return m_xpr;
}

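// Usage sketch (illustrative): Block is the expression behind block(), row(),
// col(), topRows() and friends.
//
//   Eigen::MatrixXd m = Eigen::MatrixXd::Random(4, 4);
//   m.block<2, 2>(1, 1).setZero();       // writable fixed-size block
//   Eigen::MatrixXd top = m.topRows(2);  // also a Block expression underneath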
@@ -10,58 +10,62 @@
|
||||
#ifndef EIGEN_ALLANDANY_H
|
||||
#define EIGEN_ALLANDANY_H
|
||||
|
||||
namespace Eigen {
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Derived, int UnrollCount, int Rows>
|
||||
template<typename Derived, int UnrollCount, int InnerSize>
|
||||
struct all_unroller
|
||||
{
|
||||
enum {
|
||||
col = (UnrollCount-1) / Rows,
|
||||
row = (UnrollCount-1) % Rows
|
||||
IsRowMajor = (int(Derived::Flags) & int(RowMajor)),
|
||||
i = (UnrollCount-1) / InnerSize,
|
||||
j = (UnrollCount-1) % InnerSize
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat)
|
||||
{
|
||||
return all_unroller<Derived, UnrollCount-1, Rows>::run(mat) && mat.coeff(row, col);
|
||||
return all_unroller<Derived, UnrollCount-1, InnerSize>::run(mat) && mat.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived, int Rows>
|
||||
struct all_unroller<Derived, 0, Rows>
|
||||
template<typename Derived, int InnerSize>
|
||||
struct all_unroller<Derived, 0, InnerSize>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static inline bool run(const Derived &/*mat*/) { return true; }
};

-template<typename Derived, int Rows>
-struct all_unroller<Derived, Dynamic, Rows>
+template<typename Derived, int InnerSize>
+struct all_unroller<Derived, Dynamic, InnerSize>
{
EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; }
};

-template<typename Derived, int UnrollCount, int Rows>
+template<typename Derived, int UnrollCount, int InnerSize>
struct any_unroller
{
enum {
-col = (UnrollCount-1) / Rows,
-row = (UnrollCount-1) % Rows
+IsRowMajor = (int(Derived::Flags) & int(RowMajor)),
+i = (UnrollCount-1) / InnerSize,
+j = (UnrollCount-1) % InnerSize
};

EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat)
{
-return any_unroller<Derived, UnrollCount-1, Rows>::run(mat) || mat.coeff(row, col);
+return any_unroller<Derived, UnrollCount-1, InnerSize>::run(mat) || mat.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i);
}
};

-template<typename Derived, int Rows>
-struct any_unroller<Derived, 0, Rows>
+template<typename Derived, int InnerSize>
+struct any_unroller<Derived, 0, InnerSize>
{
EIGEN_DEVICE_FUNC static inline bool run(const Derived & /*mat*/) { return false; }
};

-template<typename Derived, int Rows>
-struct any_unroller<Derived, Dynamic, Rows>
+template<typename Derived, int InnerSize>
+struct any_unroller<Derived, Dynamic, InnerSize>
{
EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; }
};
@@ -81,16 +85,16 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const
typedef internal::evaluator<Derived> Evaluator;
enum {
unroll = SizeAtCompileTime != Dynamic
-&& SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits<Scalar>::AddCost)) <= EIGEN_UNROLLING_LIMIT
+&& SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits<Scalar>::AddCost)) <= EIGEN_UNROLLING_LIMIT,
};
Evaluator evaluator(derived());
if(unroll)
-return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator);
+return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, InnerSizeAtCompileTime>::run(evaluator);
else
{
-for(Index j = 0; j < cols(); ++j)
-for(Index i = 0; i < rows(); ++i)
-if (!evaluator.coeff(i, j)) return false;
+for(Index i = 0; i < derived().outerSize(); ++i)
+for(Index j = 0; j < derived().innerSize(); ++j)
+if (!evaluator.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i)) return false;
return true;
}
}
@@ -105,16 +109,16 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const
typedef internal::evaluator<Derived> Evaluator;
enum {
unroll = SizeAtCompileTime != Dynamic
-&& SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits<Scalar>::AddCost)) <= EIGEN_UNROLLING_LIMIT
+&& SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits<Scalar>::AddCost)) <= EIGEN_UNROLLING_LIMIT,
};
Evaluator evaluator(derived());
if(unroll)
-return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator);
+return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, InnerSizeAtCompileTime>::run(evaluator);
else
{
-for(Index j = 0; j < cols(); ++j)
-for(Index i = 0; i < rows(); ++i)
-if (evaluator.coeff(i, j)) return true;
+for(Index i = 0; i < derived().outerSize(); ++i)
+for(Index j = 0; j < derived().innerSize(); ++j)
+if (evaluator.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i)) return true;
return false;
}
}
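Editor's illustration, not part of the commit: the unrollers above expand `all()`/`any()` at compile time for small fixed-size expressions, now indexing coefficients in storage order via the inner size; dynamic expressions take the `outerSize()`/`innerSize()` loop fallback. A minimal sketch of the user-facing behavior:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  // Small fixed-size matrix: the reduction is unrolled at compile time
  // by the all_unroller/any_unroller specializations above.
  Eigen::Matrix2d m;
  m << 1.0, 2.0,
       3.0, 4.0;
  std::cout << (m.array() > 0.0).all() << "\n";  // 1: every coefficient positive
  std::cout << (m.array() > 3.5).any() << "\n";  // 1: m(1,1) == 4.0 matches

  // Dynamic-size matrix: falls back to the outerSize()/innerSize() loops.
  Eigen::MatrixXd d = Eigen::MatrixXd::Zero(100, 100);
  std::cout << d.any() << "\n";                  // 0: all coefficients are zero
}
```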
@@ -134,7 +138,7 @@ EIGEN_DEVICE_FUNC inline Eigen::Index DenseBase<Derived>::count() const
* \sa allFinite()
*/
template<typename Derived>
-inline bool DenseBase<Derived>::hasNaN() const
+EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::hasNaN() const
{
#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
return derived().array().isNaN().any();
@@ -148,7 +152,7 @@ inline bool DenseBase<Derived>::hasNaN() const
* \sa hasNaN()
*/
template<typename Derived>
-inline bool DenseBase<Derived>::allFinite() const
+EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::allFinite() const
{
#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
return derived().array().isFinite().all();
@@ -156,7 +160,7 @@ inline bool DenseBase<Derived>::allFinite() const
return !((derived()-derived()).hasNaN());
#endif
}

} // end namespace Eigen

#endif // EIGEN_ALLANDANY_H
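Editor's illustration, not part of the commit: `hasNaN()`/`allFinite()` gain device-function annotations here; on MSVC or under fast-math they route through the `isNaN()`/`isFinite()` array paths, otherwise through the `derived()-derived()` self-subtraction trick. A quick usage sketch:

```cpp
#include <Eigen/Dense>
#include <cmath>
#include <iostream>

int main() {
  Eigen::Vector3d v(1.0, 2.0, 3.0);
  std::cout << v.hasNaN() << " " << v.allFinite() << "\n";  // 0 1

  v(1) = std::nan("");  // inject a NaN coefficient
  std::cout << v.hasNaN() << " " << v.allFinite() << "\n";  // 1 0
}
```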
@@ -11,6 +11,8 @@
#ifndef EIGEN_COMMAINITIALIZER_H
#define EIGEN_COMMAINITIALIZER_H

+#include "./InternalHeaderCheck.h"
+
namespace Eigen {

/** \class CommaInitializer
@@ -45,7 +47,7 @@ struct CommaInitializer
{
eigen_assert(m_xpr.rows() >= other.rows() && m_xpr.cols() >= other.cols()
&& "Cannot comma-initialize a 0x0 matrix (operator<<)");
-m_xpr.block(0, 0, other.rows(), other.cols()) = other;
+m_xpr.template block<OtherDerived::RowsAtCompileTime, OtherDerived::ColsAtCompileTime>(0, 0, other.rows(), other.cols()) = other;
}

/* Copy/Move constructor which transfers ownership. This is crucial in
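Editor's illustration, not part of the commit: the hunk above makes the comma initializer copy nested expressions through the fixed-size `block<,>()` overload, preserving compile-time dimensions. A minimal sketch of the code path it affects:

```cpp
#include <Eigen/Dense>

int main() {
  // operator<< starts a CommaInitializer; feeding it fixed-size nested
  // expressions (here RowVector3d rows) now goes through the compile-time
  // sized block assignment patched above.
  Eigen::RowVector3d r(1.0, 2.0, 3.0);
  Eigen::Matrix3d m;
  m << r,
       2.0 * r,
       3.0 * r;
}
```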
@@ -10,6 +10,8 @@
#ifndef EIGEN_CONDITIONESTIMATOR_H
#define EIGEN_CONDITIONESTIMATOR_H

+#include "./InternalHeaderCheck.h"
+
namespace Eigen {

namespace internal {
@@ -160,12 +162,12 @@ rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Deco
{
typedef typename Decomposition::RealScalar RealScalar;
eigen_assert(dec.rows() == dec.cols());
-if (dec.rows() == 0) return NumTraits<RealScalar>::infinity();
-if (matrix_norm == RealScalar(0)) return RealScalar(0);
-if (dec.rows() == 1) return RealScalar(1);
+if (dec.rows() == 0) return NumTraits<RealScalar>::infinity();
+if (numext::is_exactly_zero(matrix_norm)) return RealScalar(0);
+if (dec.rows() == 1) return RealScalar(1);
const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);
-return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0)
-: (RealScalar(1) / inverse_matrix_norm) / matrix_norm);
+return (numext::is_exactly_zero(inverse_matrix_norm) ? RealScalar(0)
+: (RealScalar(1) / inverse_matrix_norm) / matrix_norm);
}

} // namespace internal
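Editor's illustration, not part of the commit: `rcond_estimate_helper()` backs the `rcond()` methods of the dense decompositions; the hunk swaps exact floating-point comparisons for `numext::is_exactly_zero()`. A usage sketch:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(50, 50);
  // rcond() returns the estimated reciprocal condition number; values
  // near zero signal an ill-conditioned matrix.
  Eigen::PartialPivLU<Eigen::MatrixXd> lu(A);
  std::cout << "reciprocal condition estimate: " << lu.rcond() << "\n";
}
```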
@@ -13,6 +13,8 @@
#ifndef EIGEN_COREEVALUATORS_H
#define EIGEN_COREEVALUATORS_H

+#include "./InternalHeaderCheck.h"
+
namespace Eigen {

namespace internal {
@@ -498,7 +500,7 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
: evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
{
typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType;
-typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned;
+typedef internal::remove_all_t<PlainObjectType> PlainObjectTypeCleaned;

enum {
CoeffReadCost = internal::functor_traits<NullaryOp>::Cost,
@@ -655,9 +657,9 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased
)
),
Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit),
-Alignment = EIGEN_PLAIN_ENUM_MIN(
-EIGEN_PLAIN_ENUM_MIN(evaluator<Arg1>::Alignment, evaluator<Arg2>::Alignment),
-evaluator<Arg3>::Alignment)
+Alignment = plain_enum_min(
+plain_enum_min(evaluator<Arg1>::Alignment, evaluator<Arg2>::Alignment),
+evaluator<Arg3>::Alignment)
};

EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr)
@@ -751,7 +753,7 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
)
),
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
-Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment)
+Alignment = plain_enum_min(evaluator<Lhs>::Alignment, evaluator<Rhs>::Alignment)
};

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
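Editor's note: these hunks replace the `EIGEN_PLAIN_ENUM_MIN`/`EIGEN_PLAIN_ENUM_MAX` macros with `plain_enum_min`/`plain_enum_max` function calls usable in enum initializers. The sketch below is illustrative only; the exact Eigen signatures may differ, but the idea is a constexpr function that compares mixed enum/int arguments after a well-defined promotion:

```cpp
// Hypothetical stand-in for internal::plain_enum_min, for illustration.
template <typename A, typename B>
constexpr int plain_enum_min_sketch(A a, B b) {
  // Promote both operands to int, then pick the smaller one.
  return static_cast<int>(a) <= static_cast<int>(b) ? static_cast<int>(a)
                                                    : static_cast<int>(b);
}

static_assert(plain_enum_min_sketch(8, 16) == 8,
              "constexpr, so usable inside enum { ... } initializers");
```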
@@ -810,11 +812,11 @@ protected:

// -------------------- CwiseUnaryView --------------------

-template<typename UnaryOp, typename ArgType>
-struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
-: evaluator_base<CwiseUnaryView<UnaryOp, ArgType> >
+template<typename UnaryOp, typename ArgType, typename StrideType>
+struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType, StrideType>, IndexBased>
+: evaluator_base<CwiseUnaryView<UnaryOp, ArgType, StrideType> >
{
-typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
+typedef CwiseUnaryView<UnaryOp, ArgType, StrideType> XprType;

enum {
CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost),
@@ -900,7 +902,8 @@ struct mapbase_evaluator : evaluator_base<Derived>
m_innerStride(map.innerStride()),
m_outerStride(map.outerStride())
{
-EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
+EIGEN_STATIC_ASSERT(check_implication((evaluator<Derived>::Flags & PacketAccessBit) != 0,
+internal::inner_stride_at_compile_time<Derived>::ret == 1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
}
@@ -1072,7 +1075,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
&& (OuterStrideAtCompileTime!=0)
&& (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
-Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
+Alignment = plain_enum_min(evaluator<ArgType>::Alignment, Alignment0)
};
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1222,8 +1225,8 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
explicit block_evaluator(const XprType& block)
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
{
-// TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime
-eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
+eigen_internal_assert((internal::is_constant_evaluated() || (internal::UIntPtr(block.data()) % plain_enum_max(1,evaluator<XprType>::Alignment)) == 0) \
+&& "data is not aligned");
}
};

@@ -1239,12 +1242,12 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
enum {
CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost
-+ EIGEN_PLAIN_ENUM_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
-evaluator<ElseMatrixType>::CoeffReadCost),
++ plain_enum_max(evaluator<ThenMatrixType>::CoeffReadCost,
+evaluator<ElseMatrixType>::CoeffReadCost),

Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits,

-Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
+Alignment = plain_enum_min(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
};

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1295,7 +1298,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
};
typedef typename internal::nested_eval<ArgType,Factor>::type ArgTypeNested;
-typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
+typedef internal::remove_all_t<ArgTypeNested> ArgTypeNestedCleaned;

enum {
CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
@@ -1379,7 +1382,7 @@ template<typename XprType>
struct evaluator_wrapper_base
: evaluator_base<XprType>
{
-typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
+typedef remove_all_t<typename XprType::NestedExpressionType> ArgType;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
Flags = evaluator<ArgType>::Flags,
@@ -1720,14 +1723,14 @@ struct evaluator<EvalToTemp<ArgType> >
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
: m_result(xpr.arg())
{
-::new (static_cast<Base*>(this)) Base(m_result);
+internal::construct_at<Base>(this, m_result);
}

// This constructor is used when nesting an EvalTo evaluator in another evaluator
EIGEN_DEVICE_FUNC evaluator(const ArgType& arg)
: m_result(arg)
{
-::new (static_cast<Base*>(this)) Base(m_result);
+internal::construct_at<Base>(this, m_result);
}

protected:
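Editor's note: the `EvalToTemp` hunks replace a raw placement-new on the base subobject with `internal::construct_at`, mirroring C++20's `std::construct_at`, which (unlike placement new) is permitted in constant expressions. A minimal sketch of the pattern, independent of Eigen's types:

```cpp
#include <memory>

struct Base {
  int value;
  explicit Base(int v) : value(v) {}
};

void rebuild(Base* storage) {
  // Before: ::new (static_cast<void*>(storage)) Base(42);
  // After: equivalent effect, but constexpr-friendly in C++20.
  std::construct_at(storage, 42);
}
```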
@@ -10,6 +10,8 @@
#ifndef EIGEN_COREITERATORS_H
#define EIGEN_COREITERATORS_H

+#include "./InternalHeaderCheck.h"
+
namespace Eigen {

/* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core
@@ -11,6 +11,8 @@
#ifndef EIGEN_CWISE_BINARY_OP_H
#define EIGEN_CWISE_BINARY_OP_H

+#include "./InternalHeaderCheck.h"
+
namespace Eigen {

namespace internal {
@@ -19,7 +21,7 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
{
// we must not inherit from traits<Lhs> since it has
// the potential to cause problems with MSVC
-typedef typename remove_all<Lhs>::type Ancestor;
+typedef remove_all_t<Lhs> Ancestor;
typedef typename traits<Ancestor>::XprKind XprKind;
enum {
RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
@@ -43,10 +45,10 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
typename traits<Rhs>::StorageIndex>::type StorageIndex;
typedef typename Lhs::Nested LhsNested;
typedef typename Rhs::Nested RhsNested;
-typedef typename remove_reference<LhsNested>::type _LhsNested;
-typedef typename remove_reference<RhsNested>::type _RhsNested;
+typedef std::remove_reference_t<LhsNested> LhsNested_;
+typedef std::remove_reference_t<RhsNested> RhsNested_;
enum {
-Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value
+Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,LhsNested_::Flags & RowMajorBit,RhsNested_::Flags & RowMajorBit>::value
};
};
} // end namespace internal
@@ -84,9 +86,9 @@ class CwiseBinaryOp :
{
public:

-typedef typename internal::remove_all<BinaryOp>::type Functor;
-typedef typename internal::remove_all<LhsType>::type Lhs;
-typedef typename internal::remove_all<RhsType>::type Rhs;
+typedef internal::remove_all_t<BinaryOp> Functor;
+typedef internal::remove_all_t<LhsType> Lhs;
+typedef internal::remove_all_t<RhsType> Rhs;

typedef typename CwiseBinaryOpImpl<
BinaryOp, LhsType, RhsType,
@@ -95,12 +97,15 @@ class CwiseBinaryOp :
BinaryOp>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)

+EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar)
+EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
+
typedef typename internal::ref_selector<LhsType>::type LhsNested;
typedef typename internal::ref_selector<RhsType>::type RhsNested;
-typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
-typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
+typedef std::remove_reference_t<LhsNested> LhsNested_;
+typedef std::remove_reference_t<RhsNested> RhsNested_;

-#if EIGEN_COMP_MSVC && EIGEN_HAS_CXX11
+#if EIGEN_COMP_MSVC
//Required for Visual Studio or the Copy constructor will probably not get inlined!
EIGEN_STRONG_INLINE
CwiseBinaryOp(const CwiseBinaryOp<BinaryOp,LhsType,RhsType>&) = default;
@@ -110,29 +115,26 @@ class CwiseBinaryOp :
CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
: m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
{
-EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);
-// require the sizes to match
-EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
Index rows() const EIGEN_NOEXCEPT {
// return the fixed size type if available to enable compile time optimizations
-return internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic ? m_rhs.rows() : m_lhs.rows();
+return internal::traits<internal::remove_all_t<LhsNested>>::RowsAtCompileTime==Dynamic ? m_rhs.rows() : m_lhs.rows();
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
Index cols() const EIGEN_NOEXCEPT {
// return the fixed size type if available to enable compile time optimizations
-return internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic ? m_rhs.cols() : m_lhs.cols();
+return internal::traits<internal::remove_all_t<LhsNested>>::ColsAtCompileTime==Dynamic ? m_rhs.cols() : m_lhs.cols();
}

/** \returns the left hand side nested expression */
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-const _LhsNested& lhs() const { return m_lhs; }
+const LhsNested_& lhs() const { return m_lhs; }
/** \returns the right hand side nested expression */
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-const _RhsNested& rhs() const { return m_rhs; }
+const RhsNested_& rhs() const { return m_rhs; }
/** \returns the functor representing the binary operation */
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const BinaryOp& functor() const { return m_functor; }
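Editor's illustration, not part of the commit: `CwiseBinaryOp` is the expression type behind coefficient-wise binary operators; the static asserts moved from the constructor body to class scope, so incompatible operand types now fail at instantiation rather than at construction. A minimal usage sketch:

```cpp
#include <Eigen/Dense>

int main() {
  Eigen::VectorXd a = Eigen::VectorXd::Ones(4);
  Eigen::VectorXd b = Eigen::VectorXd::Constant(4, 2.0);
  // a + b builds a CwiseBinaryOp expression; nothing is computed until it
  // is assigned, at which point the binary evaluator runs once per coefficient.
  auto expr = a + b;            // expression template, no computation yet
  Eigen::VectorXd c = expr;     // evaluation happens here
  (void)c;
}
```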
@@ -10,6 +10,8 @@
#ifndef EIGEN_CWISE_NULLARY_OP_H
#define EIGEN_CWISE_NULLARY_OP_H

+#include "./InternalHeaderCheck.h"
+
namespace Eigen {

namespace internal {
@@ -304,6 +306,20 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar>(low,high,Derived::SizeAtCompileTime));
}

+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessEqualSpacedReturnType
+DenseBase<Derived>::EqualSpaced(Index size, const Scalar& low, const Scalar& step) {
+EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+return DenseBase<Derived>::NullaryExpr(size, internal::equalspaced_op<Scalar>(low, step));
+}
+
+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessEqualSpacedReturnType
+DenseBase<Derived>::EqualSpaced(const Scalar& low, const Scalar& step) {
+EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::equalspaced_op<Scalar>(low, step));
+}
+
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
template<typename Derived>
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
@@ -453,6 +469,19 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(
return setLinSpaced(size(), low, high);
}

+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setEqualSpaced(Index newSize, const Scalar& low,
+const Scalar& step) {
+EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+return derived() = Derived::NullaryExpr(newSize, internal::equalspaced_op<Scalar>(low, step));
+}
+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setEqualSpaced(const Scalar& low,
+const Scalar& step) {
+EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+return setEqualSpaced(size(), low, step);
+}
+
// zero:

/** \returns an expression of a zero matrix.
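Editor's illustration, not part of the commit: `EqualSpaced`/`setEqualSpaced` are the additions in these hunks. They build a vector whose i-th coefficient is `low + i*step` (compare `LinSpaced`, which takes the upper bound instead of the step). A usage sketch:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  // Coefficient i is low + i*step, via the equalspaced_op nullary functor.
  Eigen::VectorXd v = Eigen::VectorXd::EqualSpaced(5, 1.0, 0.5);
  std::cout << v.transpose() << "\n";  // 1 1.5 2 2.5 3

  Eigen::VectorXd w(5);
  w.setEqualSpaced(5, 0.0, 2.0);       // in-place variant
  std::cout << w.transpose() << "\n";  // 0 2 4 6 8
}
```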
@@ -12,6 +12,8 @@
#ifndef EIGEN_CWISE_TERNARY_OP_H
#define EIGEN_CWISE_TERNARY_OP_H

+#include "./InternalHeaderCheck.h"
+
namespace Eigen {

namespace internal {
@@ -19,7 +21,7 @@ template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
struct traits<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
// we must not inherit from traits<Arg1> since it has
// the potential to cause problems with MSVC
-typedef typename remove_all<Arg1>::type Ancestor;
+typedef remove_all_t<Arg1> Ancestor;
typedef typename traits<Ancestor>::XprKind XprKind;
enum {
RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
@@ -41,10 +43,10 @@ struct traits<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
typedef typename Arg1::Nested Arg1Nested;
typedef typename Arg2::Nested Arg2Nested;
typedef typename Arg3::Nested Arg3Nested;
-typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;
-typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;
-typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;
-enum { Flags = _Arg1Nested::Flags & RowMajorBit };
+typedef std::remove_reference_t<Arg1Nested> Arg1Nested_;
+typedef std::remove_reference_t<Arg2Nested> Arg2Nested_;
+typedef std::remove_reference_t<Arg3Nested> Arg3Nested_;
+enum { Flags = Arg1Nested_::Flags & RowMajorBit };
};
} // end namespace internal
@@ -87,9 +89,23 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl<
internal::no_assignment_operator
{
public:
-typedef typename internal::remove_all<Arg1Type>::type Arg1;
-typedef typename internal::remove_all<Arg2Type>::type Arg2;
-typedef typename internal::remove_all<Arg3Type>::type Arg3;
+typedef internal::remove_all_t<Arg1Type> Arg1;
+typedef internal::remove_all_t<Arg2Type> Arg2;
+typedef internal::remove_all_t<Arg3Type> Arg3;
+
+// require the sizes to match
+EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2)
+EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3)
+
+// The index types should match
+EIGEN_STATIC_ASSERT((internal::is_same<
+typename internal::traits<Arg1Type>::StorageKind,
+typename internal::traits<Arg2Type>::StorageKind>::value),
+STORAGE_KIND_MUST_MATCH)
+EIGEN_STATIC_ASSERT((internal::is_same<
+typename internal::traits<Arg1Type>::StorageKind,
+typename internal::traits<Arg3Type>::StorageKind>::value),
+STORAGE_KIND_MUST_MATCH)

typedef typename CwiseTernaryOpImpl<
TernaryOp, Arg1Type, Arg2Type, Arg3Type,
@@ -99,29 +115,15 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl<
typedef typename internal::ref_selector<Arg1Type>::type Arg1Nested;
typedef typename internal::ref_selector<Arg2Type>::type Arg2Nested;
typedef typename internal::ref_selector<Arg3Type>::type Arg3Nested;
-typedef typename internal::remove_reference<Arg1Nested>::type _Arg1Nested;
-typedef typename internal::remove_reference<Arg2Nested>::type _Arg2Nested;
-typedef typename internal::remove_reference<Arg3Nested>::type _Arg3Nested;
+typedef std::remove_reference_t<Arg1Nested> Arg1Nested_;
+typedef std::remove_reference_t<Arg2Nested> Arg2Nested_;
+typedef std::remove_reference_t<Arg3Nested> Arg3Nested_;

EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2,
const Arg3& a3,
const TernaryOp& func = TernaryOp())
: m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) {
-// require the sizes to match
-EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2)
-EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3)
-
-// The index types should match
-EIGEN_STATIC_ASSERT((internal::is_same<
-typename internal::traits<Arg1Type>::StorageKind,
-typename internal::traits<Arg2Type>::StorageKind>::value),
-STORAGE_KIND_MUST_MATCH)
-EIGEN_STATIC_ASSERT((internal::is_same<
-typename internal::traits<Arg1Type>::StorageKind,
-typename internal::traits<Arg3Type>::StorageKind>::value),
-STORAGE_KIND_MUST_MATCH)
-
eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() &&
a1.rows() == a3.rows() && a1.cols() == a3.cols());
}
@@ -130,14 +132,14 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl<
EIGEN_STRONG_INLINE Index rows() const {
// return the fixed size type if available to enable compile time
// optimizations
-if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
+if (internal::traits<internal::remove_all_t<Arg1Nested>>::
RowsAtCompileTime == Dynamic &&
-internal::traits<typename internal::remove_all<Arg2Nested>::type>::
+internal::traits<internal::remove_all_t<Arg2Nested>>::
RowsAtCompileTime == Dynamic)
return m_arg3.rows();
-else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
+else if (internal::traits<internal::remove_all_t<Arg1Nested>>::
RowsAtCompileTime == Dynamic &&
-internal::traits<typename internal::remove_all<Arg3Nested>::type>::
+internal::traits<internal::remove_all_t<Arg3Nested>>::
RowsAtCompileTime == Dynamic)
return m_arg2.rows();
else
@@ -147,14 +149,14 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl<
EIGEN_STRONG_INLINE Index cols() const {
// return the fixed size type if available to enable compile time
// optimizations
-if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
+if (internal::traits<internal::remove_all_t<Arg1Nested>>::
ColsAtCompileTime == Dynamic &&
-internal::traits<typename internal::remove_all<Arg2Nested>::type>::
+internal::traits<internal::remove_all_t<Arg2Nested>>::
ColsAtCompileTime == Dynamic)
return m_arg3.cols();
-else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
+else if (internal::traits<internal::remove_all_t<Arg1Nested>>::
ColsAtCompileTime == Dynamic &&
-internal::traits<typename internal::remove_all<Arg3Nested>::type>::
+internal::traits<internal::remove_all_t<Arg3Nested>>::
ColsAtCompileTime == Dynamic)
return m_arg2.cols();
else
@@ -163,13 +165,13 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl<

/** \returns the first argument nested expression */
EIGEN_DEVICE_FUNC
-const _Arg1Nested& arg1() const { return m_arg1; }
+const Arg1Nested_& arg1() const { return m_arg1; }
/** \returns the second argument nested expression */
EIGEN_DEVICE_FUNC
-const _Arg2Nested& arg2() const { return m_arg2; }
+const Arg2Nested_& arg2() const { return m_arg2; }
/** \returns the third argument nested expression */
EIGEN_DEVICE_FUNC
-const _Arg3Nested& arg3() const { return m_arg3; }
+const Arg3Nested_& arg3() const { return m_arg3; }
/** \returns the functor representing the ternary operation */
EIGEN_DEVICE_FUNC
const TernaryOp& functor() const { return m_functor; }
@@ -11,6 +11,8 @@
#ifndef EIGEN_CWISE_UNARY_OP_H
#define EIGEN_CWISE_UNARY_OP_H

+#include "./InternalHeaderCheck.h"
+
namespace Eigen {

namespace internal {
@@ -22,9 +24,9 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
UnaryOp(const typename XprType::Scalar&)
>::type Scalar;
typedef typename XprType::Nested XprTypeNested;
-typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
+typedef std::remove_reference_t<XprTypeNested> XprTypeNested_;
enum {
-Flags = _XprTypeNested::Flags & RowMajorBit
+Flags = XprTypeNested_::Flags & RowMajorBit
};
};
}
@@ -59,7 +61,7 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
typedef typename internal::ref_selector<XprType>::type XprTypeNested;
-typedef typename internal::remove_all<XprType>::type NestedExpression;
+typedef internal::remove_all_t<XprType> NestedExpression;

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
@@ -76,12 +78,12 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal

/** \returns the nested expression */
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-const typename internal::remove_all<XprTypeNested>::type&
+const internal::remove_all_t<XprTypeNested>&
nestedExpression() const { return m_xpr; }

/** \returns the nested expression */
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-typename internal::remove_all<XprTypeNested>::type&
+internal::remove_all_t<XprTypeNested>&
nestedExpression() { return m_xpr; }

protected:
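Editor's illustration, not part of the commit: `CwiseUnaryOp` stores a functor plus a nested reference and evaluates lazily, coefficient by coefficient. A minimal usage sketch via the public `unaryExpr()` API:

```cpp
#include <Eigen/Dense>
#include <cmath>

int main() {
  Eigen::ArrayXd x = Eigen::ArrayXd::LinSpaced(4, 0.0, 3.0);
  // unaryExpr() wraps the lambda in a CwiseUnaryOp expression; evaluation
  // is deferred until the assignment to y.
  Eigen::ArrayXd y = x.unaryExpr([](double v) { return std::exp(-v * v); });
  (void)y;
}
```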
@@ -10,35 +10,42 @@
#ifndef EIGEN_CWISE_UNARY_VIEW_H
#define EIGEN_CWISE_UNARY_VIEW_H

+#include "./InternalHeaderCheck.h"
+
namespace Eigen {

namespace internal {
-template<typename ViewOp, typename MatrixType>
-struct traits<CwiseUnaryView<ViewOp, MatrixType> >
+template<typename ViewOp, typename MatrixType, typename StrideType>
+struct traits<CwiseUnaryView<ViewOp, MatrixType, StrideType> >
: traits<MatrixType>
{
typedef typename result_of<
ViewOp(const typename traits<MatrixType>::Scalar&)
>::type Scalar;
typedef typename MatrixType::Nested MatrixTypeNested;
-typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
+typedef remove_all_t<MatrixTypeNested> MatrixTypeNested_;
enum {
FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
-Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
+Flags = traits<MatrixTypeNested_>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
MatrixTypeInnerStride = inner_stride_at_compile_time<MatrixType>::ret,
// need to cast the sizeof's from size_t to int explicitly, otherwise:
// "error: no integral type can represent all of the enumerator values
-InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic
-? int(Dynamic)
-: int(MatrixTypeInnerStride) * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
-OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret == Dynamic
-? int(Dynamic)
-: outer_stride_at_compile_time<MatrixType>::ret * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar))
+InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+? (MatrixTypeInnerStride == Dynamic
+? int(Dynamic)
+: int(MatrixTypeInnerStride) * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)))
+: int(StrideType::InnerStrideAtCompileTime),
+
+OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+? (outer_stride_at_compile_time<MatrixType>::ret == Dynamic
+? int(Dynamic)
+: outer_stride_at_compile_time<MatrixType>::ret * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)))
+: int(StrideType::OuterStrideAtCompileTime)
};
};
}

-template<typename ViewOp, typename MatrixType, typename StorageKind>
+template<typename ViewOp, typename MatrixType, typename StrideType, typename StorageKind>
class CwiseUnaryViewImpl;

/** \class CwiseUnaryView
@@ -54,15 +61,15 @@ class CwiseUnaryViewImpl;
*
* \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
*/
-template<typename ViewOp, typename MatrixType>
-class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
+template<typename ViewOp, typename MatrixType, typename StrideType>
+class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, typename internal::traits<MatrixType>::StorageKind>
{
public:

-typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
+typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
-typedef typename internal::remove_all<MatrixType>::type NestedExpression;
+typedef internal::remove_all_t<MatrixType> NestedExpression;

explicit EIGEN_DEVICE_FUNC inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
: m_matrix(mat), m_functor(func) {}
@@ -78,11 +85,11 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
EIGEN_DEVICE_FUNC const ViewOp& functor() const { return m_functor; }

/** \returns the nested expression */
-EIGEN_DEVICE_FUNC const typename internal::remove_all<MatrixTypeNested>::type&
+EIGEN_DEVICE_FUNC const internal::remove_all_t<MatrixTypeNested>&
nestedExpression() const { return m_matrix; }

/** \returns the nested expression */
-EIGEN_DEVICE_FUNC typename internal::remove_reference<MatrixTypeNested>::type&
+EIGEN_DEVICE_FUNC std::remove_reference_t<MatrixTypeNested>&
nestedExpression() { return m_matrix; }

protected:
@@ -91,22 +98,22 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
};

// Generic API dispatcher
-template<typename ViewOp, typename XprType, typename StorageKind>
+template<typename ViewOp, typename XprType, typename StrideType, typename StorageKind>
class CwiseUnaryViewImpl
-: public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type
+: public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType, StrideType> >::type
{
public:
-typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type Base;
+typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType, StrideType> >::type Base;
};

-template<typename ViewOp, typename MatrixType>
-class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
-: public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
+template<typename ViewOp, typename MatrixType, typename StrideType>
+class CwiseUnaryViewImpl<ViewOp,MatrixType,StrideType,Dense>
+: public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType, StrideType> >::type
{
public:

-typedef CwiseUnaryView<ViewOp, MatrixType> Derived;
-typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;
+typedef CwiseUnaryView<ViewOp, MatrixType,StrideType> Derived;
+typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType,StrideType> >::type Base;

EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
@@ -116,12 +123,16 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>

EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const
{
-return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
+return StrideType::InnerStrideAtCompileTime != 0
+? int(StrideType::InnerStrideAtCompileTime)
+: derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
}

EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const
{
-return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
+return StrideType::OuterStrideAtCompileTime != 0
+? int(StrideType::OuterStrideAtCompileTime)
+: derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
}
protected:
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl)
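Editor's illustration, not part of the commit: `CwiseUnaryView` gains a `StrideType` parameter here; a zero compile-time stride means "derive it from the nested expression via the sizeof ratio", anything else overrides it. The classic instance of such a view is `real()`/`imag()` on a complex matrix, where the view scalar is half the size of the underlying scalar:

```cpp
#include <Eigen/Dense>
#include <complex>
#include <iostream>

int main() {
  Eigen::VectorXcd z(3);
  z << std::complex<double>(1, 4), std::complex<double>(2, 5), std::complex<double>(3, 6);
  // real() is a writable CwiseUnaryView whose inner stride is counted in
  // view-scalar units (two doubles per complex) -- exactly the
  // sizeof-based stride arithmetic in the traits above.
  z.real() = Eigen::Vector3d::Zero();        // writes through the view
  std::cout << z.imag().transpose() << "\n"; // 4 5 6, untouched
}
```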
@@ -11,17 +11,12 @@
#ifndef EIGEN_DENSEBASE_H
#define EIGEN_DENSEBASE_H

+#include "./InternalHeaderCheck.h"
+
namespace Eigen {

-namespace internal {
-
-// The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type.
-// This dummy function simply aims at checking that at compile time.
-static inline void check_DenseIndex_is_signed() {
-EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE)
-}
-
-} // end namespace internal
+EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE)

/** \class DenseBase
* \ingroup Core_Module
@@ -110,8 +105,7 @@ template<typename Derived> class DenseBase
* \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */

-SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
-internal::traits<Derived>::ColsAtCompileTime>::ret),
+SizeAtCompileTime = (internal::size_of_xpr_at_compile_time<Derived>::ret),
/**< This is equal to the number of coefficients, i.e. the number of
* rows times the number of columns, or to \a Dynamic if this is not
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
@@ -138,8 +132,8 @@ template<typename Derived> class DenseBase
* \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime
*/

-MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
-internal::traits<Derived>::MaxColsAtCompileTime>::ret),
+MaxSizeAtCompileTime = internal::size_at_compile_time(internal::traits<Derived>::MaxRowsAtCompileTime,
+internal::traits<Derived>::MaxColsAtCompileTime),
/**< This value is equal to the maximum possible number of coefficients that this expression
* might have. If this expression might have an arbitrarily high number of coefficients,
* this value is set to \a Dynamic.
@@ -206,13 +200,8 @@ template<typename Derived> class DenseBase
* the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
* that the return type of eval() is either PlainObject or const PlainObject&.
*/
-typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
-PlainMatrix, PlainArray>::type PlainObject;
-
-/** \returns the number of nonzero coefficients which is in practice the number
-* of stored coefficients. */
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-inline Index nonZeros() const { return size(); }
+typedef std::conditional_t<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
+PlainMatrix, PlainArray> PlainObject;

/** \returns the outer size.
*
@@ -269,6 +258,8 @@ template<typename Derived> class DenseBase
EIGEN_DEPRECATED typedef CwiseNullaryOp<internal::linspaced_op<Scalar>,PlainObject> SequentialLinSpacedReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
typedef CwiseNullaryOp<internal::linspaced_op<Scalar>,PlainObject> RandomAccessLinSpacedReturnType;
+/** \internal Represents a vector with equally spaced coefficients that allows random access. */
+typedef CwiseNullaryOp<internal::equalspaced_op<Scalar>, PlainObject> RandomAccessEqualSpacedReturnType;
/** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;

@@ -324,9 +315,9 @@ template<typename Derived> class DenseBase
typedef Transpose<Derived> TransposeReturnType;
EIGEN_DEVICE_FUNC
TransposeReturnType transpose();
-typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+typedef Transpose<const Derived> ConstTransposeReturnType;
EIGEN_DEVICE_FUNC
-ConstTransposeReturnType transpose() const;
+const ConstTransposeReturnType transpose() const;
EIGEN_DEVICE_FUNC
void transposeInPlace();
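Editor's illustration, not part of the commit: `transpose()` only returns an expression that references the original storage, so assigning it back to the same matrix would alias; `transposeInPlace()` is the safe in-place (and, for dynamic sizes, resizing) variant:

```cpp
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(2, 3);
  Eigen::MatrixXd t = m.transpose();  // fine: distinct destination
  m.transposeInPlace();               // m is now 3x2; no aliasing hazard
  (void)t;
}
```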
@@ -347,6 +338,11 @@ template<typename Derived> class DenseBase
EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
LinSpaced(const Scalar& low, const Scalar& high);

+EIGEN_DEVICE_FUNC static const RandomAccessEqualSpacedReturnType
+EqualSpaced(Index size, const Scalar& low, const Scalar& step);
+EIGEN_DEVICE_FUNC static const RandomAccessEqualSpacedReturnType
+EqualSpaced(const Scalar& low, const Scalar& step);
+
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
@@ -368,6 +364,8 @@ template<typename Derived> class DenseBase
EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
+EIGEN_DEVICE_FUNC Derived& setEqualSpaced(Index size, const Scalar& low, const Scalar& step);
+EIGEN_DEVICE_FUNC Derived& setEqualSpaced(const Scalar& low, const Scalar& step);
EIGEN_DEVICE_FUNC Derived& setZero();
EIGEN_DEVICE_FUNC Derived& setOnes();
EIGEN_DEVICE_FUNC Derived& setRandom();
@@ -387,15 +385,15 @@ template<typename Derived> class DenseBase
EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;

-inline bool hasNaN() const;
-inline bool allFinite() const;
+EIGEN_DEVICE_FUNC inline bool hasNaN() const;
+EIGEN_DEVICE_FUNC inline bool allFinite() const;

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator*=(const Scalar& other);
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator/=(const Scalar& other);

-typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
+typedef internal::add_const_on_value_type_t<typename internal::eval<Derived>::type> EvalReturnType;
/** \returns the matrix or vector obtained by evaluating this expression.
*
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
@@ -439,9 +437,9 @@ template<typename Derived> class DenseBase
EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
template<bool Enable> EIGEN_DEVICE_FUNC
-inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
+inline const std::conditional_t<Enable,ForceAlignedAccess<Derived>,Derived&> forceAlignedAccessIf() const;
template<bool Enable> EIGEN_DEVICE_FUNC
-inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
+inline std::conditional_t<Enable,ForceAlignedAccess<Derived>,Derived&> forceAlignedAccessIf();

EIGEN_DEVICE_FUNC Scalar sum() const;
EIGEN_DEVICE_FUNC Scalar mean() const;
@@ -621,27 +619,21 @@ template<typename Derived> class DenseBase
/** This is the const version of iterator (aka read-only) */
typedef random_access_iterator_type const_iterator;
#else
-typedef typename internal::conditional< (Flags&DirectAccessBit)==DirectAccessBit,
-internal::pointer_based_stl_iterator<Derived>,
-internal::generic_randaccess_stl_iterator<Derived>
->::type iterator_type;
+typedef std::conditional_t< (Flags&DirectAccessBit)==DirectAccessBit,
+internal::pointer_based_stl_iterator<Derived>,
+internal::generic_randaccess_stl_iterator<Derived>
+> iterator_type;

-typedef typename internal::conditional< (Flags&DirectAccessBit)==DirectAccessBit,
-internal::pointer_based_stl_iterator<const Derived>,
-internal::generic_randaccess_stl_iterator<const Derived>
->::type const_iterator_type;
+typedef std::conditional_t< (Flags&DirectAccessBit)==DirectAccessBit,
+internal::pointer_based_stl_iterator<const Derived>,
+internal::generic_randaccess_stl_iterator<const Derived>
+> const_iterator_type;

// Stl-style iterators are supported only for vectors.

-typedef typename internal::conditional< IsVectorAtCompileTime,
-iterator_type,
-void
->::type iterator;
+typedef std::conditional_t<IsVectorAtCompileTime, iterator_type, void> iterator;

-typedef typename internal::conditional< IsVectorAtCompileTime,
-const_iterator_type,
-void
->::type const_iterator;
+typedef std::conditional_t<IsVectorAtCompileTime, const_iterator_type, void> const_iterator;
#endif

inline iterator begin();
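Editor's illustration, not part of the commit: the typedefs above only define `iterator`/`const_iterator` for vector expressions (`std::conditional_t<IsVectorAtCompileTime, ..., void>`), which is what makes range-based for and the standard algorithms work on vectors:

```cpp
#include <Eigen/Dense>
#include <algorithm>
#include <numeric>

int main() {
  Eigen::VectorXd v = Eigen::VectorXd::Random(10);
  std::sort(v.begin(), v.end());  // STL algorithms via DenseBase iterators
  double mean = std::accumulate(v.begin(), v.end(), 0.0) / v.size();
  for (double& x : v) x -= mean;  // center the vector in place
}
```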
@@ -678,14 +670,13 @@ template<typename Derived> class DenseBase
protected:
EIGEN_DEFAULT_COPY_CONSTRUCTOR(DenseBase)
/** Default constructor. Do nothing. */
-EIGEN_DEVICE_FUNC DenseBase()
-{
+EIGEN_DEVICE_FUNC constexpr DenseBase() {
/* Just checks for self-consistency of the flags.
* Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down
*/
#ifdef EIGEN_INTERNAL_DEBUGGING
-EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))
-&& EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))),
+EIGEN_STATIC_ASSERT((internal::check_implication(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))
+&& internal::check_implication(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))),
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)
#endif
}
@@ -10,12 +10,14 @@
#ifndef EIGEN_DENSECOEFFSBASE_H
#define EIGEN_DENSECOEFFSBASE_H

+#include "./InternalHeaderCheck.h"
+
namespace Eigen {

namespace internal {
template<typename T> struct add_const_on_value_type_if_arithmetic
{
-typedef typename conditional<is_arithmetic<T>::value, T, typename add_const_on_value_type<T>::type>::type type;
+typedef std::conditional_t<is_arithmetic<T>::value, T, add_const_on_value_type_t<T>> type;
};
}

@@ -43,13 +45,13 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
// - This is the return type of the coeff() method.
// - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references
// to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value).
-// - The is_artihmetic check is required since "const int", "const double", etc. will cause warnings on some systems
+// - The is_arithmetic check is required since "const int", "const double", etc. will cause warnings on some systems
// while the declaration of "const T", where T is a non arithmetic type does not. Always returning "const Scalar&" is
// not possible, since the underlying expressions might not offer a valid address the reference could be referring to.
-typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
-const Scalar&,
-typename internal::conditional<internal::is_arithmetic<Scalar>::value, Scalar, const Scalar>::type
->::type CoeffReturnType;
+typedef std::conditional_t<bool(internal::traits<Derived>::Flags&LvalueBit),
+const Scalar&,
+std::conditional_t<internal::is_arithmetic<Scalar>::value, Scalar, const Scalar>
+> CoeffReturnType;

typedef typename internal::add_const_on_value_type_if_arithmetic<
typename internal::packet_traits<Scalar>::type
@@ -18,20 +18,20 @@
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X)
#endif

+#include "./InternalHeaderCheck.h"
+
namespace Eigen {

namespace internal {

struct constructor_without_unaligned_array_assert {};

-template<typename T, int Size>
-EIGEN_DEVICE_FUNC
-void check_static_allocation_size()
-{
-// if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
-#if EIGEN_STACK_ALLOCATION_LIMIT
+template <typename T, int Size>
+EIGEN_DEVICE_FUNC constexpr void check_static_allocation_size() {
+// if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
+#if EIGEN_STACK_ALLOCATION_LIMIT
EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
-#endif
+#endif
}

/** \internal
@@ -45,35 +45,30 @@ struct plain_array
{
T array[Size];

-EIGEN_DEVICE_FUNC
-plain_array()
-{
-check_static_allocation_size<T,Size>();
-}
+EIGEN_DEVICE_FUNC constexpr plain_array() { check_static_allocation_size<T, Size>(); }

-EIGEN_DEVICE_FUNC
-plain_array(constructor_without_unaligned_array_assert)
-{
-check_static_allocation_size<T,Size>();
+EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {
+check_static_allocation_size<T, Size>();
}
};

#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
-#elif EIGEN_GNUC_AT_LEAST(4,7)
+#elif EIGEN_COMP_GNUC
// GCC 4.7 is too aggressive in its optimizations and remove the alignment test based on the fact the array is declared to be aligned.
// See this bug report: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53900
// Hiding the origin of the array pointer behind a function argument seems to do the trick even if the function is inlined:
template<typename PtrType>
EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
-eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
+eigen_assert((internal::is_constant_evaluated() \
+|| (internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0) \
&& "this assertion is explained here: " \
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
" **** READ THIS WEB PAGE !!! ****");
#else
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
-eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \
+eigen_assert((internal::is_constant_evaluated() || (internal::UIntPtr(array) & (sizemask)) == 0) \
&& "this assertion is explained here: " \
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
" **** READ THIS WEB PAGE !!! ****");
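Editor's note: the alignment asserts now short-circuit on `internal::is_constant_evaluated()`, because during constant evaluation there is no runtime address to test and reinterpreting a pointer as an integer is not a constant expression. A sketch of the underlying C++20 idiom (not Eigen's exact wrapper):

```cpp
#include <type_traits>

constexpr int checked(int v) {
  if (!std::is_constant_evaluated()) {
    // Runtime-only checks (e.g. pointer alignment tests) belong here;
    // they are simply skipped when evaluated at compile time.
  }
  return v;
}

static_assert(checked(5) == 5, "still usable in constant expressions");
```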
@@ -84,17 +79,13 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 8>
{
EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];

-EIGEN_DEVICE_FUNC
-plain_array()
-{
+EIGEN_DEVICE_FUNC constexpr plain_array() {
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
check_static_allocation_size<T,Size>();
}

-EIGEN_DEVICE_FUNC
-plain_array(constructor_without_unaligned_array_assert)
-{
-check_static_allocation_size<T,Size>();
+EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {
+check_static_allocation_size<T, Size>();
}
};

@@ -103,17 +94,13 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 16>
{
EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];

-EIGEN_DEVICE_FUNC
-plain_array()
-{
+EIGEN_DEVICE_FUNC constexpr plain_array() {
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
check_static_allocation_size<T,Size>();
}

-EIGEN_DEVICE_FUNC
-plain_array(constructor_without_unaligned_array_assert)
-{
-check_static_allocation_size<T,Size>();
+EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {
+check_static_allocation_size<T, Size>();
}
};

@@ -122,17 +109,13 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 32>
{
EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];

-EIGEN_DEVICE_FUNC
-plain_array()
-{
+EIGEN_DEVICE_FUNC constexpr plain_array() {
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
check_static_allocation_size<T,Size>();
}

-EIGEN_DEVICE_FUNC
-plain_array(constructor_without_unaligned_array_assert)
-{
-check_static_allocation_size<T,Size>();
+EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {
+check_static_allocation_size<T, Size>();
}
};

@@ -141,17 +124,13 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 64>
{
EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];

-EIGEN_DEVICE_FUNC
-plain_array()
-{
+EIGEN_DEVICE_FUNC constexpr plain_array() {
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
check_static_allocation_size<T,Size>();
}

-EIGEN_DEVICE_FUNC
-plain_array(constructor_without_unaligned_array_assert)
-{
-check_static_allocation_size<T,Size>();
+EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {
+check_static_allocation_size<T, Size>();
}
};

@@ -159,8 +138,8 @@ template <typename T, int MatrixOrArrayOptions, int Alignment>
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
{
T array[1];
-EIGEN_DEVICE_FUNC plain_array() {}
-EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
+EIGEN_DEVICE_FUNC constexpr plain_array() {}
+EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {}
};

struct plain_array_helper {
@@ -201,57 +180,32 @@ struct plain_array_helper {
*
* \sa Matrix
*/
template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage;
template<typename T, int Size, int Rows_, int Cols_, int Options_> class DenseStorage;

// purely fixed-size matrix
template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage
template<typename T, int Size, int Rows_, int Cols_, int Options_> class DenseStorage
{
internal::plain_array<T,Size,_Options> m_data;
internal::plain_array<T,Size,Options_> m_data;
public:
EIGEN_DEVICE_FUNC DenseStorage() {
constexpr EIGEN_DEVICE_FUNC DenseStorage() {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
}
EIGEN_DEVICE_FUNC
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()) {}
#if !EIGEN_HAS_CXX11 || defined(EIGEN_DENSE_STORAGE_CTOR_PLUGIN)
EIGEN_DEVICE_FUNC
#if defined(EIGEN_DENSE_STORAGE_CTOR_PLUGIN)
EIGEN_DEVICE_FUNC constexpr
DenseStorage(const DenseStorage& other) : m_data(other.m_data) {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
}
#else
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) = default;
EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) = default;
#endif
#if !EIGEN_HAS_CXX11
EIGEN_DEVICE_FUNC
DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other) m_data = other.m_data;
return *this;
}
#else
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) = default;
#endif
#if EIGEN_HAS_RVALUE_REFERENCES
#if !EIGEN_HAS_CXX11
EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
: m_data(std::move(other.m_data))
{
}
EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
{
if (this != &other)
m_data = std::move(other.m_data);
return *this;
}
#else
EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&&) = default;
EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&&) = default;
#endif
#endif
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(const DenseStorage&) = default;
EIGEN_DEVICE_FUNC constexpr DenseStorage(DenseStorage&&) = default;
EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(DenseStorage&&) = default;
EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols) {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
eigen_internal_assert(size == rows * cols && rows == Rows_ && cols == Cols_);
EIGEN_UNUSED_VARIABLE(size);
EIGEN_UNUSED_VARIABLE(rows);
EIGEN_UNUSED_VARIABLE(cols);
@@ -259,57 +213,148 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
numext::swap(m_data, other.m_data);
}
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return _Rows;}
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return _Cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT { return Rows_; }
EIGEN_DEVICE_FUNC static constexpr Index cols(void) EIGEN_NOEXCEPT { return Cols_; }
EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index, Index) {}
EIGEN_DEVICE_FUNC constexpr void resize(Index, Index, Index) {}
EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; }
EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; }
};

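In user code, this purely fixed-size specialization is what backs fixed-size matrices: the coefficients live inline in the object and the dimensions are compile-time constants. A small usage sketch against the public API:

#include <Eigen/Core>

int main() {
  // Matrix4f uses the purely fixed-size DenseStorage: no heap allocation,
  // and rows()/cols() fold to the constants returned above.
  Eigen::Matrix4f m = Eigen::Matrix4f::Identity();
  static_assert(Eigen::Matrix4f::RowsAtCompileTime == 4, "compile-time dimension");
  return m(0, 0) == 1.0f ? 0 : 1;
}
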
// null matrix
template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
template<typename T, int Rows_, int Cols_, int Options_>
class DenseStorage<T, 0, Rows_, Cols_, Options_>
{
public:
EIGEN_DEVICE_FUNC DenseStorage() {}
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return _Rows;}
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return _Cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
EIGEN_DEVICE_FUNC const T *data() const { return 0; }
EIGEN_DEVICE_FUNC T *data() { return 0; }
static_assert(Rows_ * Cols_ == 0, "The fixed number of rows times columns must equal the storage size.");
EIGEN_DEVICE_FUNC constexpr DenseStorage() {}
EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) {}
EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) {}
EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(const DenseStorage&) { return *this; }
EIGEN_DEVICE_FUNC constexpr DenseStorage(Index,Index,Index) {}
EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage& ) {}
EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT {return Rows_;}
EIGEN_DEVICE_FUNC static constexpr Index cols(void) EIGEN_NOEXCEPT {return Cols_;}
EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index,Index,Index) {}
EIGEN_DEVICE_FUNC constexpr void resize(Index,Index,Index) {}
EIGEN_DEVICE_FUNC constexpr const T *data() const { return 0; }
EIGEN_DEVICE_FUNC constexpr T *data() { return 0; }
};

// more specializations for null matrices; these are necessary to resolve ambiguities
template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
: public DenseStorage<T, 0, 0, 0, _Options> { };

template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
: public DenseStorage<T, 0, 0, 0, _Options> { };

template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
: public DenseStorage<T, 0, 0, 0, _Options> { };

// dynamic-size matrix with fixed-size storage
template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
{
internal::plain_array<T,Size,_Options> m_data;
template<typename T, int Options_>
class DenseStorage<T, 0, Dynamic, Dynamic, Options_> {
Index m_rows;
Index m_cols;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows), m_cols(other.m_cols)
{
internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data);
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_rows(other.m_rows), m_cols(other.m_cols) {}
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) {
m_rows = other.m_rows;
m_cols = other.m_cols;
return *this;
}
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {
eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size.");
}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
numext::swap(m_rows,other.m_rows);
numext::swap(m_cols,other.m_cols);
}
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT {return m_rows;}
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT {return m_cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) {
m_rows = rows;
m_cols = cols;
eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size.");
}
EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) {
m_rows = rows;
m_cols = cols;
eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size.");
}
EIGEN_DEVICE_FUNC const T *data() const { return nullptr; }
EIGEN_DEVICE_FUNC T *data() { return nullptr; }
};

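These Size == 0 specializations exist so that matrices with a fixed zero dimension but a dynamic one still track their run-time extent while holding no data (data() returns nullptr). A hedged sketch, assuming the build accepts zero-sized matrix types:

#include <Eigen/Core>
#include <cassert>

int main() {
  // 0 rows at compile time, dynamic column count: only m_cols is stored.
  Eigen::Matrix<float, 0, Eigen::Dynamic> m(0, 7);
  assert(m.cols() == 7);
  assert(m.size() == 0);  // no coefficients; the ctor assert above holds
  return 0;
}
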
template<typename T, int Rows_, int Options_>
class DenseStorage<T, 0, Rows_, Dynamic, Options_> {
Index m_cols;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_cols(other.m_cols) {}
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) {
m_cols = other.m_cols;
return *this;
}
EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {
eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size.");
}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
numext::swap(m_cols, other.m_cols);
}
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return Rows_;}
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index, Index cols) {
m_cols = cols;
eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size.");
}
EIGEN_DEVICE_FUNC void resize(Index, Index, Index cols) {
m_cols = cols;
eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size.");
}
EIGEN_DEVICE_FUNC const T *data() const { return nullptr; }
EIGEN_DEVICE_FUNC T *data() { return nullptr; }
};

template<typename T, int Cols_, int Options_>
class DenseStorage<T, 0, Dynamic, Cols_, Options_> {
Index m_rows;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_rows(other.m_rows) {}
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) {
m_rows = other.m_rows;
return *this;
}
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {
eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size.");
}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
numext::swap(m_rows, other.m_rows);
}
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return Cols_;}
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) {
m_rows = rows;
eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size.");
}
EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) {
m_rows = rows;
eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size.");
}
EIGEN_DEVICE_FUNC const T *data() const { return nullptr; }
EIGEN_DEVICE_FUNC T *data() { return nullptr; }
};

// dynamic-size matrix with fixed-size storage
template<typename T, int Size, int Options_>
class DenseStorage<T, Size, Dynamic, Dynamic, Options_>
{
internal::plain_array<T,Size,Options_> m_data;
Index m_rows;
Index m_cols;
public:
EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(), m_rows(0), m_cols(0) {}
EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows), m_cols(other.m_cols) {
internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data);
}
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
@@ -320,113 +365,121 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic
}
return *this;
}
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
EIGEN_DEVICE_FUNC constexpr DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
{
internal::plain_array_helper::swap(m_data, m_rows * m_cols, other.m_data, other.m_rows * other.m_cols);
numext::swap(m_rows,other.m_rows);
numext::swap(m_cols,other.m_cols);
}
EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; }
EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; }
EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index rows, Index cols) {
m_rows = rows;
m_cols = cols;
}
EIGEN_DEVICE_FUNC constexpr void resize(Index, Index rows, Index cols) {
m_rows = rows;
m_cols = cols;
}
EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; }
EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; }
};

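This specialization backs matrices with dynamic dimensions but a fixed maximum size: the buffer is a stack-resident plain_array, and resizing only rewrites m_rows/m_cols. Usage sketch:

#include <Eigen/Core>
#include <cassert>

int main() {
  // Dynamic sizes with a 4x4 capacity: storage stays inline, no heap use.
  Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, 0, 4, 4> m(2, 3);
  m.setZero();
  m.resize(4, 4);  // still within the fixed capacity, no allocation
  assert(m.rows() == 4 && m.cols() == 4);
  return 0;
}
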
// dynamic-size matrix with fixed-size storage and fixed width
template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>
template<typename T, int Size, int Cols_, int Options_>
class DenseStorage<T, Size, Dynamic, Cols_, Options_>
{
internal::plain_array<T,Size,_Options> m_data;
internal::plain_array<T,Size,Options_> m_data;
Index m_rows;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows)
{
internal::plain_array_helper::copy(other.m_data, m_rows * _Cols, m_data);
}

EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_rows(0) {}
EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows) {
internal::plain_array_helper::copy(other.m_data, m_rows * Cols_, m_data);
}

EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
m_rows = other.m_rows;
internal::plain_array_helper::copy(other.m_data, m_rows * _Cols, m_data);
internal::plain_array_helper::copy(other.m_data, m_rows * Cols_, m_data);
}
return *this;
}
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
EIGEN_DEVICE_FUNC constexpr DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
{
internal::plain_array_helper::swap(m_data, m_rows * _Cols, other.m_data, other.m_rows * _Cols);
internal::plain_array_helper::swap(m_data, m_rows * Cols_, other.m_data, other.m_rows * Cols_);
numext::swap(m_rows, other.m_rows);
}
EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols(void) const EIGEN_NOEXCEPT {return _Cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
EIGEN_DEVICE_FUNC constexpr Index rows(void) const EIGEN_NOEXCEPT { return m_rows; }
EIGEN_DEVICE_FUNC constexpr Index cols(void) const EIGEN_NOEXCEPT { return Cols_; }
EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
EIGEN_DEVICE_FUNC constexpr void resize(Index, Index rows, Index) { m_rows = rows; }
EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; }
EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; }
};

// dynamic-size matrix with fixed-size storage and fixed height
template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>
template<typename T, int Size, int Rows_, int Options_>
class DenseStorage<T, Size, Rows_, Dynamic, Options_>
{
internal::plain_array<T,Size,_Options> m_data;
internal::plain_array<T,Size,Options_> m_data;
Index m_cols;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(other.m_cols)
{
internal::plain_array_helper::copy(other.m_data, _Rows * m_cols, m_data);
}
EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_cols(0) {}
EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other)
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(other.m_cols) {
internal::plain_array_helper::copy(other.m_data, Rows_ * m_cols, m_data);
}
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
m_cols = other.m_cols;
internal::plain_array_helper::copy(other.m_data, _Rows * m_cols, m_data);
internal::plain_array_helper::copy(other.m_data, Rows_ * m_cols, m_data);
}
return *this;
}
EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
internal::plain_array_helper::swap(m_data, _Rows * m_cols, other.m_data, _Rows * other.m_cols);
internal::plain_array_helper::swap(m_data, Rows_ * m_cols, other.m_data, Rows_ * other.m_cols);
numext::swap(m_cols, other.m_cols);
}
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows(void) const EIGEN_NOEXCEPT {return _Rows;}
EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
EIGEN_DEVICE_FUNC void resize(Index, Index, Index cols) { m_cols = cols; }
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
EIGEN_DEVICE_FUNC constexpr Index rows(void) const EIGEN_NOEXCEPT { return Rows_; }
EIGEN_DEVICE_FUNC constexpr Index cols(void) const EIGEN_NOEXCEPT { return m_cols; }
EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
EIGEN_DEVICE_FUNC constexpr void resize(Index, Index, Index cols) { m_cols = cols; }
EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; }
EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; }
};

// purely dynamic matrix.
template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>
template<typename T, int Options_>
class DenseStorage<T, Dynamic, Dynamic, Dynamic, Options_>
{
T *m_data;
Index m_rows;
Index m_cols;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(0), m_rows(0), m_cols(0) {}
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
{
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
: m_data(internal::conditional_aligned_new_auto<T, (Options_ & DontAlign) == 0>(size)),
m_rows(rows),
m_cols(cols) {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
}
}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
: m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(other.m_rows*other.m_cols))
, m_rows(other.m_rows)
, m_cols(other.m_cols)
{
@@ -442,7 +495,6 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
}
return *this;
}
#if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
: m_data(std::move(other.m_data))
@@ -461,8 +513,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
numext::swap(m_cols, other.m_cols);
return *this;
}
#endif
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, m_rows*m_cols); }
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
{
numext::swap(m_data,other.m_data);
@@ -473,7 +524,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}
void conservativeResize(Index size, Index rows, Index cols)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
m_data = internal::conditional_aligned_realloc_new_auto<T,(Options_&DontAlign)==0>(m_data, size, m_rows*m_cols);
m_rows = rows;
m_cols = cols;
}
@@ -481,9 +532,9 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
{
if(size != m_rows*m_cols)
{
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, m_rows*m_cols);
if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size);
else
m_data = 0;
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
@@ -496,25 +547,25 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
};

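The two resize paths above differ in what happens to existing coefficients: conservativeResize() reallocates via conditional_aligned_realloc_new_auto and preserves the overlapping part, while resize() to a different total size deletes and reallocates, leaving the contents uninitialized. A short sketch at the public-API level:

#include <Eigen/Core>
#include <cassert>

int main() {
  Eigen::MatrixXf m(2, 2);
  m << 1, 2,
       3, 4;
  m.conservativeResize(3, 2);                  // realloc path: old values kept
  assert(m(0, 0) == 1.0f && m(1, 1) == 4.0f);
  m.resize(5, 5);                              // free + fresh allocation: values lost
  return 0;
}
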
// matrix with dynamic width and fixed height (so that matrix has dynamic size).
template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>
{
template<typename T, int Rows_, int Options_>
class DenseStorage<T, Dynamic, Rows_, Dynamic, Options_> {
T *m_data;
Index m_cols;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
{
EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_cols(0) {}
explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
: m_data(internal::conditional_aligned_new_auto<T, (Options_ & DontAlign) == 0>(size)), m_cols(cols) {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
eigen_internal_assert(size==rows*cols && rows==Rows_ && cols >=0);
EIGEN_UNUSED_VARIABLE(rows);
}
}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
: m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(Rows_*other.m_cols))
, m_cols(other.m_cols)
{
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows)
internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*Rows_)
internal::smart_copy(other.m_data, other.m_data+Rows_*m_cols, m_data);
}
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
@@ -525,7 +576,6 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
}
return *this;
}
#if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
: m_data(std::move(other.m_data))
@@ -541,26 +591,25 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
numext::swap(m_cols, other.m_cols);
return *this;
}
#endif
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, Rows_*m_cols); }
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
numext::swap(m_data,other.m_data);
numext::swap(m_cols,other.m_cols);
}
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return _Rows;}
EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT { return Rows_; }
EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
m_data = internal::conditional_aligned_realloc_new_auto<T,(Options_&DontAlign)==0>(m_data, size, Rows_*m_cols);
m_cols = cols;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols)
{
if(size != _Rows*m_cols)
if(size != Rows_*m_cols)
{
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, Rows_*m_cols);
if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size);
else
m_data = 0;
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
@@ -572,25 +621,26 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
};

// matrix with dynamic height and fixed width (so that matrix has dynamic size).
template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>
template<typename T, int Cols_, int Options_>
class DenseStorage<T, Dynamic, Dynamic, Cols_, Options_>
{
T *m_data;
Index m_rows;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
{
EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_rows(0) {}
explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols)
: m_data(internal::conditional_aligned_new_auto<T, (Options_ & DontAlign) == 0>(size)), m_rows(rows) {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
eigen_internal_assert(size==rows*cols && rows>=0 && cols == Cols_);
EIGEN_UNUSED_VARIABLE(cols);
}
}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
: m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(other.m_rows*Cols_))
, m_rows(other.m_rows)
{
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols)
internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*Cols_)
internal::smart_copy(other.m_data, other.m_data+other.m_rows*Cols_, m_data);
}
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
@@ -601,7 +651,6 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
}
return *this;
}
#if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
: m_data(std::move(other.m_data))
@@ -617,26 +666,25 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
numext::swap(m_rows, other.m_rows);
return *this;
}
#endif
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, Cols_*m_rows); }
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
numext::swap(m_data,other.m_data);
numext::swap(m_rows,other.m_rows);
}
EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) {return _Cols;}
EIGEN_DEVICE_FUNC static constexpr Index cols(void) { return Cols_; }
void conservativeResize(Index size, Index rows, Index)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
m_data = internal::conditional_aligned_realloc_new_auto<T,(Options_&DontAlign)==0>(m_data, size, m_rows*Cols_);
m_rows = rows;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index)
{
if(size != m_rows*_Cols)
if(size != m_rows*Cols_)
{
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, Cols_*m_rows);
if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size);
else
m_data = 0;
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})

@@ -11,6 +11,8 @@
#ifndef EIGEN_DIAGONAL_H
#define EIGEN_DIAGONAL_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class Diagonal
@@ -18,8 +20,8 @@ namespace Eigen {
*
* \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix
*
* \param MatrixType the type of the object in which we are taking a sub/main/super diagonal
* \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal.
* \tparam MatrixType the type of the object in which we are taking a sub/main/super diagonal
* \tparam DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal.
* A positive value means a superdiagonal, a negative value means a subdiagonal.
* You can also use DynamicIndex so the index can be set at runtime.
*
@@ -38,21 +40,21 @@ struct traits<Diagonal<MatrixType,DiagIndex> >
: traits<MatrixType>
{
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
typedef std::remove_reference_t<MatrixTypeNested> MatrixTypeNested_;
typedef typename MatrixType::StorageKind StorageKind;
enum {
RowsAtCompileTime = (int(DiagIndex) == DynamicIndex || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic
: (EIGEN_PLAIN_ENUM_MIN(MatrixType::RowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
MatrixType::ColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
: (plain_enum_min(MatrixType::RowsAtCompileTime - plain_enum_max(-DiagIndex, 0),
MatrixType::ColsAtCompileTime - plain_enum_max( DiagIndex, 0))),
ColsAtCompileTime = 1,
MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic
: DiagIndex == DynamicIndex ? EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime,
MatrixType::MaxColsAtCompileTime)
: (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
: DiagIndex == DynamicIndex ? min_size_prefer_fixed(MatrixType::MaxRowsAtCompileTime,
MatrixType::MaxColsAtCompileTime)
: (plain_enum_min(MatrixType::MaxRowsAtCompileTime - plain_enum_max(-DiagIndex, 0),
MatrixType::MaxColsAtCompileTime - plain_enum_max( DiagIndex, 0))),
MaxColsAtCompileTime = 1,
MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
Flags = (unsigned int)MatrixTypeNested_::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
OuterStrideAtCompileTime = 0
@@ -60,12 +62,12 @@ struct traits<Diagonal<MatrixType,DiagIndex> >
};
}

template<typename MatrixType, int _DiagIndex> class Diagonal
: public internal::dense_xpr_base< Diagonal<MatrixType,_DiagIndex> >::type
template<typename MatrixType, int DiagIndex_> class Diagonal
: public internal::dense_xpr_base< Diagonal<MatrixType,DiagIndex_> >::type
{
public:

enum { DiagIndex = _DiagIndex };
enum { DiagIndex = DiagIndex_ };
typedef typename internal::dense_xpr_base<Diagonal>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)

@@ -95,11 +97,11 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
inline Index outerStride() const EIGEN_NOEXCEPT { return 0; }

typedef typename internal::conditional<
internal::is_lvalue<MatrixType>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
typedef std::conditional_t<
internal::is_lvalue<MatrixType>::value,
Scalar,
const Scalar
> ScalarWithConstIfNotLvalue;

EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
@@ -145,7 +147,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
}

EIGEN_DEVICE_FUNC
inline const typename internal::remove_all<typename MatrixType::Nested>::type&
inline const internal::remove_all_t<typename MatrixType::Nested>&
nestedExpression() const
{
return m_matrix;
@@ -191,7 +193,8 @@ MatrixBase<Derived>::diagonal()

/** This is the const version of diagonal(). */
template<typename Derived>
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalReturnType
EIGEN_DEVICE_FUNC inline
const typename MatrixBase<Derived>::ConstDiagonalReturnType
MatrixBase<Derived>::diagonal() const
{
return ConstDiagonalReturnType(derived());
@@ -209,18 +212,18 @@ MatrixBase<Derived>::diagonal() const
*
* \sa MatrixBase::diagonal(), class Diagonal */
template<typename Derived>
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType
EIGEN_DEVICE_FUNC inline Diagonal<Derived, DynamicIndex>
MatrixBase<Derived>::diagonal(Index index)
{
return DiagonalDynamicIndexReturnType(derived(), index);
return Diagonal<Derived, DynamicIndex>(derived(), index);
}

/** This is the const version of diagonal(Index). */
template<typename Derived>
EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType
EIGEN_DEVICE_FUNC inline const Diagonal<const Derived, DynamicIndex>
MatrixBase<Derived>::diagonal(Index index) const
{
return ConstDiagonalDynamicIndexReturnType(derived(), index);
return Diagonal<const Derived, DynamicIndex>(derived(), index);
}

/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
@@ -237,20 +240,20 @@ MatrixBase<Derived>::diagonal(Index index) const
template<typename Derived>
template<int Index_>
EIGEN_DEVICE_FUNC
inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type
inline Diagonal<Derived, Index_>
MatrixBase<Derived>::diagonal()
{
return typename DiagonalIndexReturnType<Index_>::Type(derived());
return Diagonal<Derived, Index_>(derived());
}

/** This is the const version of diagonal<int>(). */
template<typename Derived>
template<int Index_>
EIGEN_DEVICE_FUNC
inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type
inline const Diagonal<const Derived, Index_>
MatrixBase<Derived>::diagonal() const
{
return typename ConstDiagonalIndexReturnType<Index_>::Type(derived());
return Diagonal<const Derived, Index_>(derived());
}

} // end namespace Eigen

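Taken together, these accessors expose any diagonal of a matrix as a vector expression (writable for lvalue matrices), with the offset fixed either at compile time or at run time:

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::Matrix3f m;
  m << 1, 2, 3,
       4, 5, 6,
       7, 8, 9;
  std::cout << m.diagonal().transpose()    << "\n";  // main diagonal: 1 5 9
  std::cout << m.diagonal<1>().transpose() << "\n";  // superdiagonal: 2 6
  std::cout << m.diagonal(-1).transpose()  << "\n";  // subdiagonal, runtime index: 4 8
  m.diagonal().setZero();  // Diagonal is an lvalue expression here
  return 0;
}
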
@@ -11,9 +11,23 @@
#ifndef EIGEN_DIAGONALMATRIX_H
#define EIGEN_DIAGONALMATRIX_H

namespace Eigen {
#include "./InternalHeaderCheck.h"

#ifndef EIGEN_PARSED_BY_DOXYGEN
namespace Eigen {

/** \class DiagonalBase
* \ingroup Core_Module
*
* \brief Base class for diagonal matrices and expressions
*
* This is the base class that is inherited by diagonal matrix and related expression
* types, which internally use a vector for storing the diagonal entries. Diagonal
* types always represent square matrices.
*
* \tparam Derived is the derived type, a DiagonalMatrix or DiagonalWrapper.
*
* \sa class DiagonalMatrix, class DiagonalWrapper
*/
template<typename Derived>
class DiagonalBase : public EigenBase<Derived>
{
@@ -37,24 +51,35 @@ class DiagonalBase : public EigenBase<Derived>
typedef DenseMatrixType DenseType;
typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;

/** \returns a const reference to the derived object. */
EIGEN_DEVICE_FUNC
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
/** \returns a reference to the derived object. */
EIGEN_DEVICE_FUNC
inline Derived& derived() { return *static_cast<Derived*>(this); }

/**
* Constructs a dense matrix from \c *this. Note, this directly returns a dense matrix type,
* not an expression.
* \returns A dense matrix, with its diagonal entries set from the derived object. */
EIGEN_DEVICE_FUNC
DenseMatrixType toDenseMatrix() const { return derived(); }

/** \returns a reference to the derived object's vector of diagonal coefficients. */
EIGEN_DEVICE_FUNC
inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
/** \returns a const reference to the derived object's vector of diagonal coefficients. */
EIGEN_DEVICE_FUNC
inline DiagonalVectorType& diagonal() { return derived().diagonal(); }

EIGEN_DEVICE_FUNC
/** \returns the number of rows. */
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
inline Index rows() const { return diagonal().size(); }
EIGEN_DEVICE_FUNC
/** \returns the number of columns. */
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
inline Index cols() const { return diagonal().size(); }

/** \returns the diagonal matrix product of \c *this by the dense matrix, \a matrix */
template<typename MatrixDerived>
EIGEN_DEVICE_FUNC
const Product<Derived,MatrixDerived,LazyProduct>
@@ -63,88 +88,99 @@ class DiagonalBase : public EigenBase<Derived>
return Product<Derived, MatrixDerived, LazyProduct>(derived(),matrix.derived());
}

typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> > InverseReturnType;
EIGEN_DEVICE_FUNC
inline const InverseReturnType
inverse() const
{
return InverseReturnType(diagonal().cwiseInverse());
}

EIGEN_DEVICE_FUNC
inline const DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >
operator*(const Scalar& scalar) const
{
return DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >(diagonal() * scalar);
}
EIGEN_DEVICE_FUNC
friend inline const DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >
operator*(const Scalar& scalar, const DiagonalBase& other)
{
return DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >(scalar * other.diagonal());
template <typename OtherDerived>
using DiagonalProductReturnType = DiagonalWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(
DiagonalVectorType, typename OtherDerived::DiagonalVectorType, product)>;

/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a other */
template <typename OtherDerived>
EIGEN_DEVICE_FUNC const DiagonalProductReturnType<OtherDerived> operator*(
const DiagonalBase<OtherDerived>& other) const {
return diagonal().cwiseProduct(other.diagonal()).asDiagonal();
}

template<typename OtherDerived>
using DiagonalInverseReturnType =
DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType>>;

/** \returns the inverse of \c *this. Computed as the coefficient-wise inverse of the diagonal. */
EIGEN_DEVICE_FUNC
#ifdef EIGEN_PARSED_BY_DOXYGEN
inline unspecified_expression_type
#else
inline const DiagonalWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(DiagonalVectorType,typename OtherDerived::DiagonalVectorType,sum) >
#endif
operator+(const DiagonalBase<OtherDerived>& other) const
{
inline const DiagonalInverseReturnType inverse() const { return diagonal().cwiseInverse().asDiagonal(); }

using DiagonalScaleReturnType =
DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType, Scalar, product)>;

/** \returns the product of \c *this by the scalar \a scalar */
EIGEN_DEVICE_FUNC
inline const DiagonalScaleReturnType operator*(const Scalar& scalar) const {
return (diagonal() * scalar).asDiagonal();
}

using ScaleDiagonalReturnType =
DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar, DiagonalVectorType, product)>;

/** \returns the product of a scalar and the diagonal matrix \a other */
EIGEN_DEVICE_FUNC
friend inline const ScaleDiagonalReturnType operator*(const Scalar& scalar, const DiagonalBase& other) {
return (scalar * other.diagonal()).asDiagonal();
}

template <typename OtherDerived>
using DiagonalSumReturnType = DiagonalWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(
DiagonalVectorType, typename OtherDerived::DiagonalVectorType, sum)>;

/** \returns the sum of \c *this and the diagonal matrix \a other */
template <typename OtherDerived>
EIGEN_DEVICE_FUNC inline const DiagonalSumReturnType<OtherDerived> operator+(
const DiagonalBase<OtherDerived>& other) const {
return (diagonal() + other.diagonal()).asDiagonal();
}

template<typename OtherDerived>
EIGEN_DEVICE_FUNC
#ifdef EIGEN_PARSED_BY_DOXYGEN
inline unspecified_expression_type
#else
inline const DiagonalWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(DiagonalVectorType,typename OtherDerived::DiagonalVectorType,difference) >
#endif
operator-(const DiagonalBase<OtherDerived>& other) const
{
template <typename OtherDerived>
using DiagonalDifferenceReturnType = DiagonalWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(
DiagonalVectorType, typename OtherDerived::DiagonalVectorType, difference)>;

/** \returns the difference of \c *this and the diagonal matrix \a other */
template <typename OtherDerived>
EIGEN_DEVICE_FUNC inline const DiagonalDifferenceReturnType<OtherDerived> operator-(
const DiagonalBase<OtherDerived>& other) const {
return (diagonal() - other.diagonal()).asDiagonal();
}
};

#endif

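The rewritten operators keep diagonal algebra diagonal: products, sums, differences and inverses are computed on the coefficient vectors and re-wrapped with asDiagonal(), so no dense n-by-n temporary is created. A usage sketch, assuming a build that includes the diagonal-diagonal operators added here:

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::DiagonalMatrix<double, 3> d1(1.0, 2.0, 4.0);
  Eigen::DiagonalMatrix<double, 3> d2(2.0, 2.0, 2.0);
  Eigen::Vector3d v(1.0, 1.0, 1.0);
  std::cout << ((d1 * d2) * v).transpose()    << "\n";  // 2 4 8
  std::cout << (d1.inverse() * v).transpose() << "\n";  // 1 0.5 0.25
  return 0;
}
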
/** \class DiagonalMatrix
* \ingroup Core_Module
*
* \brief Represents a diagonal matrix with its storage
*
* \param _Scalar the type of coefficients
* \param SizeAtCompileTime the dimension of the matrix, or Dynamic
* \param MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults
* to SizeAtCompileTime. Most of the time, you do not need to specify it.
*
* \sa class DiagonalWrapper
*/
* \ingroup Core_Module
*
* \brief Represents a diagonal matrix with its storage
*
* \tparam Scalar_ the type of coefficients
* \tparam SizeAtCompileTime the dimension of the matrix, or Dynamic
* \tparam MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults
* to SizeAtCompileTime. Most of the time, you do not need to specify it.
*
* \sa class DiagonalBase, class DiagonalWrapper
*/

namespace internal {
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
struct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
: traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
template<typename Scalar_, int SizeAtCompileTime, int MaxSizeAtCompileTime>
struct traits<DiagonalMatrix<Scalar_,SizeAtCompileTime,MaxSizeAtCompileTime> >
: traits<Matrix<Scalar_,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{
typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
typedef Matrix<Scalar_,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
typedef DiagonalShape StorageKind;
enum {
Flags = LvalueBit | NoPreferredStorageOrderBit
Flags = LvalueBit | NoPreferredStorageOrderBit | NestByRefBit
};
};
}
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
template<typename Scalar_, int SizeAtCompileTime, int MaxSizeAtCompileTime>
class DiagonalMatrix
: public DiagonalBase<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
: public DiagonalBase<DiagonalMatrix<Scalar_,SizeAtCompileTime,MaxSizeAtCompileTime> >
{
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef typename internal::traits<DiagonalMatrix>::DiagonalVectorType DiagonalVectorType;
typedef const DiagonalMatrix& Nested;
typedef _Scalar Scalar;
typedef Scalar_ Scalar;
typedef typename internal::traits<DiagonalMatrix>::StorageKind StorageKind;
typedef typename internal::traits<DiagonalMatrix>::StorageIndex StorageIndex;
#endif
@@ -178,10 +214,7 @@ class DiagonalMatrix
EIGEN_DEVICE_FUNC
inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {}

#if EIGEN_HAS_CXX11
/** \brief Construct a diagonal matrix with fixed size from an arbitrary number of coefficients. \cpp11
*
* There exist C++98 analogue constructors for fixed-size diagonal matrices having 2 or 3 coefficients.
/** \brief Construct a diagonal matrix with fixed size from an arbitrary number of coefficients.
*
* \warning To construct a diagonal matrix of fixed size, the number of values passed to this
* constructor must match the fixed dimension of \c *this.
@@ -200,7 +233,10 @@ class DiagonalMatrix
EIGEN_DEVICE_FUNC
explicit EIGEN_STRONG_INLINE DiagonalMatrix(const std::initializer_list<std::initializer_list<Scalar>>& list)
: m_diagonal(list) {}
#endif // EIGEN_HAS_CXX11

/** \brief Constructs a DiagonalMatrix from an r-value diagonal vector type */
EIGEN_DEVICE_FUNC
explicit inline DiagonalMatrix(DiagonalVectorType&& diag) : m_diagonal(std::move(diag)) {}

/** Copy constructor. */
template<typename OtherDerived>
@@ -239,6 +275,22 @@ class DiagonalMatrix
}
#endif

typedef DiagonalWrapper<const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, DiagonalVectorType>>
InitializeReturnType;

/** Initializes a diagonal matrix of size SizeAtCompileTime with coefficients set to zero */
EIGEN_DEVICE_FUNC
static const InitializeReturnType Zero() { return DiagonalVectorType::Zero().asDiagonal(); }
/** Initializes a diagonal matrix of size dim with coefficients set to zero */
EIGEN_DEVICE_FUNC
static const InitializeReturnType Zero(Index size) { return DiagonalVectorType::Zero(size).asDiagonal(); }
/** Initializes an identity matrix of size SizeAtCompileTime */
EIGEN_DEVICE_FUNC
static const InitializeReturnType Identity() { return DiagonalVectorType::Ones().asDiagonal(); }
/** Initializes an identity matrix of size dim */
EIGEN_DEVICE_FUNC
static const InitializeReturnType Identity(Index size) { return DiagonalVectorType::Ones(size).asDiagonal(); }

/** Resizes to given size. */
EIGEN_DEVICE_FUNC
inline void resize(Index size) { m_diagonal.resize(size); }
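Usage sketch for the constructors and the new static helpers (Zero()/Identity() are added by this commit and return diagonal expressions built from DiagonalVectorType::Zero()/Ones(), so they are absent from older releases):

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::DiagonalMatrix<double, 3> d(1.0, 2.0, 3.0);  // fixed-size coefficient ctor
  Eigen::Matrix3d m = Eigen::Matrix3d::Ones();
  std::cout << (d * m) << "\n";                       // scales the rows of m
  Eigen::Matrix3d id = Eigen::DiagonalMatrix<double, 3>::Identity();  // dense copy of the expression
  std::cout << id << "\n";
  return 0;
}
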
@@ -261,7 +313,7 @@ class DiagonalMatrix
*
* \brief Expression of a diagonal matrix
*
* \param _DiagonalVectorType the type of the vector of diagonal coefficients
* \tparam DiagonalVectorType_ the type of the vector of diagonal coefficients
*
* This class is an expression of a diagonal matrix, but not storing its own vector of diagonal coefficients,
* instead wrapping an existing vector expression. It is the return type of MatrixBase::asDiagonal()
@@ -271,10 +323,10 @@ class DiagonalMatrix
*/

namespace internal {
template<typename _DiagonalVectorType>
struct traits<DiagonalWrapper<_DiagonalVectorType> >
template<typename DiagonalVectorType_>
struct traits<DiagonalWrapper<DiagonalVectorType_> >
{
typedef _DiagonalVectorType DiagonalVectorType;
typedef DiagonalVectorType_ DiagonalVectorType;
typedef typename DiagonalVectorType::Scalar Scalar;
typedef typename DiagonalVectorType::StorageIndex StorageIndex;
typedef DiagonalShape StorageKind;
@@ -289,13 +341,13 @@ struct traits<DiagonalWrapper<_DiagonalVectorType> >
};
}

template<typename _DiagonalVectorType>
template<typename DiagonalVectorType_>
class DiagonalWrapper
: public DiagonalBase<DiagonalWrapper<_DiagonalVectorType> >, internal::no_assignment_operator
: public DiagonalBase<DiagonalWrapper<DiagonalVectorType_> >, internal::no_assignment_operator
{
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef _DiagonalVectorType DiagonalVectorType;
typedef DiagonalVectorType_ DiagonalVectorType;
typedef DiagonalWrapper Nested;
#endif

@@ -386,6 +438,6 @@ struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>

} // namespace internal

} // end namespace Eigen
} // end namespace Eigen

#endif // EIGEN_DIAGONALMATRIX_H

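DiagonalWrapper is what asDiagonal() returns: it wraps an existing vector by reference instead of copying it, so the product below scales the columns of m on the fly:

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::Vector3d w(1.0, 10.0, 100.0);
  Eigen::Matrix3d m = Eigen::Matrix3d::Ones();
  std::cout << (m * w.asDiagonal()) << "\n";  // column j scaled by w(j)
  return 0;
}
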
@@ -11,6 +11,8 @@
|
||||
#ifndef EIGEN_DIAGONALPRODUCT_H
|
||||
#define EIGEN_DIAGONALPRODUCT_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.
|
||||
|
||||
@@ -10,6 +10,8 @@
#ifndef EIGEN_DOT_H
#define EIGEN_DOT_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -18,14 +20,9 @@ namespace internal {
// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
// looking at the static assertions. Thus this is a trick to get better compile errors.
template<typename T, typename U,
// the NeedToTranspose condition here is taken straight from Assign.h
bool NeedToTranspose = T::IsVectorAtCompileTime
&& U::IsVectorAtCompileTime
&& ((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1)
| // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
// revert to || as soon as not needed anymore.
(int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))
>
bool NeedToTranspose = T::IsVectorAtCompileTime && U::IsVectorAtCompileTime &&
((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1) ||
(int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))>
struct dot_nocheck
{
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
@@ -123,8 +120,8 @@ template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::normalized() const
{
typedef typename internal::nested_eval<Derived,2>::type _Nested;
_Nested n(derived());
typedef typename internal::nested_eval<Derived,2>::type Nested_;
Nested_ n(derived());
RealScalar z = n.squaredNorm();
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
if(z>RealScalar(0))
@@ -166,8 +163,8 @@ template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::stableNormalized() const
{
typedef typename internal::nested_eval<Derived,3>::type _Nested;
_Nested n(derived());
typedef typename internal::nested_eval<Derived,3>::type Nested_;
Nested_ n(derived());
RealScalar w = n.cwiseAbs().maxCoeff();
RealScalar z = (n/w).squaredNorm();
if(z>RealScalar(0))
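The stableNormalized() body shown above first divides by the largest absolute coefficient before taking the squared norm, which avoids overflow for vectors with huge entries. A minimal standalone sketch of that idea (plain C++ on a hypothetical 2-vector; not Eigen's actual code path):

#include <cmath>
#include <cstdio>

int main() {
  // Naively squaring these entries would overflow to +inf.
  const double v[2] = {3e200, 4e200};
  // Pre-scale by the max absolute coefficient, as stableNormalized() does.
  const double w = std::fmax(std::fabs(v[0]), std::fabs(v[1]));
  const double z = (v[0]/w)*(v[0]/w) + (v[1]/w)*(v[1]/w);  // finite: 1.5625
  const double invNorm = 1.0 / (w * std::sqrt(z));
  std::printf("%g %g\n", v[0]*invNorm, v[1]*invNorm);      // prints 0.6 0.8
  return 0;
}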
@@ -11,6 +11,8 @@
#ifndef EIGEN_EIGENBASE_H
#define EIGEN_EIGENBASE_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class EigenBase

@@ -10,6 +10,8 @@
#ifndef EIGEN_FORCEALIGNEDACCESS_H
#define EIGEN_FORCEALIGNEDACCESS_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class ForceAlignedAccess
@@ -128,7 +130,7 @@ MatrixBase<Derived>::forceAlignedAccess()
*/
template<typename Derived>
template<bool Enable>
inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type
inline add_const_on_value_type_t<std::conditional_t<Enable,ForceAlignedAccess<Derived>,Derived&>>
MatrixBase<Derived>::forceAlignedAccessIf() const
{
return derived(); // FIXME This should not work but apparently is never used
@@ -139,7 +141,7 @@ MatrixBase<Derived>::forceAlignedAccessIf() const
*/
template<typename Derived>
template<bool Enable>
inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type
inline std::conditional_t<Enable,ForceAlignedAccess<Derived>,Derived&>
MatrixBase<Derived>::forceAlignedAccessIf()
{
return derived(); // FIXME This should not work but apparently is never used

@@ -11,6 +11,8 @@
#ifndef EIGEN_FUZZY_H
#define EIGEN_FUZZY_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal
@@ -11,6 +11,8 @@
#ifndef EIGEN_GENERAL_PRODUCT_H
#define EIGEN_GENERAL_PRODUCT_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

enum {
@@ -50,17 +52,17 @@ template<int Size, int MaxSize> struct product_size_category

template<typename Lhs, typename Rhs> struct product_type
{
typedef typename remove_all<Lhs>::type _Lhs;
typedef typename remove_all<Rhs>::type _Rhs;
typedef remove_all_t<Lhs> Lhs_;
typedef remove_all_t<Rhs> Rhs_;
enum {
MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
Rows = traits<_Lhs>::RowsAtCompileTime,
MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
Cols = traits<_Rhs>::ColsAtCompileTime,
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
traits<_Rhs>::MaxRowsAtCompileTime),
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
traits<_Rhs>::RowsAtCompileTime)
MaxRows = traits<Lhs_>::MaxRowsAtCompileTime,
Rows = traits<Lhs_>::RowsAtCompileTime,
MaxCols = traits<Rhs_>::MaxColsAtCompileTime,
Cols = traits<Rhs_>::ColsAtCompileTime,
MaxDepth = min_size_prefer_fixed(traits<Lhs_>::MaxColsAtCompileTime,
traits<Rhs_>::MaxRowsAtCompileTime),
Depth = min_size_prefer_fixed(traits<Lhs_>::ColsAtCompileTime,
traits<Rhs_>::RowsAtCompileTime)
};

// the splitting into different lines of code here, introducing the _select enums and the typedef below,
@@ -180,12 +182,13 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
PacketSize = internal::packet_traits<Scalar>::size
};
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0,
internal::plain_enum_min(AlignedMax, PacketSize)> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
// Some architectures cannot align on the stack,
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() {
return ForceAlignment
? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
@@ -216,14 +219,13 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar ResScalar;
typedef typename Dest::RealScalar RealScalar;

typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;

typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
typedef Map<Matrix<ResScalar,Dynamic,1>, plain_enum_min(AlignedMax, internal::packet_traits<ResScalar>::size)> MappedDest;

ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
@@ -231,7 +233,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);

// make sure Dest is a compile-time vector type (bug 1166)
typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
typedef std::conditional_t<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr> ActualDest;

enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
@@ -261,7 +263,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
{
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;

const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
const bool alphaIsCompatible = (!ComplexByReal) || (numext::is_exactly_zero(numext::imag(actualAlpha)));
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;

ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
@@ -314,10 +316,10 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
typedef internal::remove_all_t<ActualRhsType> ActualRhsTypeCleaned;

typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
std::add_const_t<ActualLhsType> actualLhs = LhsBlasTraits::extract(lhs);
std::add_const_t<ActualRhsType> actualRhs = RhsBlasTraits::extract(rhs);

ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);
@@ -11,6 +11,8 @@
#ifndef EIGEN_GENERIC_PACKET_MATH_H
#define EIGEN_GENERIC_PACKET_MATH_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -57,12 +59,14 @@ struct default_packet_traits
HasMax = 1,
HasConj = 1,
HasSetLinear = 1,
HasSign = 1,
HasBlend = 0,
// This flag is used to indicate whether packet comparison is supported.
// pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
HasCmp = 0,

HasDiv = 0,
HasReciprocal = 0,
HasSqrt = 0,
HasRsqrt = 0,
HasExp = 0,
@@ -98,8 +102,7 @@ struct default_packet_traits
HasRound = 0,
HasRint = 0,
HasFloor = 0,
HasCeil = 0,
HasSign = 0
HasCeil = 0
};
};

@@ -160,7 +163,7 @@ struct eigen_packet_wrapper
{
EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default;
EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
m_val = v;
@@ -176,7 +179,7 @@ struct eigen_packet_wrapper
*/
template<typename Packet>
struct is_scalar {
typedef typename unpacket_traits<Packet>::type Scalar;
using Scalar = typename unpacket_traits<Packet>::type;
enum {
value = internal::is_same<Packet, Scalar>::value
};
@@ -217,6 +220,15 @@ padd(const Packet& a, const Packet& b) { return a+b; }
template<> EIGEN_DEVICE_FUNC inline bool
padd(const bool& a, const bool& b) { return a || b; }

/** \internal \returns a packet version of \a *from, (un-aligned masked add)
* There is no generic implementation. We only have implementations for specialized
* cases. Generic case should not be called.
*/
template<typename Packet> EIGEN_DEVICE_FUNC inline
std::enable_if_t<unpacket_traits<Packet>::masked_fpops_available, Packet>
padd(const Packet& a, const Packet& b, typename unpacket_traits<Packet>::mask_t umask);

/** \internal \returns a - b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
psub(const Packet& a, const Packet& b) { return a-b; }
@@ -259,7 +271,7 @@ struct ptrue_impl {
// have another option, since the scalar type requires initialization.
template<typename T>
struct ptrue_impl<T,
typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type > {
std::enable_if_t<is_scalar<T>::value && NumTraits<T>::RequireInitialization> > {
static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){
return T(1);
}
@@ -285,7 +297,7 @@ struct pzero_impl {
// for zero may not consist of all-zero bits.
template<typename T>
struct pzero_impl<T,
typename internal::enable_if<is_scalar<T>::value>::type> {
std::enable_if_t<is_scalar<T>::value>> {
static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
return T(0);
}
@@ -356,16 +368,16 @@ struct bytewise_bitwise_helper {
EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
return binary(a, b, bit_and<unsigned char>());
}
EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
return binary(a, b, bit_or<unsigned char>());
}
EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
return binary(a, b, bit_xor<unsigned char>());
}
EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
return unary(a,bit_not<unsigned char>());
}

private:
template<typename Op>
EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
@@ -398,8 +410,8 @@ struct bitwise_helper : public bytewise_bitwise_helper<T> {};
// For integers or non-trivial scalars, use binary operators.
template<typename T>
struct bitwise_helper<T,
typename internal::enable_if<
is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type
typename std::enable_if_t<
is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>
> : public operator_bitwise_helper<T> {};

/** \internal \returns the bitwise and of \a a and \a b */
@@ -441,7 +453,7 @@ struct pselect_impl {
// For scalars, use ternary select.
template<typename Packet>
struct pselect_impl<Packet,
typename internal::enable_if<is_scalar<Packet>::value>::type > {
std::enable_if_t<is_scalar<Packet>::value> > {
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
return numext::equal_strict(mask, Packet(0)) ? b : a;
}
@@ -551,13 +563,13 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
parg(const Packet& a) { using numext::arg; return arg(a); }

/** \internal \returns \a a logically shifted by N bits to the right */
/** \internal \returns \a a arithmetically shifted by N bits to the right */
template<int N> EIGEN_DEVICE_FUNC inline int
parithmetic_shift_right(const int& a) { return a >> N; }
template<int N> EIGEN_DEVICE_FUNC inline long int
parithmetic_shift_right(const long int& a) { return a >> N; }

/** \internal \returns \a a arithmetically shifted by N bits to the right */
/** \internal \returns \a a logically shifted by N bits to the right */
template<int N> EIGEN_DEVICE_FUNC inline int
plogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }
template<int N> EIGEN_DEVICE_FUNC inline long int
@@ -594,20 +606,52 @@ pldexp(const Packet &a, const Packet &exponent) {
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); }
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
/** \internal \returns a packet version of \a *from, from must be properly aligned */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pload(const typename unpacket_traits<Packet>::type* from) { return *from; }

/** \internal \returns n elements of a packet version of \a *from, from must be properly aligned
* offset indicates the starting element in which to load and
* offset + n <= unpacket_traits::size
* All elements before offset and after the last element loaded will be initialized with zero */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pload_partial(const typename unpacket_traits<Packet>::type* from, const Index n, const Index offset = 0)
{
const Index packet_size = unpacket_traits<Packet>::size;
eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
typedef typename unpacket_traits<Packet>::type Scalar;
EIGEN_ALIGN_MAX Scalar elements[packet_size] = { Scalar(0) };
for (Index i = offset; i < numext::mini(n+offset,packet_size); i++) {
elements[i] = from[i-offset];
}
return pload<Packet>(elements);
}

/** \internal \returns a packet version of \a *from, (un-aligned load) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }

/** \internal \returns n elements of a packet version of \a *from, (un-aligned load)
* All elements after the last element loaded will be initialized with zero */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n)
{
const Index packet_size = unpacket_traits<Packet>::size;
eigen_assert(n <= packet_size && "number of elements will read past end of packet");
typedef typename unpacket_traits<Packet>::type Scalar;
EIGEN_ALIGN_MAX Scalar elements[packet_size] = { Scalar(0) };
for (Index i = 0; i < numext::mini(n,packet_size); i++) {
elements[i] = from[i];
}
return pload<Packet>(elements);
}

/** \internal \returns a packet version of \a *from, (un-aligned masked load)
* There is no generic implementation. We only have implementations for specialized
* cases. Generic case should not be called.
*/
template<typename Packet> EIGEN_DEVICE_FUNC inline
typename enable_if<unpacket_traits<Packet>::masked_load_available, Packet>::type
std::enable_if_t<unpacket_traits<Packet>::masked_load_available, Packet>
ploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
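A scalar model of the pload_partial()/ploadu_partial() contract documented above: n elements are loaded starting at lane `offset`, and every other lane is zero-filled. The 4-lane packet size is an assumption for illustration only; real sizes come from unpacket_traits<Packet>::size.

#include <algorithm>
#include <cassert>
#include <cstdio>

static const int kPacketSize = 4;  // hypothetical packet width

void load_partial(float packet[kPacketSize], const float* from, int n, int offset) {
  assert(n + offset <= kPacketSize && "would read past end of packet");
  std::fill(packet, packet + kPacketSize, 0.0f);  // zero outside [offset, offset+n)
  for (int i = offset; i < std::min(n + offset, kPacketSize); ++i)
    packet[i] = from[i - offset];
}

int main() {
  const float src[2] = {1.0f, 2.0f};
  float p[kPacketSize];
  load_partial(p, src, 2, 1);                     // p == {0, 1, 2, 0}
  std::printf("%g %g %g %g\n", p[0], p[1], p[2], p[3]);
  return 0;
}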
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
@@ -692,28 +736,74 @@ peven_mask(const Packet& /*a*/) {
}

/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
/** \internal copy the packet \a from to \a *to, \a to must be properly aligned */
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
{ (*to) = from; }

/** \internal copy n elements of the packet \a from to \a *to, \a to must be properly aligned
* offset indicates the starting element in which to store and
* offset + n <= unpacket_traits::size */
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0)
{
const Index packet_size = unpacket_traits<Packet>::size;
eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
EIGEN_ALIGN_MAX Scalar elements[packet_size];
pstore<Scalar>(elements, from);
for (Index i = 0; i < numext::mini(n,packet_size-offset); i++) {
to[i] = elements[i + offset];
}
}

/** \internal copy the packet \a from to \a *to, (un-aligned store) */
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
{ (*to) = from; }

/** \internal copy n elements of the packet \a from to \a *to, (un-aligned store) */
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n)
{
const Index packet_size = unpacket_traits<Packet>::size;
eigen_assert(n <= packet_size && "number of elements will write past end of packet");
EIGEN_ALIGN_MAX Scalar elements[packet_size];
pstore<Scalar>(elements, from);
for (Index i = 0; i < numext::mini(n,packet_size); i++) {
to[i] = elements[i];
}
}

/** \internal copy the packet \a from to \a *to, (un-aligned store with a mask)
* There is no generic implementation. We only have implementations for specialized
* cases. Generic case should not be called.
*/
template<typename Scalar, typename Packet>
EIGEN_DEVICE_FUNC inline
typename enable_if<unpacket_traits<Packet>::masked_store_available, void>::type
std::enable_if_t<unpacket_traits<Packet>::masked_store_available, void>
pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
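The matching scalar model of pstore_partial(): write n lanes of the packet, starting at lane `offset`, to consecutive destination elements. Again a toy 4-lane packet, purely for illustration.

#include <algorithm>
#include <cassert>
#include <cstdio>

static const int kPacketSize = 4;  // hypothetical packet width

void store_partial(float* to, const float packet[kPacketSize], int n, int offset) {
  assert(n + offset <= kPacketSize && "would write past end of packet");
  for (int i = 0; i < std::min(n, kPacketSize - offset); ++i)
    to[i] = packet[i + offset];  // lanes offset..offset+n-1 only
}

int main() {
  const float p[kPacketSize] = {10.0f, 11.0f, 12.0f, 13.0f};
  float dst[2] = {0.0f, 0.0f};
  store_partial(dst, p, 2, 1);   // dst == {11, 12}
  std::printf("%g %g\n", dst[0], dst[1]);
  return 0;
}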
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
{ return ploadu<Packet>(from); }

template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n)
{
const Index packet_size = unpacket_traits<Packet>::size;
EIGEN_ALIGN_MAX Scalar elements[packet_size] = { Scalar(0) };
for (Index i = 0; i < numext::mini(n,packet_size); i++) {
elements[i] = from[i*stride];
}
return pload<Packet>(elements);
}

template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
{ pstore(to, from); }

template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n)
{
const Index packet_size = unpacket_traits<Packet>::size;
EIGEN_ALIGN_MAX Scalar elements[packet_size];
pstore<Scalar>(elements, from);
for (Index i = 0; i < numext::mini(n,packet_size); i++) {
to[i*stride] = elements[i];
}
}
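A scalar model of the pgather_partial()/pscatter_partial() pair above: the first n lanes are read from (or written to) memory with a stride between consecutive elements, and the remaining lanes are zeroed on gather. Toy 4-lane packet, illustration only.

#include <algorithm>
#include <cstdio>

static const int kPacketSize = 4;  // hypothetical packet width

void gather_partial(float packet[kPacketSize], const float* from, int stride, int n) {
  std::fill(packet, packet + kPacketSize, 0.0f);  // unused lanes stay zero
  for (int i = 0; i < std::min(n, kPacketSize); ++i)
    packet[i] = from[i * stride];                 // strided reads
}

int main() {
  const float col_major[6] = {1, 4, 2, 5, 3, 6};  // 2x3 matrix, column stride 2
  float row[kPacketSize];
  gather_partial(row, col_major, 2, 3);           // first row: {1, 2, 3, 0}
  std::printf("%g %g %g %g\n", row[0], row[1], row[2], row[3]);
  return 0;
}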
/** \internal tries to do cache prefetching of \a addr */
template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
@@ -807,20 +897,13 @@ Packet plog10(const Packet& a) { EIGEN_USING_STD(log10); return log10(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet plog2(const Packet& a) {
typedef typename internal::unpacket_traits<Packet>::type Scalar;
return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a));
}

/** \internal \returns the square-root of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet psqrt(const Packet& a) { return numext::sqrt(a); }

/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet prsqrt(const Packet& a) {
typedef typename internal::unpacket_traits<Packet>::type Scalar;
return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));
}

/** \internal \returns the rounded value of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pround(const Packet& a) { using numext::round; return round(a); }
@@ -838,6 +921,24 @@ Packet print(const Packet& a) { using numext::rint; return rint(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }

template<typename Packet, typename EnableIf = void>
struct psign_impl {
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) {
return numext::sign(a);
}
};

/** \internal \returns the sign of \a a (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
psign(const Packet& a) {
return psign_impl<Packet>::run(a);
}

template<> EIGEN_DEVICE_FUNC inline bool
psign(const bool& a) {
return a;
}
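psign() above defers to numext::sign and is the identity on bool, yielding -1, 0, or +1 with the sign of the input. The same semantics are observable through the public ArrayBase::sign() API, which is assumed (not shown in this commit) to dispatch to these kernels when vectorization applies:

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::Array4f a;
  a << -2.5f, 0.0f, 7.0f, -0.0f;
  // Coefficient-wise sign: prints -1 0 1 0 (both zeros map to 0).
  std::cout << a.sign().transpose() << std::endl;
  return 0;
}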
/** \internal \returns the first element of a packet */
template<typename Packet>
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
@@ -849,7 +950,7 @@ pfirst(const Packet& a)
* For packet-size smaller or equal to 4, this boils down to a noop.
*/
template<typename Packet>
EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>
predux_half_dowto4(const Packet& a)
{ return a; }

@@ -881,7 +982,7 @@ predux(const Packet& a)
template <typename Packet>
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(
const Packet& a) {
typedef typename unpacket_traits<Packet>::type Scalar;
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
}

@@ -889,14 +990,14 @@ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(
template <typename Packet>
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
const Packet &a) {
typedef typename unpacket_traits<Packet>::type Scalar;
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
}

template <int NaNPropagation, typename Packet>
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
const Packet& a) {
typedef typename unpacket_traits<Packet>::type Scalar;
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
}

@@ -904,14 +1005,14 @@ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
template <typename Packet>
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
const Packet &a) {
typedef typename unpacket_traits<Packet>::type Scalar;
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));
}

template <int NaNPropagation, typename Packet>
EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
const Packet& a) {
typedef typename unpacket_traits<Packet>::type Scalar;
return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
}

@@ -943,6 +1044,35 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet&
* The following functions might not have to be overwritten for vectorized types
***************************************************************************/

// FMA instructions.
/** \internal \returns a * b + c (coeff-wise) */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b,
const Packet& c) {
return padd(pmul(a, b), c);
}

/** \internal \returns a * b - c (coeff-wise) */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b,
const Packet& c) {
return psub(pmul(a, b), c);
}

/** \internal \returns -(a * b) + c (coeff-wise) */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b,
const Packet& c) {
return padd(pnegate(pmul(a, b)), c);
}

/** \internal \returns -(a * b) - c (coeff-wise) */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b,
const Packet& c) {
return psub(pnegate(pmul(a, b)), c);
}
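The four fused helpers just added obey simple sign identities: pmadd(a,b,c) = a*b + c, pmsub(a,b,c) = a*b - c, pnmadd(a,b,c) = -a*b + c, pnmsub(a,b,c) = -a*b - c. A plain scalar spot-check of those identities (no packets involved):

#include <cassert>

int main() {
  const double a = 2.0, b = 3.0, c = 5.0;
  assert(a*b + c == 11.0);     // pmadd
  assert(a*b - c == 1.0);      // pmsub
  assert(-(a*b) + c == -1.0);  // pnmadd
  assert(-(a*b) - c == -11.0); // pnmsub
  return 0;
}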
/** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
template<typename Packet>
@@ -951,13 +1081,6 @@ inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename u
pstore(to, pset1<Packet>(a));
}

/** \internal \returns a * b + c (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pmadd(const Packet& a,
const Packet& b,
const Packet& c)
{ return padd(pmul(a, b),c); }

/** \internal \returns a packet version of \a *from.
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
template<typename Packet, int Alignment>
@@ -969,6 +1092,17 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_trai
return ploadu<Packet>(from);
}

/** \internal \returns n elements of a packet version of \a *from.
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
template<typename Packet, int Alignment>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits<Packet>::type* from, const Index n, const Index offset = 0)
{
if(Alignment >= unpacket_traits<Packet>::alignment)
return pload_partial<Packet>(from, n, offset);
else
return ploadu_partial<Packet>(from, n);
}

/** \internal copy the packet \a from to \a *to.
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
template<typename Scalar, typename Packet, int Alignment>
@@ -980,6 +1114,17 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& fro
pstoreu(to, from);
}

/** \internal copy n elements of the packet \a from to \a *to.
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
template<typename Scalar, typename Packet, int Alignment>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0)
{
if(Alignment >= unpacket_traits<Packet>::alignment)
pstore_partial(to, from, n, offset);
else
pstoreu_partial(to, from, n);
}

/** \internal \returns a packet version of \a *from.
* Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
* hardware if available to speedup the loading of data that won't be modified
@@ -1033,6 +1178,47 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
return ifPacket.select[0] ? thenPacket : elsePacket;
}

/** \internal \returns 1 / a (coeff-wise) */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
using Scalar = typename unpacket_traits<Packet>::type;
return pdiv(pset1<Packet>(Scalar(1)), a);
}

/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet prsqrt(const Packet& a) {
return preciprocal<Packet>(psqrt(a));
}
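After this change prsqrt(a) is literally preciprocal(psqrt(a)), i.e. 1/sqrt(a) evaluated via a division, so prsqrt(0) yields +inf and prsqrt(+inf) yields 0. A scalar spot-check of that contract (illustration, not the packet code path):

#include <cmath>
#include <cstdio>

int main() {
  const double xs[3] = {4.0, 0.0, INFINITY};
  for (double x : xs)
    std::printf("rsqrt(%g) = %g\n", x, 1.0 / std::sqrt(x));  // 0.5, inf, 0
  return 0;
}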
template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
struct psignbit_impl;
template <typename Packet, bool IsInteger>
struct psignbit_impl<Packet, true, IsInteger> {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); }
};
template <typename Packet>
struct psignbit_impl<Packet, false, false> {
// generic implementation if not specialized in PacketMath.h
// slower than arithmetic shift
typedef typename unpacket_traits<Packet>::type Scalar;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) {
const Packet cst_pos_one = pset1<Packet>(Scalar(1));
const Packet cst_neg_one = pset1<Packet>(Scalar(-1));
return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one);
}
};
template <typename Packet>
struct psignbit_impl<Packet, false, true> {
// generic implementation for integer packets
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); }
};
/** \internal \returns the sign bit of \a a as a bitmask*/
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet
psignbit(const Packet& a) { return psignbit_impl<Packet>::run(a); }
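A scalar model of the psignbit() contract above: an all-ones mask when the sign bit is set (note that -0.0f counts), all-zeros otherwise. The real packet versions produce this mask per lane; this is a one-lane illustration using std::signbit.

#include <cmath>
#include <cstdint>
#include <cstdio>

std::uint32_t signbit_mask(float x) {
  // All-ones when the IEEE sign bit is set, including for -0.0f.
  return std::signbit(x) ? 0xFFFFFFFFu : 0u;
}

int main() {
  std::printf("%08x %08x %08x\n",
              (unsigned)signbit_mask(-1.5f),
              (unsigned)signbit_mask(-0.0f),
              (unsigned)signbit_mask(2.0f));  // ffffffff ffffffff 00000000
  return 0;
}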
} // end namespace internal

} // end namespace Eigen

@@ -51,6 +51,8 @@
} \
};

#include "./InternalHeaderCheck.h"

namespace Eigen
{
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\sa ArrayBase::real)
@@ -66,11 +68,9 @@ namespace Eigen
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh)
#if EIGEN_HAS_CXX11_MATH
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asinh,scalar_asinh_op,inverse hyperbolic sine,\sa ArrayBase::asinh)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acosh,scalar_acosh_op,inverse hyperbolic cosine,\sa ArrayBase::acosh)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atanh,scalar_atanh_op,inverse hyperbolic tangent,\sa ArrayBase::atanh)
#endif
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(logistic,scalar_logistic_op,logistic function,\sa ArrayBase::logistic)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
@@ -99,31 +99,31 @@ namespace Eigen
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign)

template <typename Derived, typename ScalarExponent>
using GlobalUnaryPowReturnType = std::enable_if_t<
!internal::is_arithmetic<typename NumTraits<Derived>::Real>::value &&
internal::is_arithmetic<typename NumTraits<ScalarExponent>::Real>::value,
CwiseUnaryOp<internal::scalar_unary_pow_op<typename Derived::Scalar, ScalarExponent>, const Derived> >;

/** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent.
*
* \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar).
*
* \sa ArrayBase::pow()
*
* \relates ArrayBase
*/
*
* \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given
* expression (\c Derived::Scalar).
*
* \sa ArrayBase::pow()
*
* \relates ArrayBase
*/
#ifdef EIGEN_PARSED_BY_DOXYGEN
template<typename Derived,typename ScalarExponent>
inline const CwiseBinaryOp<internal::scalar_pow_op<Derived::Scalar,ScalarExponent>,Derived,Constant<ScalarExponent> >
pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent);
template <typename Derived, typename ScalarExponent>
EIGEN_DEVICE_FUNC inline const GlobalUnaryPowReturnType<Derived, ScalarExponent> pow(
const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent);
#else
template <typename Derived,typename ScalarExponent>
EIGEN_DEVICE_FUNC inline
EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(
const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<typename Derived::Scalar
EIGEN_COMMA ScalarExponent EIGEN_COMMA
EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent)>::type,pow))
pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent)
{
typedef typename internal::promote_scalar_arg<typename Derived::Scalar,ScalarExponent,
EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent)>::type PromotedExponent;
return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedExponent,pow)(x.derived(),
typename internal::plain_constant_type<Derived,PromotedExponent>::type(x.derived().rows(), x.derived().cols(), internal::scalar_constant_op<PromotedExponent>(exponent)));
template <typename Derived, typename ScalarExponent>
EIGEN_DEVICE_FUNC inline const GlobalUnaryPowReturnType<Derived, ScalarExponent> pow(
const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent) {
return GlobalUnaryPowReturnType<Derived, ScalarExponent>(
x.derived(), internal::scalar_unary_pow_op<typename Derived::Scalar, ScalarExponent>(exponent));
}
#endif
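Usage of the coefficient-wise pow() declared above: Eigen::pow(array, scalar) returns a lazy expression raising each coefficient to the constant exponent.

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::ArrayXd x(3);
  x << 1.0, 2.0, 3.0;
  std::cout << Eigen::pow(x, 2.0).transpose() << std::endl;  // 1 4 9
  return 0;
}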
@@ -168,10 +168,9 @@ namespace Eigen
#else
template <typename Scalar, typename Derived>
EIGEN_DEVICE_FUNC inline
EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(
const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<typename Derived::Scalar
EIGEN_COMMA Scalar EIGEN_COMMA
EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar)>::type,Derived,pow))
EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar)>::type,Derived,pow)
pow(const Scalar& x, const Eigen::ArrayBase<Derived>& exponents) {
typedef typename internal::promote_scalar_arg<typename Derived::Scalar,Scalar,
EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar)>::type PromotedScalar;
@@ -180,6 +179,25 @@ namespace Eigen
}
#endif

/** \returns an expression of the coefficient-wise atan2(\a x, \a y). \a x and \a y must be of the same type.
*
* This function computes the coefficient-wise atan2().
*
* \sa ArrayBase::atan2()
*
* \relates ArrayBase
*/
template <typename LhsDerived, typename RhsDerived>
inline const std::enable_if_t<
std::is_same<typename LhsDerived::Scalar, typename RhsDerived::Scalar>::value,
Eigen::CwiseBinaryOp<Eigen::internal::scalar_atan2_op<typename LhsDerived::Scalar, typename RhsDerived::Scalar>, const LhsDerived, const RhsDerived>
>
atan2(const Eigen::ArrayBase<LhsDerived>& x, const Eigen::ArrayBase<RhsDerived>& exponents) {
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_atan2_op<typename LhsDerived::Scalar, typename RhsDerived::Scalar>, const LhsDerived, const RhsDerived>(
x.derived(),
exponents.derived()
);
}
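Usage of the new free-function atan2() added above; both arrays must share the same scalar type, and the first argument is assumed to supply the numerator (y) as in std::atan2:

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::ArrayXd y(2), x(2);
  y << 1.0, -1.0;
  x << 1.0,  1.0;
  // Coefficient-wise atan2: approximately 0.785398 -0.785398 (i.e. pi/4, -pi/4)
  std::cout << Eigen::atan2(y, x).transpose() << std::endl;
  return 0;
}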
namespace internal
{

@@ -11,6 +11,8 @@
#ifndef EIGEN_IO_H
#define EIGEN_IO_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

enum { DontAlignCols = 1 };
@@ -131,7 +133,6 @@ template<typename Derived>
std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt)
{
using internal::is_same;
using internal::conditional;

if(_m.size() == 0)
{
@@ -141,22 +142,21 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&

typename Derived::Nested m = _m;
typedef typename Derived::Scalar Scalar;
typedef typename
conditional<
typedef std::conditional_t<
is_same<Scalar, char>::value ||
is_same<Scalar, unsigned char>::value ||
is_same<Scalar, numext::int8_t>::value ||
is_same<Scalar, numext::uint8_t>::value,
int,
typename conditional<
std::conditional_t<
is_same<Scalar, std::complex<char> >::value ||
is_same<Scalar, std::complex<unsigned char> >::value ||
is_same<Scalar, std::complex<numext::int8_t> >::value ||
is_same<Scalar, std::complex<numext::uint8_t> >::value,
std::complex<int>,
const Scalar&
>::type
>::type PrintType;
>
> PrintType;

Index width = 0;
@@ -10,6 +10,8 @@
#ifndef EIGEN_INDEXED_VIEW_H
#define EIGEN_INDEXED_VIEW_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -21,8 +23,8 @@ struct traits<IndexedView<XprType, RowIndices, ColIndices> >
enum {
RowsAtCompileTime = int(array_size<RowIndices>::value),
ColsAtCompileTime = int(array_size<ColIndices>::value),
MaxRowsAtCompileTime = RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) : Dynamic,
MaxColsAtCompileTime = ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) : Dynamic,
MaxRowsAtCompileTime = RowsAtCompileTime,
MaxColsAtCompileTime = ColsAtCompileTime,

XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
@@ -40,10 +42,10 @@ struct traits<IndexedView<XprType, RowIndices, ColIndices> >

InnerSize = XprTypeIsRowMajor ? ColsAtCompileTime : RowsAtCompileTime,
IsBlockAlike = InnerIncr==1 && OuterIncr==1,
IsInnerPannel = HasSameStorageOrderAsXprType && is_same<AllRange<InnerSize>,typename conditional<XprTypeIsRowMajor,ColIndices,RowIndices>::type>::value,
IsInnerPannel = HasSameStorageOrderAsXprType && is_same<AllRange<InnerSize>,std::conditional_t<XprTypeIsRowMajor,ColIndices,RowIndices>>::value,

InnerStrideAtCompileTime = InnerIncr<0 || InnerIncr==DynamicIndex || XprInnerStride==Dynamic ? Dynamic : XprInnerStride * InnerIncr,
OuterStrideAtCompileTime = OuterIncr<0 || OuterIncr==DynamicIndex || XprOuterstride==Dynamic ? Dynamic : XprOuterstride * OuterIncr,
InnerStrideAtCompileTime = InnerIncr<0 || InnerIncr==DynamicIndex || XprInnerStride==Dynamic || InnerIncr==UndefinedIncr ? Dynamic : XprInnerStride * InnerIncr,
OuterStrideAtCompileTime = OuterIncr<0 || OuterIncr==DynamicIndex || XprOuterstride==Dynamic || OuterIncr==UndefinedIncr ? Dynamic : XprOuterstride * OuterIncr,

ReturnAsScalar = is_same<RowIndices,SingleRange>::value && is_same<ColIndices,SingleRange>::value,
ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike,
@@ -96,7 +98,7 @@ class IndexedViewImpl;
* - decltype(ArrayXi::LinSpaced(...))
* - Any view/expressions of the previous types
* - Eigen::ArithmeticSequence
* - Eigen::internal::AllRange (helper for Eigen::all)
* - Eigen::internal::AllRange (helper for Eigen::placeholders::all)
* - Eigen::internal::SingleRange (helper for single index)
* - etc.
*
@@ -114,7 +116,7 @@ public:
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedView)

typedef typename internal::ref_selector<XprType>::non_const_type MatrixTypeNested;
typedef typename internal::remove_all<XprType>::type NestedExpression;
typedef internal::remove_all_t<XprType> NestedExpression;

template<typename T0, typename T1>
IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices)
@@ -122,17 +124,17 @@ public:
{}

/** \returns number of rows */
Index rows() const { return internal::size(m_rowIndices); }
Index rows() const { return internal::index_list_size(m_rowIndices); }

/** \returns number of columns */
Index cols() const { return internal::size(m_colIndices); }
Index cols() const { return internal::index_list_size(m_colIndices); }

/** \returns the nested expression */
const typename internal::remove_all<XprType>::type&
const internal::remove_all_t<XprType>&
nestedExpression() const { return m_xpr; }

/** \returns the nested expression */
typename internal::remove_reference<XprType>::type&
std::remove_reference_t<XprType>&
nestedExpression() { return m_xpr; }

/** \returns a const reference to the object storing/generating the row indices */
@@ -189,12 +191,16 @@ struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{
eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows()
&& m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index row, Index col)
{
eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows()
&& m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
}

@@ -204,6 +210,8 @@ struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>
EIGEN_STATIC_ASSERT_LVALUE(XprType)
Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;
Index col = XprType::RowsAtCompileTime == 1 ? index : 0;
eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows()
&& m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
return m_argImpl.coeffRef( m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
}

@@ -212,6 +220,8 @@ struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>
{
Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;
Index col = XprType::RowsAtCompileTime == 1 ? index : 0;
eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows()
&& m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
return m_argImpl.coeffRef( m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
}

@@ -220,6 +230,8 @@ struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>
{
Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;
Index col = XprType::RowsAtCompileTime == 1 ? index : 0;
eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows()
&& m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
return m_argImpl.coeff( m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
}
3
libs/eigen/Eigen/src/Core/InternalHeaderCheck.h
Normal file
@@ -0,0 +1,3 @@
#ifndef EIGEN_CORE_MODULE_H
#error "Please include Eigen/Core instead of including headers inside the src directory directly."
#endif
@@ -10,6 +10,8 @@
#ifndef EIGEN_INVERSE_H
#define EIGEN_INVERSE_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

template<typename XprType,typename StorageKind> class InverseImpl;
@@ -46,9 +48,9 @@ public:
typedef typename XprType::StorageIndex StorageIndex;
typedef typename XprType::Scalar Scalar;
typedef typename internal::ref_selector<XprType>::type XprTypeNested;
typedef typename internal::remove_all<XprTypeNested>::type XprTypeNestedCleaned;
typedef internal::remove_all_t<XprTypeNested> XprTypeNestedCleaned;
typedef typename internal::ref_selector<Inverse>::type Nested;
typedef typename internal::remove_all<XprType>::type NestedExpression;
typedef internal::remove_all_t<XprType> NestedExpression;

explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr)
: m_xpr(xpr)
@@ -102,7 +104,7 @@ struct unary_evaluator<Inverse<ArgType> >
unary_evaluator(const InverseType& inv_xpr)
: m_result(inv_xpr.rows(), inv_xpr.cols())
{
::new (static_cast<Base*>(this)) Base(m_result);
internal::construct_at<Base>(this, m_result);
internal::call_assignment_no_alias(m_result, inv_xpr);
}
@@ -11,6 +11,8 @@
#ifndef EIGEN_MAP_H
#define EIGEN_MAP_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -129,7 +131,6 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
: Base(cast_to_pointer_type(dataPtr)), m_stride(stride)
{
PlainObjectType::Base::_check_template_params();
}

/** Constructor in the dynamic-size vector case.
@@ -142,7 +143,6 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
: Base(cast_to_pointer_type(dataPtr), size), m_stride(stride)
{
PlainObjectType::Base::_check_template_params();
}

/** Constructor in the dynamic-size matrix case.
@@ -156,7 +156,6 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
: Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride)
{
PlainObjectType::Base::_check_template_params();
}

EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)

@@ -15,6 +15,8 @@
EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \ingroup Core_Module
@@ -51,11 +53,11 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename internal::conditional<
bool(internal::is_lvalue<Derived>::value),
Scalar *,
const Scalar *>::type
PointerType;
typedef std::conditional_t<
bool(internal::is_lvalue<Derived>::value),
Scalar *,
const Scalar *>
PointerType;

using Base::derived;
// using Base::RowsAtCompileTime;
@@ -189,7 +191,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>

template<typename T>
EIGEN_DEVICE_FUNC
void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
void checkSanity(std::enable_if_t<(internal::traits<T>::Alignment>0),void*> = 0) const
{
#if EIGEN_MAX_ALIGN_BYTES>0
// innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value:
@@ -202,7 +204,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>

template<typename T>
EIGEN_DEVICE_FUNC
void checkSanity(typename internal::enable_if<internal::traits<T>::Alignment==0,void*>::type = 0) const
void checkSanity(std::enable_if_t<internal::traits<T>::Alignment==0,void*> = 0) const
{}

PointerType m_data;
@@ -245,11 +247,11 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
using Base::rowStride;
using Base::colStride;

typedef typename internal::conditional<
typedef std::conditional_t<
internal::is_lvalue<Derived>::value,
Scalar,
const Scalar
>::type ScalarWithConstIfNotLvalue;
> ScalarWithConstIfNotLvalue;

EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return this->m_data; }

@@ -17,16 +17,9 @@
#define EIGEN_LOG2E 1.442695040888963407359924681001892137426645954152985934135449406931109219L
#define EIGEN_LN2 0.693147180559945309417232121458176568075500134360255254120680009493393621L

namespace Eigen {
#include "./InternalHeaderCheck.h"

// On WINCE, std::abs is defined for int only, so let's define our own overloads:
// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too.
#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500
long abs(long x) { return (labs(x)); }
double abs(double x) { return (fabs(x)); }
float abs(float x) { return (fabsf(x)); }
long double abs(long double x) { return (fabsl(x)); }
#endif
namespace Eigen {

namespace internal {
@@ -236,6 +229,63 @@ struct imag_ref_retval
typedef typename NumTraits<Scalar>::Real & type;
};

/****************************************************************************
* Implementation of sign *
****************************************************************************/
template<typename Scalar, bool IsComplex = (NumTraits<Scalar>::IsComplex!=0),
bool IsInteger = (NumTraits<Scalar>::IsInteger!=0)>
struct sign_impl
{
EIGEN_DEVICE_FUNC
static inline Scalar run(const Scalar& a)
{
return Scalar( (a>Scalar(0)) - (a<Scalar(0)) );
}
};

template<typename Scalar>
struct sign_impl<Scalar, false, false>
{
EIGEN_DEVICE_FUNC
static inline Scalar run(const Scalar& a)
{
return (std::isnan)(a) ? a : Scalar( (a>Scalar(0)) - (a<Scalar(0)) );
}
};

template<typename Scalar, bool IsInteger>
struct sign_impl<Scalar, true, IsInteger>
{
EIGEN_DEVICE_FUNC
static inline Scalar run(const Scalar& a)
{
using real_type = typename NumTraits<Scalar>::Real;
real_type aa = std::abs(a);
if (aa==real_type(0))
return Scalar(0);
aa = real_type(1)/aa;
return Scalar(a.real()*aa, a.imag()*aa );
}
};

// The sign function for bool is the identity.
template<>
struct sign_impl<bool, false, true>
{
EIGEN_DEVICE_FUNC
static inline bool run(const bool& a)
{
return a;
}
};

template<typename Scalar>
struct sign_retval
{
typedef Scalar type;
};
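The complex specialization of sign_impl above returns a/|a| (a unit-modulus complex number), and 0 when the input is 0. A standalone check of that rule, mirroring the run() body:

#include <complex>
#include <cstdio>

std::complex<double> csign(const std::complex<double>& a) {
  const double aa = std::abs(a);
  if (aa == 0.0) return std::complex<double>(0.0, 0.0);
  // Scale both components by 1/|a|, as the specialization does.
  return std::complex<double>(a.real()/aa, a.imag()/aa);
}

int main() {
  const std::complex<double> s = csign(std::complex<double>(3.0, 4.0));
  std::printf("(%g, %g)\n", s.real(), s.imag());  // (0.6, 0.8)
  return 0;
}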
/****************************************************************************
* Implementation of conj                                                    *
****************************************************************************/
@@ -441,9 +491,9 @@ struct cast_impl
// generating warnings on clang. Here we explicitly cast the real component.
template<typename OldType, typename NewType>
struct cast_impl<OldType, NewType,
  typename internal::enable_if<
  typename std::enable_if_t<
    !NumTraits<OldType>::IsComplex && NumTraits<NewType>::IsComplex
  >::type>
  >>
{
  EIGEN_DEVICE_FUNC
  static inline NewType run(const OldType& x)
@@ -469,57 +519,16 @@ inline NewType cast(const OldType& x)
template<typename Scalar>
struct round_impl
{
  EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)

  EIGEN_DEVICE_FUNC
  static inline Scalar run(const Scalar& x)
  {
    EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
#if EIGEN_HAS_CXX11_MATH
    EIGEN_USING_STD(round);
#endif
    return Scalar(round(x));
  }
};

#if !EIGEN_HAS_CXX11_MATH
#if EIGEN_HAS_C99_MATH
// Use ::roundf for float.
template<>
struct round_impl<float> {
  EIGEN_DEVICE_FUNC
  static inline float run(const float& x)
  {
    return ::roundf(x);
  }
};
#else
template<typename Scalar>
struct round_using_floor_ceil_impl
{
  EIGEN_DEVICE_FUNC
  static inline Scalar run(const Scalar& x)
  {
    EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
    // Without C99 round/roundf, resort to floor/ceil.
    EIGEN_USING_STD(floor);
    EIGEN_USING_STD(ceil);
    // If not enough precision to resolve a decimal at all, return the input.
    // Otherwise, adding 0.5 can trigger an increment by 1.
    const Scalar limit = Scalar(1ull << (NumTraits<Scalar>::digits() - 1));
    if (x >= limit || x <= -limit) {
      return x;
    }
    return (x > Scalar(0)) ? Scalar(floor(x + Scalar(0.5))) : Scalar(ceil(x - Scalar(0.5)));
  }
};

template<>
struct round_impl<float> : round_using_floor_ceil_impl<float> {};

template<>
struct round_impl<double> : round_using_floor_ceil_impl<double> {};
#endif // EIGEN_HAS_C99_MATH
#endif // !EIGEN_HAS_CXX11_MATH
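A standalone sketch of the floor/ceil fallback above, assuming IEEE-754 double (digits() = 53, so the guard limit is 2^52, above which a double has no fractional part and adding 0.5 could wrongly bump the value):

// Sketch of the floor/ceil rounding fallback (illustrative only).
#include <cmath>
#include <cstdint>
#include <iostream>

double round_via_floor_ceil(double x) {
  const double limit = double(1ull << 52);  // 2^(digits - 1) for double
  if (x >= limit || x <= -limit) return x;  // no fractional part to resolve
  return (x > 0.0) ? std::floor(x + 0.5) : std::ceil(x - 0.5);
}

int main() {
  std::cout << round_via_floor_ceil(2.5) << ' '     // 3
            << round_via_floor_ceil(-2.5) << '\n';  // -3 (half away from zero)
}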
template<typename Scalar>
struct round_retval
{
@@ -532,36 +541,16 @@ struct round_retval

template<typename Scalar>
struct rint_impl {
  EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)

  EIGEN_DEVICE_FUNC
  static inline Scalar run(const Scalar& x)
  {
    EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
#if EIGEN_HAS_CXX11_MATH
    EIGEN_USING_STD(rint);
#endif
    EIGEN_USING_STD(rint);
    return rint(x);
  }
};

#if !EIGEN_HAS_CXX11_MATH
template<>
struct rint_impl<double> {
  EIGEN_DEVICE_FUNC
  static inline double run(const double& x)
  {
    return ::rint(x);
  }
};
template<>
struct rint_impl<float> {
  EIGEN_DEVICE_FUNC
  static inline float run(const float& x)
  {
    return ::rintf(x);
  }
};
#endif

template<typename Scalar>
struct rint_retval
{
@@ -574,7 +563,7 @@ struct rint_retval

// Visual Studio 2017 has a bug where arg(float) returns 0 for negative inputs.
// This seems to be fixed in VS 2019.
#if EIGEN_HAS_CXX11_MATH && (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920)
#if (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920)
// std::arg is only defined for types of std::complex, or integer types or float/double/long double
template<typename Scalar,
         bool HasStdImpl = NumTraits<Scalar>::IsComplex || is_integral<Scalar>::value
@@ -675,11 +664,7 @@ struct expm1_impl {
  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)
  {
    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
#if EIGEN_HAS_CXX11_MATH
    using std::expm1;
#else
    using std_fallback::expm1;
#endif
    return expm1(x);
  }
};
@@ -736,14 +721,11 @@ namespace std_fallback {

template<typename Scalar>
struct log1p_impl {
  EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)

  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)
  {
    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
#if EIGEN_HAS_CXX11_MATH
    using std::log1p;
#else
    using std_fallback::log1p;
#endif
    return log1p(x);
  }
};
@@ -751,9 +733,10 @@ struct log1p_impl {
// Specialization for complex types that are not supported by std::log1p.
template <typename RealScalar>
struct log1p_impl<std::complex<RealScalar> > {
  EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)

  EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(
      const std::complex<RealScalar>& x) {
    EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
    return std_fallback::log1p(x);
  }
};
@@ -893,7 +876,7 @@ struct random_default_impl<Scalar, false, true>
    // ScalarX is the widest of ScalarU and unsigned int.
    // We'll deal only with ScalarX and unsigned int below thus avoiding signed
    // types and arithmetic and signed overflows (which are undefined behavior).
    typedef typename conditional<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned>::type ScalarX;
    typedef std::conditional_t<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned> ScalarX;
    // The following difference doesn't overflow, provided our integer types are two's
    // complement and have the same number of padding bits in signed and unsigned variants.
    // This is the case in most modern implementations of C++.
@@ -918,8 +901,8 @@ struct random_default_impl<Scalar, false, true>
#else
    enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
           scalar_bits = sizeof(Scalar) * CHAR_BIT,
           shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
           offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
           shift = plain_enum_max(0, int(rand_bits) - int(scalar_bits)),
           offset = NumTraits<Scalar>::IsSigned ? (1 << (plain_enum_min(rand_bits, scalar_bits)-1)) : 0
    };
    return Scalar((std::rand() >> shift) - offset);
#endif
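A worked instance of the shift/offset arithmetic above for Scalar = int8_t, assuming RAND_MAX = 2^31 - 1 (a common but platform-dependent value, so this is an assumption): rand_bits = 31, scalar_bits = 8, shift = max(0, 31 - 8) = 23 keeps only the top 8 random bits, and offset = 1 << (min(31, 8) - 1) = 128 recenters [0, 255] onto [-128, 127].

// Illustrative only; the constants assume RAND_MAX = 2^31 - 1.
#include <cstdint>
#include <cstdlib>

std::int8_t random_int8() {
  return static_cast<std::int8_t>((std::rand() >> 23) - 128);
}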
@@ -956,7 +939,7 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
// Implementation of is* functions

// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang.
#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG)
#if (!(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC) || (EIGEN_COMP_CLANG)
#define EIGEN_USE_STD_FPCLASSIFY 1
#else
#define EIGEN_USE_STD_FPCLASSIFY 0
@@ -964,22 +947,22 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()

template<typename T>
EIGEN_DEVICE_FUNC
typename internal::enable_if<internal::is_integral<T>::value,bool>::type
std::enable_if_t<internal::is_integral<T>::value,bool>
isnan_impl(const T&) { return false; }

template<typename T>
EIGEN_DEVICE_FUNC
typename internal::enable_if<internal::is_integral<T>::value,bool>::type
std::enable_if_t<internal::is_integral<T>::value,bool>
isinf_impl(const T&) { return false; }

template<typename T>
EIGEN_DEVICE_FUNC
typename internal::enable_if<internal::is_integral<T>::value,bool>::type
std::enable_if_t<internal::is_integral<T>::value,bool>
isfinite_impl(const T&) { return true; }

template<typename T>
EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
std::enable_if_t<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>
isfinite_impl(const T& x)
{
#if defined(EIGEN_GPU_COMPILE_PHASE)
@@ -994,7 +977,7 @@ isfinite_impl(const T& x)

template<typename T>
EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
std::enable_if_t<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>
isinf_impl(const T& x)
{
#if defined(EIGEN_GPU_COMPILE_PHASE)
@@ -1009,7 +992,7 @@ isinf_impl(const T& x)

template<typename T>
EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
std::enable_if_t<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>
isnan_impl(const T& x)
{
#if defined(EIGEN_GPU_COMPILE_PHASE)
@@ -1042,7 +1025,7 @@ EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_ms

#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC)

#if EIGEN_GNUC_AT_LEAST(5,0)
#if EIGEN_COMP_GNUC
#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only")))
#else
// NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol),
@@ -1234,7 +1217,7 @@ inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)

template<typename Scalar>
EIGEN_DEVICE_FUNC
inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x)
inline internal::add_const_on_value_type_t< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) > real_ref(const Scalar& x)
{
  return internal::real_ref_impl<Scalar>::run(x);
}
@@ -1262,7 +1245,7 @@ inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x)

template<typename Scalar>
EIGEN_DEVICE_FUNC
inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
inline internal::add_const_on_value_type_t< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) > imag_ref(const Scalar& x)
{
  return internal::imag_ref_impl<Scalar>::run(x);
}
@@ -1281,6 +1264,13 @@ inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
  return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
}

template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(sign, Scalar) sign(const Scalar& x)
{
  return EIGEN_MATHFUNC_IMPL(sign, Scalar)::run(x);
}

template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
@@ -1505,7 +1495,7 @@ double log(const double &x) { return ::log(x); }

template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
std::enable_if_t<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>
abs(const T &x) {
  EIGEN_USING_STD(abs);
  return abs(x);
@@ -1513,7 +1503,7 @@ abs(const T &x) {

template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type
std::enable_if_t<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>
abs(const T &x) {
  return x;
}
@@ -1541,6 +1531,37 @@ double abs(const std::complex<double>& x) {
}
#endif

template <typename Scalar, bool IsInteger = NumTraits<Scalar>::IsInteger, bool IsSigned = NumTraits<Scalar>::IsSigned>
struct signbit_impl;
template <typename Scalar>
struct signbit_impl<Scalar, false, true> {
  static constexpr size_t Size = sizeof(Scalar);
  static constexpr size_t Shift = (CHAR_BIT * Size) - 1;
  using intSize_t = typename get_integer_by_size<Size>::signed_type;
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Scalar run(const Scalar& x) {
    intSize_t a = bit_cast<intSize_t, Scalar>(x);
    a = a >> Shift;
    Scalar result = bit_cast<Scalar, intSize_t>(a);
    return result;
  }
};
template <typename Scalar>
struct signbit_impl<Scalar, true, true> {
  static constexpr size_t Size = sizeof(Scalar);
  static constexpr size_t Shift = (CHAR_BIT * Size) - 1;
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar run(const Scalar& x) { return x >> Shift; }
};
template <typename Scalar>
struct signbit_impl<Scalar, true, false> {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar run(const Scalar& ) {
    return Scalar(0);
  }
};
template <typename Scalar>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar signbit(const Scalar& x) {
  return signbit_impl<Scalar>::run(x);
}
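A standalone sketch of the arithmetic-shift trick in signbit_impl above: bit-cast the float to a same-width signed integer, shift the sign bit down across the whole word so it smears to 0 or all-ones, and bit-cast back. Like the implementation itself, this assumes IEEE-754 layout and an arithmetic right shift on signed integers.

// Illustrative signbit via bit-cast and arithmetic shift (not Eigen code).
#include <cstdint>
#include <cstring>
#include <iostream>

double signbit_like(double x) {
  std::int64_t a;
  std::memcpy(&a, &x, sizeof(a));  // bit_cast to the same-width signed int
  a >>= 63;                        // smear sign bit: 0 or -1 (all bits set)
  double r;
  std::memcpy(&r, &a, sizeof(r));  // bit_cast back
  return r;                        // +0.0 for non-negative, all-ones pattern otherwise
}

int main() {
  std::cout << signbit_like(3.5) << ' ' << signbit_like(-3.5) << '\n';
}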
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T exp(const T &x) {
@@ -1659,14 +1680,12 @@ T acos(const T &x) {
  return acos(x);
}

#if EIGEN_HAS_CXX11_MATH
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T acosh(const T &x) {
  EIGEN_USING_STD(acosh);
  return static_cast<T>(acosh(x));
}
#endif

#if defined(SYCL_DEVICE_ONLY)
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acos, acos)
@@ -1688,14 +1707,12 @@ T asin(const T &x) {
  return asin(x);
}

#if EIGEN_HAS_CXX11_MATH
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T asinh(const T &x) {
  EIGEN_USING_STD(asinh);
  return static_cast<T>(asinh(x));
}
#endif

#if defined(SYCL_DEVICE_ONLY)
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asin, asin)
@@ -1717,14 +1734,12 @@ T atan(const T &x) {
  return static_cast<T>(atan(x));
}

#if EIGEN_HAS_CXX11_MATH
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T atanh(const T &x) {
  EIGEN_USING_STD(atanh);
  return static_cast<T>(atanh(x));
}
#endif

#if defined(SYCL_DEVICE_ONLY)
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atan, atan)
@@ -2006,9 +2021,10 @@ namespace internal {
// Specialization for complex types that are not supported by std::expm1.
template <typename RealScalar>
struct expm1_impl<std::complex<RealScalar> > {
  EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)

  EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(
      const std::complex<RealScalar>& x) {
    EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
    RealScalar xr = x.real();
    RealScalar xi = x.imag();
    // expm1(z) = exp(z) - 1

@@ -11,17 +11,152 @@
#ifndef EIGEN_MATHFUNCTIONSIMPL_H
#define EIGEN_MATHFUNCTIONSIMPL_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

/** \internal Fast reciprocal using Newton-Raphson's method.

 Preconditions:
   1. The starting guess provided in approx_a_recip must have at least half
      the leading mantissa bits in the correct result, such that a single
      Newton-Raphson step is sufficient to get within 1-2 ulps of the correct
      result.
   2. If a is zero, approx_a_recip must be infinite with the same sign as a.
   3. If a is infinite, approx_a_recip must be zero with the same sign as a.

 If the preconditions are satisfied, which they are for the _*_rcp_ps
 instructions on x86, the result has a maximum relative error of 2 ulps,
 and correctly handles reciprocals of zero, infinity, and NaN.
*/
template <typename Packet, int Steps>
struct generic_reciprocal_newton_step {
  static_assert(Steps > 0, "Steps must be at least 1.");
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
  run(const Packet& a, const Packet& approx_a_recip) {
    using Scalar = typename unpacket_traits<Packet>::type;
    const Packet two = pset1<Packet>(Scalar(2));
    // Refine the approximation using one Newton-Raphson step:
    //   x_{i} = x_{i-1} * (2 - a * x_{i-1})
    const Packet x =
        generic_reciprocal_newton_step<Packet,Steps - 1>::run(a, approx_a_recip);
    const Packet tmp = pnmadd(a, x, two);
    // If tmp is NaN, it means that a is either +/-0 or +/-Inf.
    // In this case return the approximation directly.
    const Packet is_not_nan = pcmp_eq(tmp, tmp);
    return pselect(is_not_nan, pmul(x, tmp), x);
  }
};

template<typename Packet>
struct generic_reciprocal_newton_step<Packet, 0> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
  run(const Packet& /*unused*/, const Packet& approx_rsqrt) {
    return approx_rsqrt;
  }
};

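A scalar sketch of the refinement above, for intuition: Newton's method on f(x) = 1/x - a gives the update x <- x * (2 - a * x), and the relative error roughly squares on each step, so one step suffices when the guess already has half the mantissa bits.

// Scalar Newton-Raphson reciprocal, mirroring pnmadd(a, x, two) then pmul.
#include <cstdio>

int main() {
  double a = 3.0;
  double x = 0.3;                 // crude guess for 1/3
  for (int step = 0; step < 3; ++step) {
    x = x * (2.0 - a * x);        // error roughly squares each iteration
    std::printf("step %d: x = %.17g\n", step, x);
  }
}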
/** \internal Fast reciprocal sqrt using Newton-Raphson's method.

 Preconditions:
   1. The starting guess provided in approx_rsqrt must have at least half
      the leading mantissa bits in the correct result, such that a single
      Newton-Raphson step is sufficient to get within 1-2 ulps of the correct
      result.
   2. If a is zero, approx_rsqrt must be infinite with the same sign as a.
   3. If a is infinite, approx_rsqrt must be zero with the same sign as a.

 If the preconditions are satisfied, which they are for the _*_rsqrt_ps
 instructions on x86, the result has a maximum relative error of 2 ulps,
 and correctly handles zero, infinity, and NaN. Positive denormals are
 treated as zero.
*/
template <typename Packet, int Steps>
struct generic_rsqrt_newton_step {
  static_assert(Steps > 0, "Steps must be at least 1.");

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
  run(const Packet& a, const Packet& approx_rsqrt) {
    using Scalar = typename unpacket_traits<Packet>::type;
    const Packet one_point_five = pset1<Packet>(Scalar(1.5));
    const Packet minus_half = pset1<Packet>(Scalar(-0.5));

    // Refine the approximation using one Newton-Raphson step:
    //   x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n)).
    // The approximation is expressed this way to avoid over/under-flows.
    Packet x_newton = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five));
    for (int step = 1; step < Steps; ++step) {
      x_newton = pmul(x_newton, pmadd(pmul(minus_half, x_newton), pmul(a, x_newton), one_point_five));
    }

    // If approx_rsqrt is 0 or +/-inf, we should return it as is. Note:
    // on intel, approx_rsqrt can be inf for small denormal values.
    const Packet return_approx = por(pcmp_eq(approx_rsqrt, pzero(a)),
                                     pcmp_eq(pabs(approx_rsqrt), pset1<Packet>(NumTraits<Scalar>::infinity())));
    return pselect(return_approx, approx_rsqrt, x_newton);
  }
};

template<typename Packet>
struct generic_rsqrt_newton_step<Packet, 0> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
  run(const Packet& /*unused*/, const Packet& approx_rsqrt) {
    return approx_rsqrt;
  }
};

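The scalar form of the rsqrt step above, grouped exactly as in the packet code so that a * x * x never over- or under-flows as a single product:

// Scalar Newton-Raphson step for 1/sqrt(a): x <- x * (1.5 + (-0.5*x) * (a*x)).
#include <cstdio>

int main() {
  double a = 2.0;
  double x = 0.7;                          // rough guess for 1/sqrt(2) ~ 0.7071
  for (int step = 0; step < 3; ++step) {
    x = x * (1.5 + (-0.5 * x) * (a * x));  // same grouping as the pmadd/pmul chain
    std::printf("step %d: x = %.17g\n", step, x);
  }
}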
/** \internal Fast sqrt using Newton-Raphson's method.

 Preconditions:
   1. The starting guess for the reciprocal sqrt provided in approx_rsqrt must
      have at least half the leading mantissa bits in the correct result, such
      that a single Newton-Raphson step is sufficient to get within 1-2 ulps of
      the correct result.
   2. If a is zero, approx_rsqrt must be infinite.
   3. If a is infinite, approx_rsqrt must be zero.

 If the preconditions are satisfied, which they are for the _*_rsqrt_ps
 instructions on x86, the result has a maximum relative error of 2 ulps,
 and correctly handles zero, infinity, and NaN. Positive denormal inputs
 are treated as zero.
*/
template <typename Packet, int Steps=1>
struct generic_sqrt_newton_step {
  static_assert(Steps > 0, "Steps must be at least 1.");

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
  run(const Packet& a, const Packet& approx_rsqrt) {
    using Scalar = typename unpacket_traits<Packet>::type;
    const Packet one_point_five = pset1<Packet>(Scalar(1.5));
    const Packet minus_half = pset1<Packet>(Scalar(-0.5));
    // If a is inf or zero, return a directly.
    const Packet inf_mask = pcmp_eq(a, pset1<Packet>(NumTraits<Scalar>::infinity()));
    const Packet return_a = por(pcmp_eq(a, pzero(a)), inf_mask);
    // Do a single step of Newton's iteration for reciprocal square root:
    //   x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n)).
    // The Newton's step is computed this way to avoid over/under-flows.
    Packet rsqrt = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five));
    for (int step = 1; step < Steps; ++step) {
      rsqrt = pmul(rsqrt, pmadd(pmul(minus_half, rsqrt), pmul(a, rsqrt), one_point_five));
    }

    // Return sqrt(x) = x * rsqrt(x) for non-zero finite positive arguments.
    // Return a itself for 0 or +inf, NaN for negative arguments.
    return pselect(return_a, a, pmul(a, rsqrt));
  }
};

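A scalar sketch of the strategy above: refine the reciprocal square root once, then recover the square root as sqrt(a) = a * rsqrt(a), passing 0 and +inf through unchanged. The starting guess here is a hand-picked assumption standing in for the hardware rsqrt estimate.

// Illustrative sqrt-via-rsqrt for finite positive a (not Eigen code).
#include <cmath>
#include <cstdio>

double sqrt_via_rsqrt(double a, double r /* approx of 1/sqrt(a) */) {
  if (a == 0.0 || std::isinf(a)) return a;  // mirrors the return_a fast path
  r = r * (1.5 + (-0.5 * r) * (a * r));     // one Newton step on 1/sqrt(a)
  return a * r;                             // sqrt(a) = a * rsqrt(a)
}

int main() {
  std::printf("%.17g\n", sqrt_via_rsqrt(2.0, 0.7));  // ~1.4142135...
}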
/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
    Doesn't do anything fancy, just a 13/6-degree rational interpolant which
    is accurate up to a couple of ulps in the (approximate) range [-8, 8],
    outside of which tanh(x) = +/-1 in single precision. The input is clamped
    to the range [-c, c]. The value c is chosen as the smallest value where
    the approximation evaluates to exactly 1. In the range [-0.0004, 0.0004]
    the approxmation tanh(x) ~= x is used for better accuracy as x tends to zero.
    the approximation tanh(x) ~= x is used for better accuracy as x tends to zero.

    This implementation works on both scalars and packets.
*/
@@ -88,7 +223,7 @@ RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
  EIGEN_USING_STD(sqrt);
  RealScalar p, qp;
  p = numext::maxi(x,y);
  if(p==RealScalar(0)) return RealScalar(0);
  if(numext::is_exactly_zero(p)) return RealScalar(0);
  qp = numext::mini(y,x) / p;
  return p * sqrt(RealScalar(1) + qp*qp);
}
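The scaling trick in positive_real_hypot above is worth spelling out: factoring out p = max(x, y) keeps qp = min/max in [0, 1], so qp*qp can neither overflow nor underflow the way x*x + y*y can.

// Sketch of the hypot scaling idea (illustrative, not Eigen code).
#include <cmath>
#include <cstdio>

double positive_hypot(double x, double y) {
  double p = std::fmax(x, y);
  if (p == 0.0) return 0.0;                // the is_exactly_zero(p) case
  double qp = std::fmin(x, y) / p;         // in [0, 1]
  return p * std::sqrt(1.0 + qp * qp);
}

int main() {
  // Naive sqrt(x*x + y*y) would overflow to inf here; the scaled form does not.
  std::printf("%g\n", positive_hypot(3e200, 4e200));  // 5e200
}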
@@ -138,8 +273,8 @@ EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& z) {

  return
    (numext::isinf)(y) ? std::complex<T>(NumTraits<T>::infinity(), y)
      : x == zero ? std::complex<T>(w, y < zero ? -w : w)
      : x > zero ? std::complex<T>(w, y / (2 * w))
      : numext::is_exactly_zero(x) ? std::complex<T>(w, y < zero ? -w : w)
      : x > zero ? std::complex<T>(w, y / (2 * w))
      : std::complex<T>(numext::abs(y) / (2 * w), y < zero ? -w : w );
}

@@ -177,10 +312,10 @@ EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& z) {
  const T woz = w / abs_z;
  // Corner cases consistent with 1/sqrt(z) on gcc/clang.
  return
    abs_z == zero ? std::complex<T>(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN())
      : ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex<T>(zero, zero)
      : x == zero ? std::complex<T>(woz, y < zero ? woz : -woz)
      : x > zero ? std::complex<T>(woz, -y / (2 * w * abs_z))
    numext::is_exactly_zero(abs_z) ? std::complex<T>(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN())
      : ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex<T>(zero, zero)
      : numext::is_exactly_zero(x) ? std::complex<T>(woz, y < zero ? woz : -woz)
      : x > zero ? std::complex<T>(woz, -y / (2 * w * abs_z))
      : std::complex<T>(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz );
}


@@ -11,37 +11,39 @@
#ifndef EIGEN_MATRIX_H
#define EIGEN_MATRIX_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
template<typename Scalar_, int Rows_, int Cols_, int Options_, int MaxRows_, int MaxCols_>
struct traits<Matrix<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> >
{
private:
  enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
  typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
  constexpr static int size = internal::size_at_compile_time(Rows_,Cols_);
  typedef typename find_best_packet<Scalar_,size>::type PacketScalar;
  enum {
    row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
    is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
    max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
    default_alignment = compute_default_alignment<_Scalar,max_size>::value,
    actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
    row_major_bit = Options_&RowMajor ? RowMajorBit : 0,
    is_dynamic_size_storage = MaxRows_==Dynamic || MaxCols_==Dynamic,
    max_size = is_dynamic_size_storage ? Dynamic : MaxRows_*MaxCols_,
    default_alignment = compute_default_alignment<Scalar_,max_size>::value,
    actual_alignment = ((Options_&DontAlign)==0) ? default_alignment : 0,
    required_alignment = unpacket_traits<PacketScalar>::alignment,
    packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
    packet_access_bit = (packet_traits<Scalar_>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
  };

public:
  typedef _Scalar Scalar;
  typedef Scalar_ Scalar;
  typedef Dense StorageKind;
  typedef Eigen::Index StorageIndex;
  typedef MatrixXpr XprKind;
  enum {
    RowsAtCompileTime = _Rows,
    ColsAtCompileTime = _Cols,
    MaxRowsAtCompileTime = _MaxRows,
    MaxColsAtCompileTime = _MaxCols,
    Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
    Options = _Options,
    RowsAtCompileTime = Rows_,
    ColsAtCompileTime = Cols_,
    MaxRowsAtCompileTime = MaxRows_,
    MaxColsAtCompileTime = MaxCols_,
    Flags = compute_matrix_flags(Options_),
    Options = Options_,
    InnerStrideAtCompileTime = 1,
    OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,

@@ -63,18 +65,18 @@ public:
  * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note").
  *
  * The first three template parameters are required:
  * \tparam _Scalar Numeric type, e.g. float, double, int or std::complex<float>.
  * \tparam Scalar_ Numeric type, e.g. float, double, int or std::complex<float>.
  *                 User defined scalar types are supported as well (see \ref user_defined_scalars "here").
  * \tparam _Rows Number of rows, or \b Dynamic
  * \tparam _Cols Number of columns, or \b Dynamic
  * \tparam Rows_ Number of rows, or \b Dynamic
  * \tparam Cols_ Number of columns, or \b Dynamic
  *
  * The remaining template parameters are optional -- in most cases you don't have to worry about them.
  * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either
  * \tparam Options_ A combination of either \b #RowMajor or \b #ColMajor, and of either
  *                 \b #AutoAlign or \b #DontAlign.
  *                 The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
  *                 for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
  * \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note").
  * \tparam _MaxCols Maximum number of columns. Defaults to \a _Cols (\ref maxrows "note").
  * \tparam MaxRows_ Maximum number of rows. Defaults to \a Rows_ (\ref maxrows "note").
  * \tparam MaxCols_ Maximum number of columns. Defaults to \a Cols_ (\ref maxrows "note").
  *
  * Eigen provides a number of typedefs covering the usual cases. Here are some examples:
  *
@@ -128,12 +130,12 @@ public:
  * Note that \em dense matrices, be they Fixed-size or Dynamic-size, <em>do not</em> expand dynamically in the sense of a std::map.
  * If you want this behavior, see the Sparse module.</dd>
  *
  * <dt><b>\anchor maxrows _MaxRows and _MaxCols:</b></dt>
  * <dt><b>\anchor maxrows MaxRows_ and MaxCols_:</b></dt>
  * <dd>In most cases, one just leaves these parameters to the default values.
  * These parameters mean the maximum size of rows and columns that the matrix may have. They are useful in cases
  * when the exact numbers of rows and columns are not known at compile-time, but it is known at compile-time that they cannot
  * exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case _MaxRows and _MaxCols
  * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
  * exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case MaxRows_ and MaxCols_
  * are the dimensions of the original matrix, while Rows_ and Cols_ are Dynamic.</dd>
  * </dl>
  *
  * <i><b>ABI and storage layout</b></i>
@@ -174,9 +176,9 @@ public:
  * \ref TopicStorageOrders
  */

template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
template<typename Scalar_, int Rows_, int Cols_, int Options_, int MaxRows_, int MaxCols_>
class Matrix
  : public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
  : public PlainObjectBase<Matrix<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> >
{
  public:

@@ -185,7 +187,7 @@ class Matrix
      */
    typedef PlainObjectBase<Matrix> Base;

    enum { Options = _Options };
    enum { Options = Options_ };

    EIGEN_DENSE_PUBLIC_INTERFACE(Matrix)

@@ -258,7 +260,6 @@ class Matrix
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Matrix() : Base()
    {
      Base::_check_template_params();
      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
    }

@@ -266,24 +267,18 @@ class Matrix
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    explicit Matrix(internal::constructor_without_unaligned_array_assert)
      : Base(internal::constructor_without_unaligned_array_assert())
    { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
    { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }

#if EIGEN_HAS_RVALUE_REFERENCES
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
      : Base(std::move(other))
    {
      Base::_check_template_params();
    }
      : Base(std::move(other)) {}
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
    {
      Base::operator=(std::move(other));
      return *this;
    }
#endif

#if EIGEN_HAS_CXX11
    /** \copydoc PlainObjectBase(const Scalar&, const Scalar&, const Scalar&, const Scalar&, const ArgTypes&... args)
     *
     * Example: \include Matrix_variadic_ctor_cxx11.cpp
@@ -317,9 +312,9 @@ class Matrix
     *
     * \sa Matrix(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)
     */
    EIGEN_DEVICE_FUNC
    explicit EIGEN_STRONG_INLINE Matrix(const std::initializer_list<std::initializer_list<Scalar>>& list) : Base(list) {}
#endif // end EIGEN_HAS_CXX11
    EIGEN_DEVICE_FUNC explicit constexpr EIGEN_STRONG_INLINE Matrix(
        const std::initializer_list<std::initializer_list<Scalar>>& list)
        : Base(list) {}

#ifndef EIGEN_PARSED_BY_DOXYGEN

@@ -328,7 +323,6 @@ class Matrix
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    explicit Matrix(const T& x)
    {
      Base::_check_template_params();
      Base::template _init1<T>(x);
    }

@@ -336,7 +330,6 @@ class Matrix
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Matrix(const T0& x, const T1& y)
    {
      Base::_check_template_params();
      Base::template _init2<T0,T1>(x, y);
    }

@@ -388,7 +381,6 @@ class Matrix
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
    {
      Base::_check_template_params();
      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3)
      m_storage.data()[0] = x;
      m_storage.data()[1] = y;
@@ -400,7 +392,6 @@ class Matrix
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
    {
      Base::_check_template_params();
      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4)
      m_storage.data()[0] = x;
      m_storage.data()[1] = y;
@@ -480,16 +471,21 @@ class Matrix

#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix)   \
/** \ingroup matrixtypedefs */                                    \
/** \brief `Size`×`Size` matrix of type `Type`. */                \
typedef Matrix<Type, Size, Size> Matrix##SizeSuffix##TypeSuffix;  \
/** \ingroup matrixtypedefs */                                    \
/** \brief `Size`×`1` vector of type `Type`. */                   \
typedef Matrix<Type, Size, 1>    Vector##SizeSuffix##TypeSuffix;  \
/** \ingroup matrixtypedefs */                                    \
/** \brief `1`×`Size` vector of type `Type`. */                   \
typedef Matrix<Type, 1, Size>    RowVector##SizeSuffix##TypeSuffix;

#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size)         \
/** \ingroup matrixtypedefs */                                    \
/** \brief `Size`×`Dynamic` matrix of type `Type`. */             \
typedef Matrix<Type, Size, Dynamic> Matrix##Size##X##TypeSuffix;  \
/** \ingroup matrixtypedefs */                                    \
/** \brief `Dynamic`×`Size` matrix of type `Type`. */             \
typedef Matrix<Type, Dynamic, Size> Matrix##X##Size##TypeSuffix;

#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
@@ -511,30 +507,28 @@ EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
#undef EIGEN_MAKE_TYPEDEFS
#undef EIGEN_MAKE_FIXED_TYPEDEFS

#if EIGEN_HAS_CXX11

#define EIGEN_MAKE_TYPEDEFS(Size, SizeSuffix)     \
/** \ingroup matrixtypedefs */                    \
/** \brief \cpp11 */                              \
template <typename Type>                          \
using Matrix##SizeSuffix = Matrix<Type, Size, Size>; \
/** \ingroup matrixtypedefs */                    \
/** \brief \cpp11 */                              \
template <typename Type>                          \
using Vector##SizeSuffix = Matrix<Type, Size, 1>; \
/** \ingroup matrixtypedefs */                    \
/** \brief \cpp11 */                              \
template <typename Type>                          \
#define EIGEN_MAKE_TYPEDEFS(Size, SizeSuffix)                \
/** \ingroup matrixtypedefs */                               \
/** \brief \cpp11 `Size`×`Size` matrix of type `Type`.*/     \
template <typename Type>                                     \
using Matrix##SizeSuffix = Matrix<Type, Size, Size>;         \
/** \ingroup matrixtypedefs */                               \
/** \brief \cpp11 `Size`×`1` vector of type `Type`.*/        \
template <typename Type>                                     \
using Vector##SizeSuffix = Matrix<Type, Size, 1>;            \
/** \ingroup matrixtypedefs */                               \
/** \brief \cpp11 `1`×`Size` vector of type `Type`.*/        \
template <typename Type>                                     \
using RowVector##SizeSuffix = Matrix<Type, 1, Size>;

#define EIGEN_MAKE_FIXED_TYPEDEFS(Size)           \
/** \ingroup matrixtypedefs */                    \
/** \brief \cpp11 */                              \
template <typename Type>                          \
using Matrix##Size##X = Matrix<Type, Size, Dynamic>; \
/** \ingroup matrixtypedefs */                    \
/** \brief \cpp11 */                              \
template <typename Type>                          \
#define EIGEN_MAKE_FIXED_TYPEDEFS(Size)                      \
/** \ingroup matrixtypedefs */                               \
/** \brief \cpp11 `Size`×`Dynamic` matrix of type `Type`. */ \
template <typename Type>                                     \
using Matrix##Size##X = Matrix<Type, Size, Dynamic>;         \
/** \ingroup matrixtypedefs */                               \
/** \brief \cpp11 `Dynamic`×`Size` matrix of type `Type`. */ \
template <typename Type>                                     \
using Matrix##X##Size = Matrix<Type, Dynamic, Size>;

EIGEN_MAKE_TYPEDEFS(2, 2)
@@ -546,20 +540,18 @@ EIGEN_MAKE_FIXED_TYPEDEFS(3)
EIGEN_MAKE_FIXED_TYPEDEFS(4)

/** \ingroup matrixtypedefs
  * \brief \cpp11 */
  * \brief \cpp11 `Size`×`1` vector of type `Type`. */
template <typename Type, int Size>
using Vector = Matrix<Type, Size, 1>;

/** \ingroup matrixtypedefs
  * \brief \cpp11 */
  * \brief \cpp11 `1`×`Size` vector of type `Type`. */
template <typename Type, int Size>
using RowVector = Matrix<Type, 1, Size>;

#undef EIGEN_MAKE_TYPEDEFS
#undef EIGEN_MAKE_FIXED_TYPEDEFS

#endif // EIGEN_HAS_CXX11

} // end namespace Eigen

#endif // EIGEN_MATRIX_H
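A quick illustration of what the typedef macros above expand to; these names are standard Eigen API:

// Standard Eigen convenience typedefs produced by the macros above.
#include <Eigen/Core>

int main() {
  Eigen::Matrix3f A;              // Matrix<float, 3, 3>
  Eigen::Vector4d v;              // Matrix<double, 4, 1>
  Eigen::RowVector2i r;           // Matrix<int, 1, 2>
  Eigen::Matrix2Xf B(2, 10);      // Matrix<float, 2, Dynamic>
  Eigen::Vector<float, 5> w;      // C++11 template-alias form
  (void)A; (void)v; (void)r; (void)B; (void)w;
}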
@@ -11,6 +11,8 @@
#ifndef EIGEN_MATRIXBASE_H
#define EIGEN_MATRIXBASE_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class MatrixBase
@@ -92,8 +94,8 @@ template<typename Derived> class MatrixBase

#ifndef EIGEN_PARSED_BY_DOXYGEN
    /** type of the equivalent square matrix */
    typedef Matrix<Scalar,EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime),
                          EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime)> SquareMatrixType;
    typedef Matrix<Scalar, internal::max_size_prefer_dynamic(RowsAtCompileTime, ColsAtCompileTime),
                           internal::max_size_prefer_dynamic(RowsAtCompileTime, ColsAtCompileTime)> SquareMatrixType;
#endif // not EIGEN_PARSED_BY_DOXYGEN

    /** \returns the size of the main diagonal, which is min(rows(),cols()).
@@ -107,10 +109,10 @@ template<typename Derived> class MatrixBase

    /** \internal Represents a matrix with all coefficients equal to one another*/
    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
    /** \internal the return type of MatrixBase::adjoint() */
    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
                        CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
                        ConstTransposeReturnType
                     >::type AdjointReturnType;
    typedef std::conditional_t<NumTraits<Scalar>::IsComplex,
                        CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
                        ConstTransposeReturnType
                     > AdjointReturnType;
    /** \internal Return type of eigenvalues() */
    typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
    /** \internal the return type of identity */
@@ -184,6 +186,11 @@ template<typename Derived> class MatrixBase
    const Product<Derived, DiagonalDerived, LazyProduct>
    operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;

    template<typename SkewDerived>
    EIGEN_DEVICE_FUNC
    const Product<Derived, SkewDerived, LazyProduct>
    operator*(const SkewSymmetricBase<SkewDerived> &skew) const;

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
@@ -206,28 +213,22 @@ template<typename Derived> class MatrixBase
    EIGEN_DEVICE_FUNC
    DiagonalReturnType diagonal();

    typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
    typedef Diagonal<const Derived> ConstDiagonalReturnType;
    EIGEN_DEVICE_FUNC
    ConstDiagonalReturnType diagonal() const;

    template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
    template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
    const ConstDiagonalReturnType diagonal() const;

    template<int Index>
    EIGEN_DEVICE_FUNC
    typename DiagonalIndexReturnType<Index>::Type diagonal();
    Diagonal<Derived, Index> diagonal();

    template<int Index>
    EIGEN_DEVICE_FUNC
    typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;

    typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
    typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
    const Diagonal<const Derived, Index> diagonal() const;

    EIGEN_DEVICE_FUNC
    DiagonalDynamicIndexReturnType diagonal(Index index);
    Diagonal<Derived, DynamicIndex> diagonal(Index index);
    EIGEN_DEVICE_FUNC
    ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;
    const Diagonal<const Derived, DynamicIndex> diagonal(Index index) const;

    template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
    template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
@@ -263,6 +264,8 @@ template<typename Derived> class MatrixBase
    EIGEN_DEVICE_FUNC
    const DiagonalWrapper<const Derived> asDiagonal() const;
    const PermutationWrapper<const Derived> asPermutation() const;
    EIGEN_DEVICE_FUNC
    const SkewSymmetricWrapper<const Derived> asSkewSymmetric() const;

    EIGEN_DEVICE_FUNC
    Derived& setIdentity();
@@ -277,6 +280,8 @@ template<typename Derived> class MatrixBase
    bool isUpperTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
    bool isLowerTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;

    bool isSkewSymmetric(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;

    template<typename OtherDerived>
    bool isOrthogonal(const MatrixBase<OtherDerived>& other,
                      const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
@@ -368,25 +373,23 @@ template<typename Derived> class MatrixBase

/////////// SVD module ///////////

    inline JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
    inline BDCSVD<PlainObject> bdcSvd(unsigned int computationOptions = 0) const;
    template<int Options = 0>
    inline JacobiSVD<PlainObject, Options> jacobiSvd() const;
    template<int Options = 0>
    EIGEN_DEPRECATED
    inline JacobiSVD<PlainObject, Options> jacobiSvd(unsigned int computationOptions) const;

    template<int Options = 0>
    inline BDCSVD<PlainObject, Options> bdcSvd() const;
    template<int Options = 0>
    EIGEN_DEPRECATED
    inline BDCSVD<PlainObject, Options> bdcSvd(unsigned int computationOptions) const;

/////////// Geometry module ///////////

#ifndef EIGEN_PARSED_BY_DOXYGEN
    /// \internal helper struct to form the return type of the cross product
    template<typename OtherDerived> struct cross_product_return_type {
      typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
      typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
    };
#endif // EIGEN_PARSED_BY_DOXYGEN
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
#ifndef EIGEN_PARSED_BY_DOXYGEN
    inline typename cross_product_return_type<OtherDerived>::type
#else
    inline PlainObject
#endif
    inline typename internal::cross_impl<Derived, OtherDerived>::return_type
    cross(const MatrixBase<OtherDerived>& other) const;

    template<typename OtherDerived>
@@ -468,11 +471,9 @@ template<typename Derived> class MatrixBase
    const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cosh, hyperbolic cosine)
    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sinh, hyperbolic sine)
#if EIGEN_HAS_CXX11_MATH
    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, atanh, inverse hyperbolic tangent)
    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, acosh, inverse hyperbolic cosine)
    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, asinh, inverse hyperbolic sine)
#endif
    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cos, cosine)
    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sin, sine)
    EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root)

@@ -11,6 +11,8 @@
#ifndef EIGEN_NESTBYVALUE_H
#define EIGEN_NESTBYVALUE_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -41,6 +43,8 @@ template<typename ExpressionType> class NestByValue
  public:

    typedef typename internal::dense_xpr_base<NestByValue>::type Base;
    static constexpr bool HasDirectAccess = internal::has_direct_access<ExpressionType>::ret;

    EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue)

    EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
@@ -52,6 +56,18 @@ template<typename ExpressionType> class NestByValue

    EIGEN_DEVICE_FUNC const ExpressionType& nestedExpression() const { return m_expression; }

    EIGEN_DEVICE_FUNC typename std::enable_if<HasDirectAccess, const Scalar*>::type data() const {
      return m_expression.data();
    }

    EIGEN_DEVICE_FUNC typename std::enable_if<HasDirectAccess, Index>::type innerStride() const {
      return m_expression.innerStride();
    }

    EIGEN_DEVICE_FUNC typename std::enable_if<HasDirectAccess, Index>::type outerStride() const {
      return m_expression.outerStride();
    }

  protected:
    const ExpressionType m_expression;
};

@@ -10,6 +10,8 @@
#ifndef EIGEN_NOALIAS_H
#define EIGEN_NOALIAS_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class NoAlias

@@ -10,6 +10,8 @@
#ifndef EIGEN_NUMTRAITS_H
#define EIGEN_NUMTRAITS_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -61,10 +63,10 @@ struct default_digits_impl<T,false,false> // Floating point
{
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
  static int run() {
    using std::log;
    using std::log2;
    using std::ceil;
    typedef typename NumTraits<T>::Real Real;
    return int(ceil(-log(NumTraits<Real>::epsilon())/log(static_cast<Real>(2))));
    return int(ceil(-log2(NumTraits<Real>::epsilon())));
  }
};

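The change above replaces log(eps)/log(2) with log2(eps). A quick check of the fallback formula; the concrete value is an assumption of IEEE-754 binary64, where epsilon = 2^-52:

// Both variants of the digits() fallback formula (illustrative sketch).
#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  double eps = std::numeric_limits<double>::epsilon();       // 2^-52
  int via_log  = int(std::ceil(-std::log(eps) / std::log(2.0)));
  int via_log2 = int(std::ceil(-std::log2(eps)));            // exact for powers of two
  // Typically prints "52 52"; log2 avoids rounding error in the quotient.
  std::printf("%d %d\n", via_log, via_log2);
}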
@@ -83,17 +85,17 @@ namespace numext {
// TODO: Replace by std::bit_cast (available in C++20)
template <typename Tgt, typename Src>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) {
#if EIGEN_HAS_TYPE_TRAITS
  // The behaviour of memcpy is not specified for non-trivially copyable types
  EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Src>::value, THIS_TYPE_IS_NOT_SUPPORTED);
  EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Tgt>::value && std::is_default_constructible<Tgt>::value,
                      THIS_TYPE_IS_NOT_SUPPORTED);
#endif

  EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED);

  Tgt tgt;
  // Load src into registers first. This allows the memcpy to be elided by CUDA.
  const Src staged = src;
  EIGEN_USING_STD(memcpy)
  memcpy(&tgt, &src, sizeof(Tgt));
  memcpy(static_cast<void*>(&tgt),static_cast<const void*>(&staged), sizeof(Tgt));
  return tgt;
}
} // namespace numext
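A usage sketch of the memcpy-based bit_cast idiom above (a standalone imitation, not the Eigen function itself): reinterpret a float's bits as a same-width unsigned integer without violating strict aliasing.

// Standalone bit_cast sketch mirroring the staged-copy pattern above.
#include <cstdint>
#include <cstdio>
#include <cstring>

template <typename Tgt, typename Src>
Tgt bit_cast(const Src& src) {
  static_assert(sizeof(Src) == sizeof(Tgt), "sizes must match");
  Tgt tgt;
  const Src staged = src;  // stage in a local first, as in the CUDA-friendly version
  std::memcpy(&tgt, &staged, sizeof(Tgt));
  return tgt;
}

int main() {
  std::uint32_t bits = bit_cast<std::uint32_t>(1.0f);
  std::printf("0x%08x\n", bits);  // 0x3f800000
}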
@@ -162,11 +164,7 @@ template<typename T> struct GenericNumTraits
|
||||
};
|
||||
|
||||
typedef T Real;
|
||||
typedef typename internal::conditional<
|
||||
IsInteger,
|
||||
typename internal::conditional<sizeof(T)<=2, float, double>::type,
|
||||
T
|
||||
>::type NonInteger;
|
||||
typedef std::conditional_t<IsInteger, std::conditional_t<sizeof(T)<=2, float, double>, T> NonInteger;
|
||||
typedef T Nested;
|
||||
typedef T Literal;
|
||||
|
||||
@@ -252,15 +250,15 @@ template<> struct NumTraits<long double>
|
||||
static inline long double dummy_precision() { return 1e-15l; }
|
||||
};
|
||||
|
||||
template<typename _Real> struct NumTraits<std::complex<_Real> >
|
||||
: GenericNumTraits<std::complex<_Real> >
|
||||
template<typename Real_> struct NumTraits<std::complex<Real_> >
|
||||
: GenericNumTraits<std::complex<Real_> >
|
||||
{
|
||||
typedef _Real Real;
|
||||
typedef typename NumTraits<_Real>::Literal Literal;
|
||||
typedef Real_ Real;
|
||||
typedef typename NumTraits<Real_>::Literal Literal;
|
||||
enum {
|
||||
IsComplex = 1,
|
||||
RequireInitialization = NumTraits<_Real>::RequireInitialization,
|
||||
ReadCost = 2 * NumTraits<_Real>::ReadCost,
|
||||
RequireInitialization = NumTraits<Real_>::RequireInitialization,
|
||||
ReadCost = 2 * NumTraits<Real_>::ReadCost,
|
||||
AddCost = 2 * NumTraits<Real>::AddCost,
|
||||
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
|
||||
};
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_PARTIALREDUX_H
|
||||
#define EIGEN_PARTIALREDUX_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
@@ -29,7 +31,7 @@ namespace internal {
|
||||
* some (optional) processing of the outcome, e.g., division by n for mean.
|
||||
*
|
||||
* For the vectorized path let's observe that the packet-size and outer-unrolling
|
||||
* are both decided by the assignement logic. So all we have to do is to decide
|
||||
* are both decided by the assignment logic. So all we have to do is to decide
|
||||
* on the inner unrolling.
|
||||
*
|
||||
* For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h,
|
||||
@@ -54,12 +56,17 @@ struct packetwise_redux_traits
|
||||
/* Value to be returned when size==0 , by default let's return 0 */
|
||||
template<typename PacketType,typename Func>
|
||||
EIGEN_DEVICE_FUNC
|
||||
PacketType packetwise_redux_empty_value(const Func& ) { return pset1<PacketType>(0); }
|
||||
PacketType packetwise_redux_empty_value(const Func& ) {
|
||||
const typename unpacket_traits<PacketType>::type zero(0);
|
||||
return pset1<PacketType>(zero);
|
||||
}
|
||||
|
||||
/* For products the default is 1 */
|
||||
template<typename PacketType,typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
PacketType packetwise_redux_empty_value(const scalar_product_op<Scalar,Scalar>& ) { return pset1<PacketType>(1); }
|
||||
PacketType packetwise_redux_empty_value(const scalar_product_op<Scalar,Scalar>& ) {
|
||||
return pset1<PacketType>(Scalar(1));
|
||||
}
|
||||
|
||||
/* Perform the actual reduction */
|
||||
template<typename Func, typename Evaluator,
|
||||
@@ -134,8 +141,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
|
||||
{
|
||||
typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
|
||||
typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested;
|
||||
typedef typename internal::add_const_on_value_type<ArgTypeNested>::type ConstArgTypeNested;
|
||||
typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
|
||||
typedef add_const_on_value_type_t<ArgTypeNested> ConstArgTypeNested;
|
||||
typedef internal::remove_all_t<ArgTypeNested> ArgTypeNestedCleaned;
|
||||
typedef typename ArgType::Scalar InputScalar;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
enum {
|
||||
@@ -147,16 +154,16 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
|
||||
: TraversalSize==0 ? 1
|
||||
: int(TraversalSize) * int(evaluator<ArgType>::CoeffReadCost) + int(CostOpType::value),
|
||||
|
||||
_ArgFlags = evaluator<ArgType>::Flags,
|
||||
ArgFlags_ = evaluator<ArgType>::Flags,
|
||||
|
||||
_Vectorizable = bool(int(_ArgFlags)&PacketAccessBit)
|
||||
Vectorizable_ = bool(int(ArgFlags_)&PacketAccessBit)
|
||||
&& bool(MemberOp::Vectorizable)
|
||||
&& (Direction==int(Vertical) ? bool(_ArgFlags&RowMajorBit) : (_ArgFlags&RowMajorBit)==0)
|
||||
&& (Direction==int(Vertical) ? bool(ArgFlags_&RowMajorBit) : (ArgFlags_&RowMajorBit)==0)
|
||||
&& (TraversalSize!=0),
|
||||
|
||||
Flags = (traits<XprType>::Flags&RowMajorBit)
|
||||
| (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit)))
|
||||
| (_Vectorizable ? PacketAccessBit : 0)
|
||||
| (Vectorizable_ ? PacketAccessBit : 0)
|
||||
| LinearAccessBit,
|
||||
|
||||
Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
|
||||
|
||||
@@ -11,6 +11,8 @@
#ifndef EIGEN_PERMUTATIONMATRIX_H
#define EIGEN_PERMUTATIONMATRIX_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -269,13 +271,13 @@ class PermutationBase : public EigenBase<Derived>
};

namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
: traits<Matrix<_StorageIndex,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_>
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, StorageIndex_> >
: traits<Matrix<StorageIndex_,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{
typedef PermutationStorage StorageKind;
typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
typedef _StorageIndex StorageIndex;
typedef Matrix<StorageIndex_, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
typedef StorageIndex_ StorageIndex;
typedef void Scalar;
};
}
@@ -287,14 +289,14 @@ struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _Storag
*
* \tparam SizeAtCompileTime the number of rows/cols, or Dynamic
* \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
* \tparam _StorageIndex the integer type of the indices
* \tparam StorageIndex_ the integer type of the indices
*
* This class represents a permutation matrix, internally stored as a vector of integers.
*
* \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
*/
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_>
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, StorageIndex_> >
{
typedef PermutationBase<PermutationMatrix> Base;
typedef internal::traits<PermutationMatrix> Traits;
@@ -389,20 +391,20 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile


namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess> >
: traits<Matrix<_StorageIndex,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_, int PacketAccess_>
struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, StorageIndex_>,PacketAccess_> >
: traits<Matrix<StorageIndex_,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{
typedef PermutationStorage StorageKind;
typedef Map<const Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
typedef _StorageIndex StorageIndex;
typedef Map<const Matrix<StorageIndex_, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, PacketAccess_> IndicesType;
typedef StorageIndex_ StorageIndex;
typedef void Scalar;
};
}

template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess>
: public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess> >
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_, int PacketAccess_>
class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, StorageIndex_>,PacketAccess_>
: public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, StorageIndex_>,PacketAccess_> >
{
typedef PermutationBase<Map> Base;
typedef internal::traits<Map> Traits;
@@ -452,18 +454,18 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd
IndicesType m_indices;
};

template<typename _IndicesType> class TranspositionsWrapper;
template<typename IndicesType_> class TranspositionsWrapper;
namespace internal {
template<typename _IndicesType>
struct traits<PermutationWrapper<_IndicesType> >
template<typename IndicesType_>
struct traits<PermutationWrapper<IndicesType_> >
{
typedef PermutationStorage StorageKind;
typedef void Scalar;
typedef typename _IndicesType::Scalar StorageIndex;
typedef _IndicesType IndicesType;
typedef typename IndicesType_::Scalar StorageIndex;
typedef IndicesType_ IndicesType;
enum {
RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
ColsAtCompileTime = _IndicesType::SizeAtCompileTime,
RowsAtCompileTime = IndicesType_::SizeAtCompileTime,
ColsAtCompileTime = IndicesType_::SizeAtCompileTime,
MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
Flags = 0
@@ -476,14 +478,14 @@ struct traits<PermutationWrapper<_IndicesType> >
*
* \brief Class to view a vector of integers as a permutation matrix
*
* \tparam _IndicesType the type of the vector of integer (can be any compatible expression)
* \tparam IndicesType_ the type of the vector of integer (can be any compatible expression)
*
* This class allows to view any vector expression of integers as a permutation matrix.
*
* \sa class PermutationBase, class PermutationMatrix
*/
template<typename _IndicesType>
class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >
template<typename IndicesType_>
class PermutationWrapper : public PermutationBase<PermutationWrapper<IndicesType_> >
{
typedef PermutationBase<PermutationWrapper> Base;
typedef internal::traits<PermutationWrapper> Traits;
@@ -498,7 +500,7 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
{}

/** const version of indices(). */
const typename internal::remove_all<typename IndicesType::Nested>::type&
const internal::remove_all_t<typename IndicesType::Nested>&
indices() const { return m_indices; }

protected:

@@ -22,23 +22,20 @@
# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#endif

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
template<typename Index>
EIGEN_DEVICE_FUNC
static EIGEN_ALWAYS_INLINE void run(Index, Index)
{
}
template <typename Index>
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index, Index) {}
};

template<> struct check_rows_cols_for_overflow<Dynamic> {
template<typename Index>
EIGEN_DEVICE_FUNC
static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
{
template <typename Index>
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index cols) {
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
// we assume Index is signed
Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
@@ -64,18 +61,18 @@ namespace doxygen {
// This is a workaround to doxygen not being able to understand the inheritance logic
// when it is hidden by the dense_xpr_base helper struct.
// Moreover, doxygen fails to include members that are not documented in the declaration body of
// MatrixBase if we inherits MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >,
// MatrixBase if we inherits MatrixBase<Matrix<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> >,
// this is why we simply inherits MatrixBase, though this does not make sense.

/** This class is just a workaround for Doxygen and it does not not actually exist. */
template<typename Derived> struct dense_xpr_base_dispatcher;
/** This class is just a workaround for Doxygen and it does not not actually exist. */
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
template<typename Scalar_, int Rows_, int Cols_, int Options_, int MaxRows_, int MaxCols_>
struct dense_xpr_base_dispatcher<Matrix<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> >
: public MatrixBase {};
/** This class is just a workaround for Doxygen and it does not not actually exist. */
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
template<typename Scalar_, int Rows_, int Cols_, int Options_, int MaxRows_, int MaxCols_>
struct dense_xpr_base_dispatcher<Array<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> >
: public ArrayBase {};

} // namespace doxygen
@@ -134,6 +131,16 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)

EIGEN_STATIC_ASSERT(internal::check_implication(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (int(Options)&RowMajor)==RowMajor), INVALID_MATRIX_TEMPLATE_PARAMETERS)
EIGEN_STATIC_ASSERT(internal::check_implication(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (int(Options)&RowMajor)==0), INVALID_MATRIX_TEMPLATE_PARAMETERS)
EIGEN_STATIC_ASSERT((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0), INVALID_MATRIX_TEMPLATE_PARAMETERS)
EIGEN_STATIC_ASSERT((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0), INVALID_MATRIX_TEMPLATE_PARAMETERS)
EIGEN_STATIC_ASSERT((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0), INVALID_MATRIX_TEMPLATE_PARAMETERS)
EIGEN_STATIC_ASSERT((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0), INVALID_MATRIX_TEMPLATE_PARAMETERS)
EIGEN_STATIC_ASSERT((MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic), INVALID_MATRIX_TEMPLATE_PARAMETERS)
EIGEN_STATIC_ASSERT((MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic), INVALID_MATRIX_TEMPLATE_PARAMETERS)
EIGEN_STATIC_ASSERT(((Options & (DontAlign|RowMajor)) == Options), INVALID_MATRIX_TEMPLATE_PARAMETERS)

EIGEN_DEVICE_FUNC
Base& base() { return *static_cast<Base*>(this); }
EIGEN_DEVICE_FUNC
@@ -148,12 +155,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
*
* See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
{
if(Flags & RowMajorBit)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeff(Index rowId, Index colId) const {
if (Flags & RowMajorBit)
return m_storage.data()[colId + rowId * m_storage.cols()];
else // column-major
else // column-major
return m_storage.data()[rowId + colId * m_storage.rows()];
}

@@ -171,12 +176,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
*
* See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const for details. */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
{
if(Flags & RowMajorBit)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index rowId, Index colId) {
if (Flags & RowMajorBit)
return m_storage.data()[colId + rowId * m_storage.cols()];
else // column-major
else // column-major
return m_storage.data()[rowId + colId * m_storage.rows()];
}

@@ -184,28 +187,20 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
*
* See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const for details. */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
{
return m_storage.data()[index];
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) { return m_storage.data()[index]; }

/** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index).
* It is provided for convenience. */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
{
if(Flags & RowMajorBit)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeffRef(Index rowId, Index colId) const {
if (Flags & RowMajorBit)
return m_storage.data()[colId + rowId * m_storage.cols()];
else // column-major
else // column-major
return m_storage.data()[rowId + colId * m_storage.rows()];
}

/** This is the const version of coeffRef(Index) which is thus synonym of coeff(Index).
* It is provided for convenience. */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
{
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeffRef(Index index) const {
return m_storage.data()[index];
}

@@ -267,13 +262,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
{
eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime)
&& EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime)
&& EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime)
&& EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index rows, Index cols) {
eigen_assert(internal::check_implication(RowsAtCompileTime!=Dynamic, rows==RowsAtCompileTime)
&& internal::check_implication(ColsAtCompileTime!=Dynamic, cols==ColsAtCompileTime)
&& internal::check_implication(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic, rows<=MaxRowsAtCompileTime)
&& internal::check_implication(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic, cols<=MaxColsAtCompileTime)
&& rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array.");
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
#ifdef EIGEN_INITIALIZE_COEFFS
@@ -297,12 +290,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t)
*/
EIGEN_DEVICE_FUNC
inline void resize(Index size)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);
#ifdef EIGEN_INITIALIZE_COEFFS
EIGEN_DEVICE_FUNC inline constexpr void resize(Index size) {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
eigen_assert(
((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime == Dynamic || size <= MaxSizeAtCompileTime)) ||
SizeAtCompileTime == size) &&
size >= 0);
#ifdef EIGEN_INITIALIZE_COEFFS
bool size_changed = size != this->size();
#endif
if(RowsAtCompileTime == 1)
@@ -322,11 +316,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* \sa resize(Index,Index)
*/
EIGEN_DEVICE_FUNC
inline void resize(NoChange_t, Index cols)
{
resize(rows(), cols);
}
EIGEN_DEVICE_FUNC inline constexpr void resize(NoChange_t, Index cols) { resize(rows(), cols); }

/** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange
* as in the example below.
@@ -336,11 +326,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* \sa resize(Index,Index)
*/
EIGEN_DEVICE_FUNC
inline void resize(Index rows, NoChange_t)
{
resize(rows, cols());
}
EIGEN_DEVICE_FUNC inline constexpr void resize(Index rows, NoChange_t) { resize(rows, cols()); }

/** Resizes \c *this to have the same dimensions as \a other.
* Takes care of doing all the checking that's needed.
@@ -475,7 +461,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
{
// _check_template_params();
// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}

@@ -486,11 +471,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
: m_storage(internal::constructor_without_unaligned_array_assert())
{
// _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#endif

#if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
: m_storage( std::move(other.m_storage) )
@@ -500,11 +484,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_DEVICE_FUNC
PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT
{
_check_template_params();
m_storage = std::move(other.m_storage);
return *this;
}
#endif

/** Copy constructor */
EIGEN_DEVICE_FUNC
@@ -514,17 +496,14 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
: m_storage(size, rows, cols)
{
// _check_template_params();
// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}

#if EIGEN_HAS_CXX11
/** \brief Construct a row of column vector with fixed size from an arbitrary number of coefficients. \cpp11
/** \brief Construct a row of column vector with fixed size from an arbitrary number of coefficients.
*
* \only_for_vectors
*
* This constructor is for 1D array or vectors with more than 4 coefficients.
* There exists C++98 analogue constructors for fixed-size array/vector having 1, 2, 3, or 4 coefficients.
*
* \warning To construct a column (resp. row) vector of fixed length, the number of values passed to this
* constructor must match the the fixed number of rows (resp. columns) of \c *this.
@@ -534,7 +513,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
PlainObjectBase(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)
: m_storage()
{
_check_template_params();
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, sizeof...(args) + 4);
m_storage.data()[0] = a0;
m_storage.data()[1] = a1;
@@ -546,14 +524,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
}

/** \brief Constructs a Matrix or Array and initializes it by elements given by an initializer list of initializer
* lists \cpp11
* lists
*/
EIGEN_DEVICE_FUNC
explicit EIGEN_STRONG_INLINE PlainObjectBase(const std::initializer_list<std::initializer_list<Scalar>>& list)
: m_storage()
{
_check_template_params();

EIGEN_DEVICE_FUNC explicit constexpr EIGEN_STRONG_INLINE PlainObjectBase(
const std::initializer_list<std::initializer_list<Scalar>>& list)
: m_storage() {
size_t list_size = 0;
if (list.begin() != list.end()) {
list_size = list.begin()->size();
@@ -581,7 +556,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
}
}
}
#endif // end EIGEN_HAS_CXX11

/** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
template<typename OtherDerived>
@@ -589,7 +563,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
: m_storage()
{
_check_template_params();
resizeLike(other);
_set_noalias(other);
}
@@ -600,7 +573,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
: m_storage()
{
_check_template_params();
resizeLike(other);
*this = other.derived();
}
@@ -609,7 +581,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)
{
_check_template_params();
// FIXME this does not automatically transpose vectors if necessary
resize(other.rows(), other.cols());
other.evalTo(this->derived());
@@ -640,7 +611,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* \see class Map
*/
//@{
///@{
static inline ConstMapType Map(const Scalar* data)
{ return ConstMapType(data); }
static inline MapType Map(Scalar* data)
@@ -704,7 +675,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
template<int Outer, int Inner>
static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
{ return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
//@}
///@}

using Base::setConstant;
EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val);
@@ -800,7 +771,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type

template<typename T0, typename T1>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, std::enable_if_t<Base::SizeAtCompileTime!=2,T0>* = 0)
{
const bool t0_is_integer_alike = internal::is_valid_index_type<T0>::value;
const bool t1_is_integer_alike = internal::is_valid_index_type<T1>::value;
@@ -812,7 +783,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type

template<typename T0, typename T1>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, std::enable_if_t<Base::SizeAtCompileTime==2,T0>* = 0)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
m_storage.data()[0] = Scalar(val0);
@@ -822,10 +793,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
template<typename T0, typename T1>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1,
typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
&& (internal::is_same<T0,Index>::value)
&& (internal::is_same<T1,Index>::value)
&& Base::SizeAtCompileTime==2,T1>::type* = 0)
std::enable_if_t< (!internal::is_same<Index,Scalar>::value)
&& (internal::is_same<T0,Index>::value)
&& (internal::is_same<T1,Index>::value)
&& Base::SizeAtCompileTime==2,T1>* = 0)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
m_storage.data()[0] = Scalar(val0);
@@ -836,8 +807,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
// then the argument is meant to be the size of the object.
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if< (Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value)
&& ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0)
EIGEN_STRONG_INLINE void _init1(Index size, std::enable_if_t< (Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value)
&& ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>* = 0)
{
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
const bool is_integer_alike = internal::is_valid_index_type<T>::value;
@@ -850,7 +821,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
// We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitly converted)
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)
EIGEN_STRONG_INLINE void _init1(const Scalar& val0, std::enable_if_t<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>* = 0)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
m_storage.data()[0] = val0;
@@ -860,10 +831,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const Index& val0,
typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
&& (internal::is_same<Index,T>::value)
&& Base::SizeAtCompileTime==1
&& internal::is_convertible<T, Scalar>::value,T*>::type* = 0)
std::enable_if_t< (!internal::is_same<Index,Scalar>::value)
&& (internal::is_same<Index,T>::value)
&& Base::SizeAtCompileTime==1
&& internal::is_convertible<T, Scalar>::value,T*>* = 0)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
m_storage.data()[0] = Scalar(val0);
@@ -916,10 +887,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const Scalar& val0,
typename internal::enable_if< Base::SizeAtCompileTime!=Dynamic
&& Base::SizeAtCompileTime!=1
&& internal::is_convertible<T, Scalar>::value
&& internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T>::type* = 0)
std::enable_if_t< Base::SizeAtCompileTime!=Dynamic
&& Base::SizeAtCompileTime!=1
&& internal::is_convertible<T, Scalar>::value
&& internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T>* = 0)
{
Base::setConstant(val0);
}
@@ -928,12 +899,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const Index& val0,
typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
&& (internal::is_same<Index,T>::value)
&& Base::SizeAtCompileTime!=Dynamic
&& Base::SizeAtCompileTime!=1
&& internal::is_convertible<T, Scalar>::value
&& internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T*>::type* = 0)
std::enable_if_t< (!internal::is_same<Index,Scalar>::value)
&& (internal::is_same<Index,T>::value)
&& Base::SizeAtCompileTime!=Dynamic
&& Base::SizeAtCompileTime!=1
&& internal::is_convertible<T, Scalar>::value
&& internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T*>* = 0)
{
Base::setConstant(val0);
}
@@ -964,21 +935,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
void swap(DenseBase<OtherDerived> const & other)
{ Base::swap(other.derived()); }

EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void _check_template_params()
{
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (int(Options)&RowMajor)==RowMajor)
&& EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (int(Options)&RowMajor)==0)
&& ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0))
&& ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0))
&& ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0))
&& ((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0))
&& (MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic)
&& (MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic)
&& (Options & (DontAlign|RowMajor)) == Options),
INVALID_MATRIX_TEMPLATE_PARAMETERS)
}

enum { IsPlainObjectBase = 1 };
#endif
public:
@@ -999,11 +955,7 @@ namespace internal {
template <typename Derived, typename OtherDerived, bool IsVector>
struct conservative_resize_like_impl
{
#if EIGEN_HAS_TYPE_TRAITS
static const bool IsRelocatable = std::is_trivially_copyable<typename Derived::Scalar>::value;
#else
static const bool IsRelocatable = !NumTraits<typename Derived::Scalar>::RequireInitialization;
#endif
static constexpr bool IsRelocatable = std::is_trivially_copyable<typename Derived::Scalar>::value;
static void run(DenseBase<Derived>& _this, Index rows, Index cols)
{
if (_this.rows() == rows && _this.cols() == cols) return;

@@ -10,6 +10,8 @@
#ifndef EIGEN_PRODUCT_H
#define EIGEN_PRODUCT_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
@@ -19,8 +21,8 @@ namespace internal {
template<typename Lhs, typename Rhs, int Option>
struct traits<Product<Lhs, Rhs, Option> >
{
typedef typename remove_all<Lhs>::type LhsCleaned;
typedef typename remove_all<Rhs>::type RhsCleaned;
typedef remove_all_t<Lhs> LhsCleaned;
typedef remove_all_t<Rhs> RhsCleaned;
typedef traits<LhsCleaned> LhsTraits;
typedef traits<RhsCleaned> RhsTraits;

@@ -40,7 +42,7 @@ struct traits<Product<Lhs, Rhs, Option> >
MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,

// FIXME: only needed by GeneralMatrixMatrixTriangular
InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
InnerSize = min_size_prefer_fixed(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),

// The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit
@@ -58,8 +60,8 @@ struct traits<Product<Lhs, Rhs, Option> >
*
* \brief Expression of the product of two arbitrary matrices or vectors
*
* \tparam _Lhs the type of the left-hand side expression
* \tparam _Rhs the type of the right-hand side expression
* \tparam Lhs_ the type of the left-hand side expression
* \tparam Rhs_ the type of the right-hand side expression
*
* This class represents an expression of the product of two arbitrary matrices.
*
@@ -67,16 +69,16 @@ struct traits<Product<Lhs, Rhs, Option> >
* \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
*
*/
template<typename _Lhs, typename _Rhs, int Option>
class Product : public ProductImpl<_Lhs,_Rhs,Option,
typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
typename internal::traits<_Rhs>::StorageKind,
internal::product_type<_Lhs,_Rhs>::ret>::ret>
template<typename Lhs_, typename Rhs_, int Option>
class Product : public ProductImpl<Lhs_,Rhs_,Option,
typename internal::product_promote_storage_type<typename internal::traits<Lhs_>::StorageKind,
typename internal::traits<Rhs_>::StorageKind,
internal::product_type<Lhs_,Rhs_>::ret>::ret>
{
public:

typedef _Lhs Lhs;
typedef _Rhs Rhs;
typedef Lhs_ Lhs;
typedef Rhs_ Rhs;

typedef typename ProductImpl<
Lhs, Rhs, Option,
@@ -87,8 +89,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,

typedef typename internal::ref_selector<Lhs>::type LhsNested;
typedef typename internal::ref_selector<Rhs>::type RhsNested;
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)

@@ -13,6 +13,8 @@
#ifndef EIGEN_PRODUCTEVALUATORS_H
#define EIGEN_PRODUCTEVALUATORS_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -107,14 +109,14 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
explicit product_evaluator(const XprType& xpr)
: m_result(xpr.rows(), xpr.cols())
{
::new (static_cast<Base*>(this)) Base(m_result);
internal::construct_at<Base>(this, m_result);

// FIXME shall we handle nested_eval here?,
// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
// typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
// typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;
//
// const LhsNested lhs(xpr.lhs());
// const RhsNested rhs(xpr.rhs());
@@ -134,7 +136,7 @@ protected:
// Dense = Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
std::enable_if_t<(Options==DefaultProduct || Options==AliasFreeProduct)>>
{
typedef Product<Lhs,Rhs,Options> SrcXprType;
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -152,7 +154,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
// Dense += Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
std::enable_if_t<(Options==DefaultProduct || Options==AliasFreeProduct)>>
{
typedef Product<Lhs,Rhs,Options> SrcXprType;
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -167,7 +169,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
// Dense -= Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
std::enable_if_t<(Options==DefaultProduct || Options==AliasFreeProduct)>>
{
typedef Product<Lhs,Rhs,Options> SrcXprType;
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -296,7 +298,7 @@ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, cons
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
{
template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
template<typename T> struct is_row_major : std::conditional_t<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type> {};
typedef typename Product<Lhs,Rhs>::Scalar Scalar;

// TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
@@ -370,7 +372,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
typedef typename nested_eval<Rhs,1>::type RhsNested;
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
typedef internal::remove_all_t<std::conditional_t<int(Side)==OnTheRight,LhsNested,RhsNested>> MatrixType;

template<typename Dest>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
@@ -427,8 +429,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
// 3 - it makes this fallback consistent with the heavy GEMM routine.
// 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.
// (see https://stackoverflow.com/questions/54738495)
// For small fixed sizes matrices, howver, the gains are less obvious, it is sometimes x2 faster, but sometimes x3 slower,
// and the behavior depends also a lot on the compiler... This is why this re-writting strategy is currently
// For small fixed sizes matrices, however, the gains are less obvious, it is sometimes x2 faster, but sometimes x3 slower,
// and the behavior depends also a lot on the compiler... This is why this re-writing strategy is currently
// enabled only when falling back from the main GEMM.
template<typename Dst, typename Func>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -448,7 +450,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(),
func,
actualAlpha,
typename conditional<HasScalarFactor,true_type,false_type>::type());
std::conditional_t<HasScalarFactor,true_type,false_type>());
}

protected:
@@ -458,7 +460,7 @@ protected:
void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s /* == 1 */, false_type)
{
EIGEN_UNUSED_VARIABLE(s);
eigen_internal_assert(s==Scalar(1));
eigen_internal_assert(numext::is_exactly_one(s));
call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
}

@@ -526,8 +528,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;

typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;

typedef evaluator<LhsNestedCleaned> LhsEtorType;
typedef evaluator<RhsNestedCleaned> RhsEtorType;
@@ -535,7 +537,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
enum {
RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
InnerSize = min_size_prefer_fixed(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
};
@@ -564,8 +566,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,

// Here, we don't care about alignment larger than the usable packet size.
LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
LhsAlignment = plain_enum_min(LhsEtorType::Alignment, LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
RhsAlignment = plain_enum_min(RhsEtorType::Alignment, RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),

SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,

@@ -585,8 +587,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),

Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
: bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % plain_enum_max(1, LhsAlignment))!=0 ? 0 : LhsAlignment)
: bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % plain_enum_max(1, RhsAlignment))!=0 ? 0 : RhsAlignment)
: 0,

/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
@@ -640,8 +642,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
}

protected:
typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
add_const_on_value_type_t<LhsNested> m_lhs;
add_const_on_value_type_t<RhsNested> m_rhs;

LhsEtorType m_lhsImpl;
RhsEtorType m_rhsImpl;
@@ -836,22 +838,22 @@ public:
MatrixFlags = evaluator<MatrixType>::Flags,
DiagFlags = evaluator<DiagonalType>::Flags,

_StorageOrder = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor
StorageOrder_ = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor
: (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor
: MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
_SameStorageOrder = _StorageOrder == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor),
SameStorageOrder_ = StorageOrder_ == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor),

_ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
_SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
ScalarAccessOnDiag_ = !((int(StorageOrder_) == ColMajor && int(ProductOrder) == OnTheLeft)
||(int(StorageOrder_) == RowMajor && int(ProductOrder) == OnTheRight)),
SameTypes_ = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
// FIXME currently we need same types, but in the future the next rule should be the one
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit)
&& _SameTypes
&& (_SameStorageOrder || (MatrixFlags&LinearAccessBit)==LinearAccessBit)
&& (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
//Vectorizable_ = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (SameTypes_ && bool(int(DiagFlags)&PacketAccessBit))),
Vectorizable_ = bool(int(MatrixFlags)&PacketAccessBit)
&& SameTypes_
&& (SameStorageOrder_ || (MatrixFlags&LinearAccessBit)==LinearAccessBit)
&& (ScalarAccessOnDiag_ || (bool(int(DiagFlags)&PacketAccessBit))),
LinearAccessMask_ = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
Flags = ((HereditaryBits|LinearAccessMask_) & (unsigned int)(MatrixFlags)) | (Vectorizable_ ? PacketAccessBit : 0),
Alignment = evaluator<MatrixType>::Alignment,

AsScalarProduct = (DiagonalType::SizeAtCompileTime==1)
@@ -887,7 +889,7 @@ protected:
{
enum {
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
DiagonalPacketLoadMode = plain_enum_min(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
};
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
@@ -913,7 +915,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
typedef typename Lhs::DiagonalVectorType DiagonalType;


enum { StorageOrder = Base::_StorageOrder };
enum { StorageOrder = Base::StorageOrder_ };

EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
: Base(xpr.rhs(), xpr.lhs().diagonal())
@@ -932,7 +934,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
// FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
// See also similar calls below.
return this->template packet_impl<LoadMode,PacketType>(row,col, row,
typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
std::conditional_t<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>());
}

template<int LoadMode,typename PacketType>
@@ -957,7 +959,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
typedef Product<Lhs, Rhs, ProductKind> XprType;
typedef typename XprType::PlainObject PlainObject;

enum { StorageOrder = Base::_StorageOrder };
enum { StorageOrder = Base::StorageOrder_ };

EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
: Base(xpr.lhs(), xpr.rhs().diagonal())
@@ -974,7 +976,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
{
return this->template packet_impl<LoadMode,PacketType>(row,col, col,
typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
std::conditional_t<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>());
}

template<int LoadMode,typename PacketType>
@@ -1001,7 +1003,7 @@ template<typename ExpressionType, int Side, bool Transposed>
struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
{
typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
typedef remove_all_t<MatrixType> MatrixTypeCleaned;

template<typename Dest, typename PermutationType>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
@@ -1109,7 +1111,7 @@ template<typename ExpressionType, int Side, bool Transposed, typename Expression
struct transposition_matrix_product
{
typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
typedef remove_all_t<MatrixType> MatrixTypeCleaned;

template<typename Dest, typename TranspositionType>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
@@ -1172,6 +1174,40 @@ struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShap
}
};

/***************************************************************************
* skew symmetric products
* for now we just call the generic implementation
***************************************************************************/
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, SkewSymmetricShape, MatrixShape, ProductTag>
{
template<typename Dest>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
generic_product_impl<typename Lhs::DenseMatrixType , Rhs, DenseShape, MatrixShape, ProductTag>::evalTo(dst, lhs, rhs);
}
};

template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, SkewSymmetricShape, ProductTag>
{
template<typename Dest>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
generic_product_impl<Lhs, typename Rhs::DenseMatrixType, MatrixShape, DenseShape, ProductTag>::evalTo(dst, lhs, rhs);
}
};

template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs, Rhs, SkewSymmetricShape, SkewSymmetricShape, ProductTag>
{
template<typename Dest>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
generic_product_impl<typename Lhs::DenseMatrixType, typename Rhs::DenseMatrixType, DenseShape, DenseShape, ProductTag>::evalTo(dst, lhs, rhs);
}
};

} // end namespace internal

} // end namespace Eigen

@@ -10,12 +10,13 @@
#ifndef EIGEN_RANDOM_H
#define EIGEN_RANDOM_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

template<typename Scalar> struct scalar_random_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op)
inline const Scalar operator() () const { return random<Scalar>(); }
};


@@ -11,6 +11,8 @@
#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -198,8 +200,7 @@ struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
{
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
Scalar res;
res = eval.coeffByOuterInner(0, 0);
Scalar res = eval.coeffByOuterInner(0, 0);
for(Index i = 1; i < xpr.innerSize(); ++i)
res = func(res, eval.coeffByOuterInner(0, i));
for(Index i = 1; i < xpr.outerSize(); ++i)
@@ -238,7 +239,7 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
enum {
alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment)
alignment = plain_enum_max(alignment0, Evaluator::Alignment)
};
const Index alignedStart = internal::first_default_aligned(xpr);
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
@@ -353,12 +354,12 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
};

// evaluator adaptor
template<typename _XprType>
class redux_evaluator : public internal::evaluator<_XprType>
template<typename XprType_>
class redux_evaluator : public internal::evaluator<XprType_>
{
typedef internal::evaluator<_XprType> Base;
typedef internal::evaluator<XprType_> Base;
public:
typedef _XprType XprType;
typedef XprType_ XprType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}


@@ -10,20 +10,22 @@
|
||||
#ifndef EIGEN_REF_H
|
||||
#define EIGEN_REF_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename _PlainObjectType, int _Options, typename _StrideType>
|
||||
struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|
||||
: public traits<Map<_PlainObjectType, _Options, _StrideType> >
|
||||
template<typename PlainObjectType_, int Options_, typename StrideType_>
|
||||
struct traits<Ref<PlainObjectType_, Options_, StrideType_> >
|
||||
: public traits<Map<PlainObjectType_, Options_, StrideType_> >
|
||||
{
|
||||
typedef _PlainObjectType PlainObjectType;
|
||||
typedef _StrideType StrideType;
|
||||
typedef PlainObjectType_ PlainObjectType;
|
||||
typedef StrideType_ StrideType;
|
||||
enum {
|
||||
Options = _Options,
|
||||
Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit,
|
||||
Alignment = traits<Map<_PlainObjectType, _Options, _StrideType> >::Alignment
|
||||
Options = Options_,
|
||||
Flags = traits<Map<PlainObjectType_, Options_, StrideType_> >::Flags | NestByRefBit,
|
||||
Alignment = traits<Map<PlainObjectType_, Options_, StrideType_> >::Alignment
|
||||
};
|
||||
|
||||
template<typename Derived> struct match {
|
||||
@@ -46,7 +48,7 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|
||||
ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
|
||||
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
|
||||
};
|
||||
typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
|
||||
typedef std::conditional_t<MatchAtCompileTime,internal::true_type,internal::false_type> type;
|
||||
};
|
||||
|
||||
};
|
||||
@@ -197,8 +199,8 @@ protected:
       return false;
     }

-    ::new (static_cast<Base*>(this)) Base(expr.data(), rows, cols);
-    ::new (&m_stride) StrideBase(
+    internal::construct_at<Base>(this, expr.data(), rows, cols);
+    internal::construct_at(&m_stride,
       (StrideType::OuterStrideAtCompileTime == 0) ? 0 : outer_stride,
       (StrideType::InnerStrideAtCompileTime == 0) ? 0 : inner_stride );
     return true;
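Here and in the evaluators below, placement-new is replaced by a `construct_at` helper in the style of C++20 `std::construct_at`, which performs the same in-place construction through a typed pointer (and, unlike the placement-new syntax, is usable in constant evaluation). A standalone sketch of the standard facility — this assumes a C++20 compiler and is not Eigen's internal helper:

    #include <memory>

    struct Widget { int v; explicit Widget(int n) : v(n) {} };

    void demo() {
      alignas(Widget) unsigned char storage[sizeof(Widget)];
      // Same effect as `::new (storage) Widget(42)`, but via a typed pointer:
      Widget* w = std::construct_at(reinterpret_cast<Widget*>(storage), 42);
      std::destroy_at(w);  // matching in-place destruction
    }
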
@@ -285,7 +287,7 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
     typedef internal::traits<Ref> Traits;
     template<typename Derived>
     EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr,
-                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
+                                 std::enable_if_t<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>* = 0);
   public:

     typedef RefBase<Ref> Base;
@@ -295,17 +297,17 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     template<typename Derived>
     EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr,
-                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
+                                 std::enable_if_t<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>* = 0)
     {
       EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
-      // Construction must pass since we will not create temprary storage in the non-const case.
+      // Construction must pass since we will not create temporary storage in the non-const case.
       const bool success = Base::construct(expr.derived());
       EIGEN_UNUSED_VARIABLE(success)
       eigen_assert(success);
     }
     template<typename Derived>
     EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
-                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
+                                 std::enable_if_t<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>* = 0)
     #else
     /** Implicit constructor from any dense expression */
     template<typename Derived>
@@ -337,7 +339,7 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<

     template<typename Derived>
     EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
-                                 typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
+                                 std::enable_if_t<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>* = 0)
     {
       // std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
       // std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";

@@ -10,6 +10,8 @@
 #ifndef EIGEN_REPLICATE_H
 #define EIGEN_REPLICATE_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal {
@@ -21,7 +23,7 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
   typedef typename traits<MatrixType>::StorageKind StorageKind;
   typedef typename traits<MatrixType>::XprKind XprKind;
   typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
-  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+  typedef std::remove_reference_t<MatrixTypeNested> MatrixTypeNested_;
   enum {
     RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
                       ? Dynamic
@@ -62,19 +64,19 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
   : public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
 {
     typedef typename internal::traits<Replicate>::MatrixTypeNested MatrixTypeNested;
-    typedef typename internal::traits<Replicate>::_MatrixTypeNested _MatrixTypeNested;
+    typedef typename internal::traits<Replicate>::MatrixTypeNested_ MatrixTypeNested_;
   public:

    typedef typename internal::dense_xpr_base<Replicate>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
-    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
+    typedef internal::remove_all_t<MatrixType> NestedExpression;

    template<typename OriginalMatrixType>
    EIGEN_DEVICE_FUNC
    inline explicit Replicate(const OriginalMatrixType& matrix)
      : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
    {
-      EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
+      EIGEN_STATIC_ASSERT((internal::is_same<std::remove_const_t<MatrixType>,OriginalMatrixType>::value),
                           THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
      eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);
    }
@@ -84,7 +86,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
    inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
      : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
    {
-      EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
+      EIGEN_STATIC_ASSERT((internal::is_same<std::remove_const_t<MatrixType>,OriginalMatrixType>::value),
                           THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
    }

@@ -94,7 +96,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
    inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }

    EIGEN_DEVICE_FUNC
-    const _MatrixTypeNested& nestedExpression() const
+    const MatrixTypeNested_& nestedExpression() const
    {
      return m_matrix;
    }

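For context, Replicate is the expression type behind DenseBase::replicate(), which tiles a matrix or vector; a short usage sketch:

    #include <Eigen/Dense>

    int main() {
      Eigen::Vector3d v(1.0, 2.0, 3.0);
      // Tile v into a 3x4 matrix: 1 copy vertically, 4 copies horizontally.
      Eigen::MatrixXd tiled = v.replicate(1, 4);
      // Compile-time factors map to the RowFactor/ColFactor template parameters above.
      Eigen::MatrixXd tiled2 = v.replicate<1, 4>();
      return 0;
    }
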
@@ -11,6 +11,8 @@
 #ifndef EIGEN_RESHAPED_H
 #define EIGEN_RESHAPED_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 /** \class Reshaped
@@ -27,10 +29,9 @@ namespace Eigen {
   * It is the return type of DenseBase::reshaped(NRowsType,NColsType) and
   * most of the time this is the only way it is used.
   *
-  * However, in C++98, if you want to directly maniputate reshaped expressions,
-  * for instance if you want to write a function returning such an expression, you
-  * will need to use this class. In C++11, it is advised to use the \em auto
-  * keyword for such use cases.
+  * If you want to directly manipulate reshaped expressions,
+  * for instance if you want to write a function returning such an expression,
+  * it is advised to use the \em auto keyword for such use cases.
   *
   * Here is an example illustrating the dynamic case:
   * \include class_Reshaped.cpp
@@ -156,7 +157,7 @@ class ReshapedImpl_dense<XprType,Rows,Cols,Order,false>
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ReshapedImpl_dense)

     typedef typename internal::ref_selector<XprType>::non_const_type MatrixTypeNested;
-    typedef typename internal::remove_all<XprType>::type NestedExpression;
+    typedef internal::remove_all_t<XprType> NestedExpression;

     class InnerIterator;

@@ -186,12 +187,12 @@ class ReshapedImpl_dense<XprType,Rows,Cols,Order,false>

     /** \returns the nested expression */
     EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<XprType>::type&
+    const internal::remove_all_t<XprType>&
     nestedExpression() const { return m_xpr; }

     /** \returns the nested expression */
     EIGEN_DEVICE_FUNC
-    typename internal::remove_reference<XprType>::type&
+    std::remove_reference_t<XprType>&
     nestedExpression() { return m_xpr; }

   protected:
@@ -231,7 +232,7 @@ class ReshapedImpl_dense<XprType, Rows, Cols, Order, true>
     {}

     EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
+    const internal::remove_all_t<XprTypeNested>& nestedExpression() const
     {
       return m_xpr;
     }
@@ -250,7 +251,7 @@ class ReshapedImpl_dense<XprType, Rows, Cols, Order, true>
     EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
     inline Index outerStride() const
     {
-      return ((Flags&RowMajorBit)==RowMajorBit) ? this->cols() : this->rows();
+      return (((Flags&RowMajorBit)==RowMajorBit) ? this->cols() : this->rows()) * m_xpr.innerStride();
     }

   protected:
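This hunk is a behavioral fix rather than a rename: for the direct-access Reshaped specialization, the outer stride must be expressed in scalars of the underlying storage, so it has to scale with the wrapped expression's inner stride (previously assumed to be 1). A hedged illustration of the case it addresses — the sizes are hypothetical and whether this exact expression takes the direct-access path is an implementation detail:

    #include <Eigen/Dense>

    int main() {
      double data[32];
      for (int i = 0; i < 32; ++i) data[i] = i;
      // View every second scalar as a 4x4 column-major matrix.
      Eigen::Map<Eigen::Matrix4d, 0, Eigen::InnerStride<2>> view(data);
      // Reshaping this strided view: stepping to the next column must advance
      // rows()*innerStride() = 2*2 = 4 scalars, which is what the corrected
      // outerStride() returns.
      auto r = view.reshaped(2, 8);
      return r(0, 1) == view(2, 0) ? 0 : 1;
    }
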
@@ -324,7 +325,7 @@ struct reshaped_evaluator<ArgType, Rows, Cols, Order, /* HasDirectAccess */ fals

   typedef std::pair<Index, Index> RowCol;

-  inline RowCol index_remap(Index rowId, Index colId) const
+  EIGEN_DEVICE_FUNC inline RowCol index_remap(Index rowId, Index colId) const
   {
     if(Order==ColMajor)
     {
@@ -443,7 +444,7 @@ struct reshaped_evaluator<ArgType, Rows, Cols, Order, /* HasDirectAccess */ true
     : mapbase_evaluator<XprType, typename XprType::PlainObject>(xpr)
   {
     // TODO: for the 3.4 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime
-    eigen_assert(((internal::UIntPtr(xpr.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
+    eigen_assert(((internal::UIntPtr(xpr.data()) % plain_enum_max(1, evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
   }
 };

@@ -11,6 +11,8 @@
 #ifndef EIGEN_RETURNBYVALUE_H
 #define EIGEN_RETURNBYVALUE_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal {
@@ -104,7 +106,7 @@ struct evaluator<ReturnByValue<Derived> >
   EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
     : m_result(xpr.rows(), xpr.cols())
   {
-    ::new (static_cast<Base*>(this)) Base(m_result);
+    internal::construct_at<Base>(this, m_result);
     xpr.evalTo(m_result);
   }

@@ -12,6 +12,8 @@
 #ifndef EIGEN_REVERSE_H
 #define EIGEN_REVERSE_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal {
@@ -24,13 +26,13 @@ struct traits<Reverse<MatrixType, Direction> >
   typedef typename traits<MatrixType>::StorageKind StorageKind;
   typedef typename traits<MatrixType>::XprKind XprKind;
   typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
-  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+  typedef std::remove_reference_t<MatrixTypeNested> MatrixTypeNested_;
   enum {
     RowsAtCompileTime = MatrixType::RowsAtCompileTime,
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit)
+    Flags = MatrixTypeNested_::Flags & (RowMajorBit | LvalueBit)
   };
 };

@@ -67,7 +69,7 @@ template<typename MatrixType, int Direction> class Reverse

     typedef typename internal::dense_xpr_base<Reverse>::type Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
-    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
+    typedef internal::remove_all_t<MatrixType> NestedExpression;
     using Base::IsRowMajor;

   protected:
@@ -99,7 +101,7 @@ template<typename MatrixType, int Direction> class Reverse
       return -m_matrix.innerStride();
     }

-    EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&
+    EIGEN_DEVICE_FUNC const internal::remove_all_t<typename MatrixType::Nested>&
     nestedExpression() const
     {
       return m_matrix;
@@ -173,10 +175,10 @@ struct vectorwise_reverse_inplace_impl<Vertical>
   template<typename ExpressionType>
   static void run(ExpressionType &xpr)
   {
-    const int HalfAtCompileTime = ExpressionType::RowsAtCompileTime==Dynamic?Dynamic:ExpressionType::RowsAtCompileTime/2;
+    constexpr Index HalfAtCompileTime = ExpressionType::RowsAtCompileTime==Dynamic?Dynamic:ExpressionType::RowsAtCompileTime/2;
     Index half = xpr.rows()/2;
-    xpr.topRows(fix<HalfAtCompileTime>(half))
-       .swap(xpr.bottomRows(fix<HalfAtCompileTime>(half)).colwise().reverse());
+    xpr.template topRows<HalfAtCompileTime>(half)
+       .swap(xpr.template bottomRows<HalfAtCompileTime>(half).colwise().reverse());
   }
 };

@@ -186,10 +188,10 @@ struct vectorwise_reverse_inplace_impl<Horizontal>
   template<typename ExpressionType>
   static void run(ExpressionType &xpr)
   {
-    const int HalfAtCompileTime = ExpressionType::ColsAtCompileTime==Dynamic?Dynamic:ExpressionType::ColsAtCompileTime/2;
+    constexpr Index HalfAtCompileTime = ExpressionType::ColsAtCompileTime==Dynamic?Dynamic:ExpressionType::ColsAtCompileTime/2;
     Index half = xpr.cols()/2;
-    xpr.leftCols(fix<HalfAtCompileTime>(half))
-       .swap(xpr.rightCols(fix<HalfAtCompileTime>(half)).rowwise().reverse());
+    xpr.template leftCols<HalfAtCompileTime>(half)
+       .swap(xpr.template rightCols<HalfAtCompileTime>(half).rowwise().reverse());
   }
 };

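The two vectorwise_reverse_inplace_impl specializations above back VectorwiseOp::reverseInPlace(); a short usage sketch:

    #include <Eigen/Dense>

    int main() {
      Eigen::MatrixXd m = Eigen::MatrixXd::Random(4, 5);
      m.colwise().reverseInPlace();     // Vertical: flip the order of the rows, in place
      m.rowwise().reverseInPlace();     // Horizontal: flip the order of the columns, in place
      Eigen::MatrixXd r = m.reverse();  // non-destructive, both directions at once
      return 0;
    }
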
@@ -10,6 +10,8 @@
 #ifndef EIGEN_SELECT_H
 #define EIGEN_SELECT_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 /** \class Select
@@ -17,9 +19,9 @@ namespace Eigen {
   *
   * \brief Expression of a coefficient wise version of the C++ ternary operator ?:
   *
-  * \param ConditionMatrixType the type of the \em condition expression which must be a boolean matrix
-  * \param ThenMatrixType the type of the \em then expression
-  * \param ElseMatrixType the type of the \em else expression
+  * \tparam ConditionMatrixType the type of the \em condition expression which must be a boolean matrix
+  * \tparam ThenMatrixType the type of the \em then expression
+  * \tparam ElseMatrixType the type of the \em else expression
   *
   * This class represents an expression of a coefficient wise version of the C++ ternary operator ?:.
   * It is the return type of DenseBase::select() and most of the time this is the only way it is used.

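As the doc comment says, Select is produced by DenseBase::select(); for example:

    #include <Eigen/Dense>

    int main() {
      Eigen::MatrixXi m = Eigen::MatrixXi::Random(3, 3);
      // Coefficient-wise ternary: where m >= 0 keep m, elsewhere take -m (i.e. abs).
      Eigen::MatrixXi a = (m.array() >= 0).select(m, -m);
      return 0;
    }
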
@@ -10,6 +10,8 @@
 #ifndef EIGEN_SELFADJOINTMATRIX_H
 #define EIGEN_SELFADJOINTMATRIX_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 /** \class SelfAdjointView
@@ -18,8 +20,8 @@ namespace Eigen {
   *
   * \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
   *
-  * \param MatrixType the type of the dense matrix storing the coefficients
-  * \param TriangularPart can be either \c #Lower or \c #Upper
+  * \tparam MatrixType the type of the dense matrix storing the coefficients
+  * \tparam TriangularPart can be either \c #Lower or \c #Upper
   *
   * This class is an expression of a sefladjoint matrix from a triangular part of a matrix
   * with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()
@@ -33,7 +35,7 @@ template<typename MatrixType, unsigned int UpLo>
 struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
 {
   typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
-  typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
+  typedef remove_all_t<MatrixTypeNested> MatrixTypeNestedCleaned;
   typedef MatrixType ExpressionType;
   typedef typename MatrixType::PlainObject FullMatrixType;
   enum {
@@ -46,12 +48,13 @@ struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
 }


-template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
-  : public TriangularBase<SelfAdjointView<_MatrixType, UpLo> >
+template<typename MatrixType_, unsigned int UpLo> class SelfAdjointView
+  : public TriangularBase<SelfAdjointView<MatrixType_, UpLo> >
 {
   public:
+    EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY)

-    typedef _MatrixType MatrixType;
+    typedef MatrixType_ MatrixType;
     typedef TriangularBase<SelfAdjointView> Base;
     typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
     typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
@@ -60,8 +63,8 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
     /** \brief The type of coefficients in this matrix */
     typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;
     typedef typename MatrixType::StorageIndex StorageIndex;
-    typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
-    typedef SelfAdjointView<typename internal::add_const<MatrixType>::type, UpLo> ConstSelfAdjointView;
+    typedef internal::remove_all_t<typename MatrixType::ConjugateReturnType> MatrixConjugateReturnType;
+    typedef SelfAdjointView<std::add_const_t<MatrixType>, UpLo> ConstSelfAdjointView;

     enum {
       Mode = internal::traits<SelfAdjointView>::Mode,
@@ -71,10 +74,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
     typedef typename MatrixType::PlainObject PlainObject;

     EIGEN_DEVICE_FUNC
-    explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
-    {
-      EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);
-    }
+    explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) { }

     EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
     inline Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }
@@ -180,16 +180,16 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
       */
     template<unsigned int TriMode>
     EIGEN_DEVICE_FUNC
-    typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
-                                   TriangularView<MatrixType,TriMode>,
-                                   TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type
+    std::conditional_t<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
+                       TriangularView<MatrixType,TriMode>,
+                       TriangularView<typename MatrixType::AdjointReturnType,TriMode> >
     triangularView() const
     {
-      typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::ConstTransposeReturnType>::type tmp1(m_matrix);
-      typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::AdjointReturnType>::type tmp2(tmp1);
-      return typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
-                                            TriangularView<MatrixType,TriMode>,
-                                            TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
+      std::conditional_t<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::ConstTransposeReturnType> tmp1(m_matrix);
+      std::conditional_t<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::AdjointReturnType> tmp2(tmp1);
+      return std::conditional_t<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
+                                TriangularView<MatrixType,TriMode>,
+                                TriangularView<typename MatrixType::AdjointReturnType,TriMode> >(tmp2);
     }

     typedef SelfAdjointView<const MatrixConjugateReturnType,UpLo> ConjugateReturnType;
@@ -203,10 +203,10 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
       */
     template<bool Cond>
     EIGEN_DEVICE_FUNC
-    inline typename internal::conditional<Cond,ConjugateReturnType,ConstSelfAdjointView>::type
+    inline std::conditional_t<Cond,ConjugateReturnType,ConstSelfAdjointView>
     conjugateIf() const
     {
-      typedef typename internal::conditional<Cond,ConjugateReturnType,ConstSelfAdjointView>::type ReturnType;
+      typedef std::conditional_t<Cond,ConjugateReturnType,ConstSelfAdjointView> ReturnType;
       return ReturnType(m_matrix.template conjugateIf<Cond>());
     }

@@ -218,10 +218,10 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView

     typedef SelfAdjointView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
     /** \sa MatrixBase::transpose() */
+    template<class Dummy=int>
     EIGEN_DEVICE_FUNC
-    inline TransposeReturnType transpose()
+    inline TransposeReturnType transpose(std::enable_if_t<Eigen::internal::is_lvalue<MatrixType>::value, Dummy*> = nullptr)
     {
-      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
       typename MatrixType::TransposeReturnType tmp(m_matrix);
       return TransposeReturnType(tmp);
     }

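The last hunk trades a static_assert for SFINAE: transpose() gains a defaulted dummy template parameter so that enable_if_t removes the non-const overload from overload resolution when the wrapped matrix is not an lvalue, instead of triggering an error after the overload has already been selected. A minimal standalone sketch of the idiom — the Writable flag is hypothetical, not Eigen's exact types:

    #include <type_traits>

    template<bool Writable>
    struct View {
      template<class Dummy = int>
      void transpose(std::enable_if_t<Writable, Dummy*> = nullptr) {
        // Only callable when Writable is true; otherwise substitution fails
        // silently and the overload simply does not exist.
      }
    };

    int main() {
      View<true>{}.transpose();      // OK
      // View<false>{}.transpose();  // would not compile: no matching overload
      return 0;
    }
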
@@ -10,6 +10,8 @@
 #ifndef EIGEN_SELFCWISEBINARYOP_H
 #define EIGEN_SELFCWISEBINARYOP_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 // TODO generalize the scalar type of 'other'

412
libs/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h
Normal file
@@ -0,0 +1,412 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_SKEWSYMMETRICMATRIX3_H
#define EIGEN_SKEWSYMMETRICMATRIX3_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class SkewSymmetricBase
  * \ingroup Core_Module
  *
  * \brief Base class for skew symmetric matrices and expressions
  *
  * This is the base class that is inherited by SkewSymmetricMatrix3 and related expression
  * types, which internally use a three vector for storing the entries. SkewSymmetric
  * types always represent square three times three matrices.
  *
  * This implementations follows class DiagonalMatrix
  *
  * \tparam Derived is the derived type, a SkewSymmetricMatrix3 or SkewSymmetricWrapper.
  *
  * \sa class SkewSymmetricMatrix3, class SkewSymmetricWrapper
  */
template<typename Derived>
class SkewSymmetricBase : public EigenBase<Derived>
{
  public:
    typedef typename internal::traits<Derived>::SkewSymmetricVectorType SkewSymmetricVectorType;
    typedef typename SkewSymmetricVectorType::Scalar Scalar;
    typedef typename SkewSymmetricVectorType::RealScalar RealScalar;
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;

    enum {
      RowsAtCompileTime = SkewSymmetricVectorType::SizeAtCompileTime,
      ColsAtCompileTime = SkewSymmetricVectorType::SizeAtCompileTime,
      MaxRowsAtCompileTime = SkewSymmetricVectorType::MaxSizeAtCompileTime,
      MaxColsAtCompileTime = SkewSymmetricVectorType::MaxSizeAtCompileTime,
      IsVectorAtCompileTime = 0,
      Flags = NoPreferredStorageOrderBit
    };

    typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType;
    typedef DenseMatrixType DenseType;
    typedef SkewSymmetricMatrix3<Scalar> PlainObject;

    /** \returns a reference to the derived object. */
    EIGEN_DEVICE_FUNC
    inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
    /** \returns a const reference to the derived object. */
    EIGEN_DEVICE_FUNC
    inline Derived& derived() { return *static_cast<Derived*>(this); }

    /**
      * Constructs a dense matrix from \c *this. Note, this directly returns a dense matrix type,
      * not an expression.
      * \returns A dense matrix, with its entries set from the the derived object. */
    EIGEN_DEVICE_FUNC
    DenseMatrixType toDenseMatrix() const { return derived(); }

    /** Determinant vanishes */
    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
    inline Scalar determinant() const { return 0; }

    /** A.transpose() = -A */
    EIGEN_DEVICE_FUNC
    PlainObject transpose() const { return (-vector()).asSkewSymmetric(); }

    /** \returns the exponential of this matrix using Rodrigues’ formula */
    EIGEN_DEVICE_FUNC
    DenseMatrixType exponential() const {
      DenseMatrixType retVal = DenseMatrixType::Identity();
      const SkewSymmetricVectorType& v = vector();
      if (v.isZero()) {
        return retVal;
      }
      const Scalar norm2 = v.squaredNorm();
      const Scalar norm = numext::sqrt(norm2);
      retVal += ((((1 - numext::cos(norm))/norm2)*derived())*derived()) + (numext::sin(norm)/norm)*derived().toDenseMatrix();
      return retVal;
    }
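In math form, exponential() above implements Rodrigues' formula for $v \ne 0$, with $\theta = \|v\|_2$ and $[v]_\times$ the skew symmetric matrix held by *this:

    \exp([v]_\times) = I_3 + \frac{\sin\theta}{\theta}\,[v]_\times
                           + \frac{1-\cos\theta}{\theta^2}\,[v]_\times^2

which matches the code line by line: the identity initializer, the $(1-\cos\theta)/\theta^2$ coefficient on the squared term, and the $\sin\theta/\theta$ coefficient on the linear term.
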

    /** \returns a reference to the derived object's vector of coefficients. */
    EIGEN_DEVICE_FUNC
    inline const SkewSymmetricVectorType& vector() const { return derived().vector(); }
    /** \returns a const reference to the derived object's vector of coefficients. */
    EIGEN_DEVICE_FUNC
    inline SkewSymmetricVectorType& vector() { return derived().vector(); }

    /** \returns the number of rows. */
    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
    inline Index rows() const { return 3; }
    /** \returns the number of columns. */
    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
    inline Index cols() const { return 3; }

    /** \returns the matrix product of \c *this by the dense matrix, \a matrix */
    template<typename MatrixDerived>
    EIGEN_DEVICE_FUNC
    Product<Derived,MatrixDerived,LazyProduct>
    operator*(const MatrixBase<MatrixDerived> &matrix) const
    {
      return Product<Derived, MatrixDerived, LazyProduct>(derived(), matrix.derived());
    }

    /** \returns the matrix product of \c *this by the skew symmetric matrix, \a matrix */
    template<typename MatrixDerived>
    EIGEN_DEVICE_FUNC
    Product<Derived,MatrixDerived,LazyProduct>
    operator*(const SkewSymmetricBase<MatrixDerived> &matrix) const
    {
      return Product<Derived, MatrixDerived, LazyProduct>(derived(), matrix.derived());
    }

    template <typename OtherDerived>
    using SkewSymmetricProductReturnType = SkewSymmetricWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(
        SkewSymmetricVectorType, typename OtherDerived::SkewSymmetricVectorType, product)>;

    /** \returns the wedge product of \c *this by the skew symmetric matrix \a other
      * A wedge B = AB - BA */
    template <typename OtherDerived>
    EIGEN_DEVICE_FUNC SkewSymmetricProductReturnType<OtherDerived> wedge(
        const SkewSymmetricBase<OtherDerived>& other) const {
      return vector().cross(other.vector()).asSkewSymmetric();
    }
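wedge() can return another skew symmetric matrix because the commutator of two cross-product matrices is itself a cross-product matrix — the so(3) identity that the one-line implementation above relies on:

    [a]_\times [b]_\times - [b]_\times [a]_\times = [a \times b]_\times
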

    using SkewSymmetricScaleReturnType =
        SkewSymmetricWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(SkewSymmetricVectorType, Scalar, product)>;

    /** \returns the product of \c *this by the scalar \a scalar */
    EIGEN_DEVICE_FUNC
    inline SkewSymmetricScaleReturnType operator*(const Scalar& scalar) const {
      return (vector() * scalar).asSkewSymmetric();
    }

    using ScaleSkewSymmetricReturnType =
        SkewSymmetricWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar, SkewSymmetricVectorType, product)>;

    /** \returns the product of a scalar and the skew symmetric matrix \a other */
    EIGEN_DEVICE_FUNC
    friend inline ScaleSkewSymmetricReturnType operator*(const Scalar& scalar, const SkewSymmetricBase& other) {
      return (scalar * other.vector()).asSkewSymmetric();
    }

    template <typename OtherDerived>
    using SkewSymmetricSumReturnType = SkewSymmetricWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(
        SkewSymmetricVectorType, typename OtherDerived::SkewSymmetricVectorType, sum)>;

    /** \returns the sum of \c *this and the skew symmetric matrix \a other */
    template <typename OtherDerived>
    EIGEN_DEVICE_FUNC inline SkewSymmetricSumReturnType<OtherDerived> operator+(
        const SkewSymmetricBase<OtherDerived>& other) const {
      return (vector() + other.vector()).asSkewSymmetric();
    }

    template <typename OtherDerived>
    using SkewSymmetricDifferenceReturnType = SkewSymmetricWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(
        SkewSymmetricVectorType, typename OtherDerived::SkewSymmetricVectorType, difference)>;

    /** \returns the difference of \c *this and the skew symmetric matrix \a other */
    template <typename OtherDerived>
    EIGEN_DEVICE_FUNC inline SkewSymmetricDifferenceReturnType<OtherDerived> operator-(
        const SkewSymmetricBase<OtherDerived>& other) const {
      return (vector() - other.vector()).asSkewSymmetric();
    }
};

/** \class SkewSymmetricMatrix3
  * \ingroup Core_Module
  *
  * \brief Represents a 3x3 skew symmetric matrix with its storage
  *
  * \tparam Scalar_ the type of coefficients
  *
  * \sa class SkewSymmetricBase, class SkewSymmetricWrapper
  */

namespace internal {
template<typename Scalar_>
struct traits<SkewSymmetricMatrix3<Scalar_> >
 : traits<Matrix<Scalar_,3,3,0,3,3> >
{
  typedef Matrix<Scalar_,3,1,0,3,1> SkewSymmetricVectorType;
  typedef SkewSymmetricShape StorageKind;
  enum {
    Flags = LvalueBit | NoPreferredStorageOrderBit | NestByRefBit
  };
};
}
template<typename Scalar_>
class SkewSymmetricMatrix3
  : public SkewSymmetricBase<SkewSymmetricMatrix3<Scalar_> >
{
  public:
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    typedef typename internal::traits<SkewSymmetricMatrix3>::SkewSymmetricVectorType SkewSymmetricVectorType;
    typedef const SkewSymmetricMatrix3& Nested;
    typedef Scalar_ Scalar;
    typedef typename internal::traits<SkewSymmetricMatrix3>::StorageKind StorageKind;
    typedef typename internal::traits<SkewSymmetricMatrix3>::StorageIndex StorageIndex;
    #endif

  protected:

    SkewSymmetricVectorType m_vector;

  public:

    /** const version of vector(). */
    EIGEN_DEVICE_FUNC
    inline const SkewSymmetricVectorType& vector() const { return m_vector; }
    /** \returns a reference to the stored vector of coefficients. */
    EIGEN_DEVICE_FUNC
    inline SkewSymmetricVectorType& vector() { return m_vector; }

    /** Default constructor without initialization */
    EIGEN_DEVICE_FUNC
    inline SkewSymmetricMatrix3() {}

    /** Constructor from three scalars */
    EIGEN_DEVICE_FUNC
    inline SkewSymmetricMatrix3(const Scalar& x, const Scalar& y, const Scalar& z) : m_vector(x,y,z) {}

    /** \brief Constructs a SkewSymmetricMatrix3 from an r-value vector type */
    EIGEN_DEVICE_FUNC
    explicit inline SkewSymmetricMatrix3(SkewSymmetricVectorType&& vec) : m_vector(std::move(vec)) {}

    /** generic constructor from expression of the coefficients */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    explicit inline SkewSymmetricMatrix3(const MatrixBase<OtherDerived>& other) : m_vector(other)
    {}

    /** Copy constructor. */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    inline SkewSymmetricMatrix3(const SkewSymmetricBase<OtherDerived>& other) : m_vector(other.vector()) {}

    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** copy constructor. prevent a default copy constructor from hiding the other templated constructor */
    inline SkewSymmetricMatrix3(const SkewSymmetricMatrix3& other) : m_vector(other.vector()) {}
    #endif

    /** Copy operator. */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    SkewSymmetricMatrix3& operator=(const SkewSymmetricBase<OtherDerived>& other)
    {
      m_vector = other.vector();
      return *this;
    }

    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** This is a special case of the templated operator=. Its purpose is to
      * prevent a default operator= from hiding the templated operator=.
      */
    EIGEN_DEVICE_FUNC
    SkewSymmetricMatrix3& operator=(const SkewSymmetricMatrix3& other)
    {
      m_vector = other.vector();
      return *this;
    }
    #endif

    typedef SkewSymmetricWrapper<const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, SkewSymmetricVectorType>>
        InitializeReturnType;

    /** Initializes a skew symmetric matrix with coefficients set to zero */
    EIGEN_DEVICE_FUNC
    static InitializeReturnType Zero() { return SkewSymmetricVectorType::Zero().asSkewSymmetric(); }

    /** Sets all coefficients to zero. */
    EIGEN_DEVICE_FUNC
    inline void setZero() { m_vector.setZero(); }
};

/** \class SkewSymmetricWrapper
  * \ingroup Core_Module
  *
  * \brief Expression of a skew symmetric matrix
  *
  * \tparam SkewSymmetricVectorType_ the type of the vector of coefficients
  *
  * This class is an expression of a skew symmetric matrix, but not storing its own vector of coefficients,
  * instead wrapping an existing vector expression. It is the return type of MatrixBase::asSkewSymmetric()
  * and most of the time this is the only way that it is used.
  *
  * \sa class SkewSymmetricMatrix3, class SkewSymmetricBase, MatrixBase::asSkewSymmetric()
  */

namespace internal {
template<typename SkewSymmetricVectorType_>
struct traits<SkewSymmetricWrapper<SkewSymmetricVectorType_> >
{
  typedef SkewSymmetricVectorType_ SkewSymmetricVectorType;
  typedef typename SkewSymmetricVectorType::Scalar Scalar;
  typedef typename SkewSymmetricVectorType::StorageIndex StorageIndex;
  typedef SkewSymmetricShape StorageKind;
  typedef typename traits<SkewSymmetricVectorType>::XprKind XprKind;
  enum {
    RowsAtCompileTime = SkewSymmetricVectorType::SizeAtCompileTime,
    ColsAtCompileTime = SkewSymmetricVectorType::SizeAtCompileTime,
    MaxRowsAtCompileTime = SkewSymmetricVectorType::MaxSizeAtCompileTime,
    MaxColsAtCompileTime = SkewSymmetricVectorType::MaxSizeAtCompileTime,
    Flags = (traits<SkewSymmetricVectorType>::Flags & LvalueBit) | NoPreferredStorageOrderBit
  };
};
}

template<typename SkewSymmetricVectorType_>
class SkewSymmetricWrapper
  : public SkewSymmetricBase<SkewSymmetricWrapper<SkewSymmetricVectorType_> >, internal::no_assignment_operator
{
  public:
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    typedef SkewSymmetricVectorType_ SkewSymmetricVectorType;
    typedef SkewSymmetricWrapper Nested;
    #endif

    /** Constructor from expression of coefficients to wrap. */
    EIGEN_DEVICE_FUNC
    explicit inline SkewSymmetricWrapper(SkewSymmetricVectorType& a_vector) : m_vector(a_vector) {}

    /** \returns a const reference to the wrapped expression of coefficients. */
    EIGEN_DEVICE_FUNC
    const SkewSymmetricVectorType& vector() const { return m_vector; }

  protected:
    typename SkewSymmetricVectorType::Nested m_vector;
};

/** \returns a pseudo-expression of a skew symmetric matrix with *this as vector of coefficients
  *
  * \only_for_vectors
  *
  * \sa class SkewSymmetricWrapper, class SkewSymmetricMatrix3, vector(), isSkewSymmetric()
  **/
template<typename Derived>
EIGEN_DEVICE_FUNC inline const SkewSymmetricWrapper<const Derived>
MatrixBase<Derived>::asSkewSymmetric() const
{
  return SkewSymmetricWrapper<const Derived>(derived());
}
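A short usage sketch of the API this file introduces — hedged, since it assumes the product evaluators for SkewSymmetricShape are wired up elsewhere in the commit:

    #include <Eigen/Dense>
    #include <cassert>

    int main() {
      Eigen::Vector3d v(1.0, 2.0, 3.0), w(0.5, -1.0, 2.0);
      Eigen::Matrix3d M = v.asSkewSymmetric().toDenseMatrix();  // dense 3x3 form of [v]x
      assert(M.isSkewSymmetric());
      // Multiplying by [v]x is the same as taking a cross product with v:
      Eigen::Vector3d a = v.asSkewSymmetric() * w;
      assert(a.isApprox(v.cross(w)));
      return 0;
    }
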

/** \returns true if *this is approximately equal to a skew symmetric matrix,
  *          within the precision given by \a prec.
  */
template<typename Derived>
bool MatrixBase<Derived>::isSkewSymmetric(const RealScalar& prec) const
{
  if(cols() != rows()) return false;
  return (this->transpose() + *this).isZero(prec);
}

/** \returns the matrix product of \c *this by the skew symmetric matrix \skew.
  */
template<typename Derived>
template<typename SkewDerived>
EIGEN_DEVICE_FUNC inline const Product<Derived, SkewDerived, LazyProduct>
MatrixBase<Derived>::operator*(const SkewSymmetricBase<SkewDerived> &skew) const
{
  return Product<Derived, SkewDerived, LazyProduct>(derived(), skew.derived());
}

namespace internal {

template<> struct storage_kind_to_shape<SkewSymmetricShape> { typedef SkewSymmetricShape Shape; };

struct SkewSymmetric2Dense {};

template<> struct AssignmentKind<DenseShape,SkewSymmetricShape> { typedef SkewSymmetric2Dense Kind; };

// SkewSymmetric matrix to Dense assignment
template< typename DstXprType, typename SrcXprType, typename Functor>
struct Assignment<DstXprType, SrcXprType, Functor, SkewSymmetric2Dense>
{
  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
  {
    if((dst.rows()!=3) || (dst.cols()!=3)) {
      dst.resize(3, 3);
    }
    dst.diagonal().setZero();
    const typename SrcXprType::SkewSymmetricVectorType v = src.vector();
    dst(0, 1) = -v(2);
    dst(1, 0) = v(2);
    dst(0, 2) = v(1);
    dst(2, 0) = -v(1);
    dst(1, 2) = -v(0);
    dst(2, 1) = v(0);
  }

  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
  { dst.vector() += src.vector(); }

  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
  { dst.vector() -= src.vector(); }
};

} // namespace internal

} // end namespace Eigen

#endif // EIGEN_SKEWSYMMETRICMATRIX3_H
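For reference, the dense-assignment kernel above writes the standard cross-product matrix of $v = (v_0, v_1, v_2)^T$:

    [v]_\times = \begin{pmatrix} 0 & -v_2 & v_1 \\ v_2 & 0 & -v_0 \\ -v_1 & v_0 & 0 \end{pmatrix},
    \qquad [v]_\times\, w = v \times w
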
@@ -10,6 +10,8 @@
 #ifndef EIGEN_SOLVE_H
 #define EIGEN_SOLVE_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 template<typename Decomposition, typename RhsType, typename StorageKind> class SolveImpl;
@@ -77,7 +79,7 @@ public:

 protected:
   const Decomposition &m_dec;
-  const RhsType &m_rhs;
+  const typename internal::ref_selector<RhsType>::type m_rhs;
 };

@@ -123,7 +125,7 @@ struct evaluator<Solve<Decomposition,RhsType> >
   EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve)
     : m_result(solve.rows(), solve.cols())
   {
-    ::new (static_cast<Base*>(this)) Base(m_result);
+    internal::construct_at<Base>(this, m_result);
     solve.dec()._solve_impl(solve.rhs(), m_result);
   }

@@ -10,6 +10,8 @@
 #ifndef EIGEN_SOLVETRIANGULAR_H
 #define EIGEN_SOLVETRIANGULAR_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal {
@@ -87,7 +89,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>

   static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
   {
-    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
+    add_const_on_value_type_t<ActualLhsType> actualLhs = LhsProductTraits::extract(lhs);

     const Index size = lhs.rows();
     const Index othersize = Side==OnTheLeft? rhs.cols() : rhs.rows();
@@ -174,11 +176,11 @@ EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(c
     return;

   enum { copy = (internal::traits<OtherDerived>::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1};
-  typedef typename internal::conditional<copy,
-    typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
+  typedef std::conditional_t<copy,
+    typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&> OtherCopy;
   OtherCopy otherCopy(other);

-  internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type,
+  internal::triangular_solver_selector<MatrixType, std::remove_reference_t<OtherCopy>,
     Side, Mode>::run(derived().nestedExpression(), otherCopy);

   if (copy)
@@ -206,7 +208,7 @@ struct traits<triangular_solve_retval<Side, TriangularType, Rhs> >
 template<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval
   : public ReturnByValue<triangular_solve_retval<Side, TriangularType, Rhs> >
 {
-  typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
+  typedef remove_all_t<typename Rhs::Nested> RhsNestedCleaned;
   typedef ReturnByValue<triangular_solve_retval> Base;

   triangular_solve_retval(const TriangularType& tri, const Rhs& rhs)

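solveInPlace() above is the workhorse behind Eigen's triangular solves; typical call sites look like:

    #include <Eigen/Dense>

    int main() {
      Eigen::Matrix3d A = Eigen::Matrix3d::Random();
      Eigen::Vector3d b = Eigen::Vector3d::Random();
      // Solve L x = b using only the lower triangle of A, overwriting b with x.
      A.triangularView<Eigen::Lower>().solveInPlace(b);
      // Or without overwriting the right-hand side:
      Eigen::Vector3d x = A.triangularView<Eigen::Lower>().solve(b);
      return 0;
    }
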
@@ -10,6 +10,8 @@
 #ifndef EIGEN_SOLVERBASE_H
 #define EIGEN_SOLVERBASE_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal {
@@ -28,7 +30,7 @@ struct solve_assertion<Transpose<Derived> >
   template<bool Transpose_, typename Rhs>
   static void run(const type& transpose, const Rhs& b)
   {
-    internal::solve_assertion<typename internal::remove_all<Derived>::type>::template run<true>(transpose.nestedExpression(), b);
+    internal::solve_assertion<internal::remove_all_t<Derived>>::template run<true>(transpose.nestedExpression(), b);
   }
 };

@@ -40,7 +42,7 @@ struct solve_assertion<CwiseUnaryOp<Eigen::internal::scalar_conjugate_op<Scalar>
   template<bool Transpose_, typename Rhs>
   static void run(const type& adjoint, const Rhs& b)
   {
-    internal::solve_assertion<typename internal::remove_all<Transpose<Derived> >::type>::template run<true>(adjoint.nestedExpression(), b);
+    internal::solve_assertion<internal::remove_all_t<Transpose<Derived> >>::template run<true>(adjoint.nestedExpression(), b);
   }
 };
 } // end namespace internal
@@ -79,12 +81,11 @@ class SolverBase : public EigenBase<Derived>
     enum {
       RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
       ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
-      SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
-                                                          internal::traits<Derived>::ColsAtCompileTime>::ret),
+      SizeAtCompileTime = (internal::size_of_xpr_at_compile_time<Derived>::ret),
       MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
       MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
-      MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
-                                                             internal::traits<Derived>::MaxColsAtCompileTime>::ret),
+      MaxSizeAtCompileTime = internal::size_at_compile_time(internal::traits<Derived>::MaxRowsAtCompileTime,
+                                                            internal::traits<Derived>::MaxColsAtCompileTime),
       IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
                               || internal::traits<Derived>::MaxColsAtCompileTime == 1,
       NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 : bool(IsVectorAtCompileTime) ? 1 : 2
@@ -105,12 +106,12 @@ class SolverBase : public EigenBase<Derived>
     inline const Solve<Derived, Rhs>
     solve(const MatrixBase<Rhs>& b) const
     {
-      internal::solve_assertion<typename internal::remove_all<Derived>::type>::template run<false>(derived(), b);
+      internal::solve_assertion<internal::remove_all_t<Derived>>::template run<false>(derived(), b);
       return Solve<Derived, Rhs>(derived(), b.derived());
     }

     /** \internal the return type of transpose() */
-    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+    typedef Transpose<const Derived> ConstTransposeReturnType;
     /** \returns an expression of the transposed of the factored matrix.
       *
       * A typical usage is to solve for the transposed problem A^T x = b:
@@ -118,16 +119,16 @@ class SolverBase : public EigenBase<Derived>
       *
       * \sa adjoint(), solve()
       */
-    inline ConstTransposeReturnType transpose() const
+    inline const ConstTransposeReturnType transpose() const
     {
       return ConstTransposeReturnType(derived());
     }

     /** \internal the return type of adjoint() */
-    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
-                                           CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
-                                           ConstTransposeReturnType
-                                          >::type AdjointReturnType;
+    typedef std::conditional_t<NumTraits<Scalar>::IsComplex,
+                               CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const ConstTransposeReturnType>,
+                               const ConstTransposeReturnType
+                              > AdjointReturnType;
     /** \returns an expression of the adjoint of the factored matrix
       *
       * A typical usage is to solve for the adjoint problem A' x = b:
@@ -137,7 +138,7 @@ class SolverBase : public EigenBase<Derived>
       *
       * \sa transpose(), solve()
       */
-    inline AdjointReturnType adjoint() const
+    inline const AdjointReturnType adjoint() const
     {
       return AdjointReturnType(derived().transpose());
     }

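Putting SolverBase's three entry points together, the doc comments above correspond to calls like:

    #include <Eigen/Dense>

    int main() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
      Eigen::VectorXd b = Eigen::VectorXd::Random(4);
      Eigen::PartialPivLU<Eigen::MatrixXd> lu(A);
      Eigen::VectorXd x1 = lu.solve(b);              // A   x = b
      Eigen::VectorXd x2 = lu.transpose().solve(b);  // A^T x = b
      Eigen::VectorXd x3 = lu.adjoint().solve(b);    // A'  x = b (same as A^T for real scalars)
      return 0;
    }
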
@@ -10,6 +10,8 @@
 #ifndef EIGEN_STABLENORM_H
 #define EIGEN_STABLENORM_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal {
@@ -57,7 +59,7 @@ void stable_norm_impl_inner_step(const VectorType &vec, RealScalar& ssq, RealSca
   const Index blockSize = 4096;

   typedef typename internal::nested_eval<VectorType,2>::type VectorTypeCopy;
-  typedef typename internal::remove_all<VectorTypeCopy>::type VectorTypeCopyClean;
+  typedef internal::remove_all_t<VectorTypeCopy> VectorTypeCopyClean;
   const VectorTypeCopy copy(vec);

   enum {
@@ -66,8 +68,8 @@ void stable_norm_impl_inner_step(const VectorType &vec, RealScalar& ssq, RealSca
         ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)
           && (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization
   };
-  typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<VectorTypeCopyClean>::Alignment>,
-                                         typename VectorTypeCopyClean::ConstSegmentReturnType>::type SegmentWrapper;
+  typedef std::conditional_t<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<VectorTypeCopyClean>::Alignment>,
+                             typename VectorTypeCopyClean::ConstSegmentReturnType> SegmentWrapper;
   Index n = vec.size();

   Index bi = internal::first_default_aligned(copy);
@@ -79,7 +81,7 @@ void stable_norm_impl_inner_step(const VectorType &vec, RealScalar& ssq, RealSca

 template<typename VectorType>
 typename VectorType::RealScalar
-stable_norm_impl(const VectorType &vec, typename enable_if<VectorType::IsVectorAtCompileTime>::type* = 0 )
+stable_norm_impl(const VectorType &vec, std::enable_if_t<VectorType::IsVectorAtCompileTime>* = 0 )
 {
   using std::sqrt;
   using std::abs;
@@ -101,7 +103,7 @@ stable_norm_impl(const VectorType &vec, typename enable_if<VectorType::IsVectorA

 template<typename MatrixType>
 typename MatrixType::RealScalar
-stable_norm_impl(const MatrixType &mat, typename enable_if<!MatrixType::IsVectorAtCompileTime>::type* = 0 )
+stable_norm_impl(const MatrixType &mat, std::enable_if_t<!MatrixType::IsVectorAtCompileTime>* = 0 )
 {
   using std::sqrt;

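These helpers implement MatrixBase::stableNorm(), which accumulates blockwise with rescaling to avoid the overflow/underflow that the plain two-norm can hit when squaring extreme values:

    #include <Eigen/Dense>
    #include <cmath>
    #include <limits>

    int main() {
      const double big = std::numeric_limits<double>::max() / 2;
      Eigen::Vector2d v(big, big);
      double n1 = v.norm();        // squaring overflows to +inf
      double n2 = v.stableNorm();  // rescaled accumulation stays finite
      return (std::isinf(n1) && std::isfinite(n2)) ? 0 : 1;
    }
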
@@ -10,6 +10,8 @@
 #ifndef EIGEN_STLITERATORS_H
 #define EIGEN_STLITERATORS_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal {
@@ -25,7 +27,7 @@ protected:
   typedef typename traits::XprType XprType;
   typedef indexed_based_stl_iterator_base<typename traits::non_const_iterator> non_const_iterator;
   typedef indexed_based_stl_iterator_base<typename traits::const_iterator> const_iterator;
-  typedef typename internal::conditional<internal::is_const<XprType>::value,non_const_iterator,const_iterator>::type other_iterator;
+  typedef std::conditional_t<internal::is_const<XprType>::value,non_const_iterator,const_iterator> other_iterator;
   // NOTE: in C++03 we cannot declare friend classes through typedefs because we need to write friend class:
   friend class indexed_based_stl_iterator_base<typename traits::const_iterator>;
   friend class indexed_based_stl_iterator_base<typename traits::non_const_iterator>;
@@ -104,7 +106,7 @@ protected:
   typedef typename traits::XprType XprType;
   typedef indexed_based_stl_reverse_iterator_base<typename traits::non_const_iterator> non_const_iterator;
   typedef indexed_based_stl_reverse_iterator_base<typename traits::const_iterator> const_iterator;
-  typedef typename internal::conditional<internal::is_const<XprType>::value,non_const_iterator,const_iterator>::type other_iterator;
+  typedef std::conditional_t<internal::is_const<XprType>::value,non_const_iterator,const_iterator> other_iterator;
   // NOTE: in C++03 we cannot declare friend classes through typedefs because we need to write friend class:
   friend class indexed_based_stl_reverse_iterator_base<typename traits::const_iterator>;
   friend class indexed_based_stl_reverse_iterator_base<typename traits::non_const_iterator>;
@@ -179,18 +181,18 @@ template<typename XprType>
 class pointer_based_stl_iterator
 {
   enum { is_lvalue = internal::is_lvalue<XprType>::value };
-  typedef pointer_based_stl_iterator<typename internal::remove_const<XprType>::type> non_const_iterator;
-  typedef pointer_based_stl_iterator<typename internal::add_const<XprType>::type> const_iterator;
-  typedef typename internal::conditional<internal::is_const<XprType>::value,non_const_iterator,const_iterator>::type other_iterator;
+  typedef pointer_based_stl_iterator<std::remove_const_t<XprType>> non_const_iterator;
+  typedef pointer_based_stl_iterator<std::add_const_t<XprType>> const_iterator;
+  typedef std::conditional_t<internal::is_const<XprType>::value,non_const_iterator,const_iterator> other_iterator;
   // NOTE: in C++03 we cannot declare friend classes through typedefs because we need to write friend class:
-  friend class pointer_based_stl_iterator<typename internal::add_const<XprType>::type>;
-  friend class pointer_based_stl_iterator<typename internal::remove_const<XprType>::type>;
+  friend class pointer_based_stl_iterator<std::add_const_t<XprType>>;
+  friend class pointer_based_stl_iterator<std::remove_const_t<XprType>>;
 public:
   typedef Index difference_type;
   typedef typename XprType::Scalar value_type;
   typedef std::random_access_iterator_tag iterator_category;
-  typedef typename internal::conditional<bool(is_lvalue), value_type*, const value_type*>::type pointer;
-  typedef typename internal::conditional<bool(is_lvalue), value_type&, const value_type&>::type reference;
+  typedef std::conditional_t<bool(is_lvalue), value_type*, const value_type*> pointer;
+  typedef std::conditional_t<bool(is_lvalue), value_type&, const value_type&> reference;


   pointer_based_stl_iterator() EIGEN_NO_THROW : m_ptr(0) {}
@@ -256,12 +258,12 @@ protected:
   internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_incr;
 };

-template<typename _XprType>
-struct indexed_based_stl_iterator_traits<generic_randaccess_stl_iterator<_XprType> >
+template<typename XprType_>
+struct indexed_based_stl_iterator_traits<generic_randaccess_stl_iterator<XprType_> >
 {
-  typedef _XprType XprType;
-  typedef generic_randaccess_stl_iterator<typename internal::remove_const<XprType>::type> non_const_iterator;
-  typedef generic_randaccess_stl_iterator<typename internal::add_const<XprType>::type> const_iterator;
+  typedef XprType_ XprType;
+  typedef generic_randaccess_stl_iterator<std::remove_const_t<XprType>> non_const_iterator;
+  typedef generic_randaccess_stl_iterator<std::add_const_t<XprType>> const_iterator;
 };

 template<typename XprType>
@@ -283,13 +285,13 @@ protected:

   // TODO currently const Transpose/Reshape expressions never returns const references,
   // so lets return by value too.
-  //typedef typename internal::conditional<bool(has_direct_access), const value_type&, const value_type>::type read_only_ref_t;
+  //typedef std::conditional_t<bool(has_direct_access), const value_type&, const value_type> read_only_ref_t;
   typedef const value_type read_only_ref_t;

 public:

-  typedef typename internal::conditional<bool(is_lvalue), value_type *, const value_type *>::type pointer;
-  typedef typename internal::conditional<bool(is_lvalue), value_type&, read_only_ref_t>::type reference;
+  typedef std::conditional_t<bool(is_lvalue), value_type *, const value_type *> pointer;
+  typedef std::conditional_t<bool(is_lvalue), value_type&, read_only_ref_t> reference;

   generic_randaccess_stl_iterator() : Base() {}
   generic_randaccess_stl_iterator(XprType& xpr, Index index) : Base(xpr,index) {}
@@ -301,12 +303,12 @@ public:
   pointer operator->() const { return &((*mp_xpr)(m_index)); }
 };

-template<typename _XprType, DirectionType Direction>
-struct indexed_based_stl_iterator_traits<subvector_stl_iterator<_XprType,Direction> >
+template<typename XprType_, DirectionType Direction>
+struct indexed_based_stl_iterator_traits<subvector_stl_iterator<XprType_,Direction> >
 {
-  typedef _XprType XprType;
-  typedef subvector_stl_iterator<typename internal::remove_const<XprType>::type, Direction> non_const_iterator;
-  typedef subvector_stl_iterator<typename internal::add_const<XprType>::type, Direction> const_iterator;
+  typedef XprType_ XprType;
+  typedef subvector_stl_iterator<std::remove_const_t<XprType>, Direction> non_const_iterator;
+  typedef subvector_stl_iterator<std::add_const_t<XprType>, Direction> const_iterator;
 };

 template<typename XprType, DirectionType Direction>
@@ -320,12 +322,12 @@ protected:
   using Base::m_index;
   using Base::mp_xpr;

-  typedef typename internal::conditional<Direction==Vertical,typename XprType::ColXpr,typename XprType::RowXpr>::type SubVectorType;
-  typedef typename internal::conditional<Direction==Vertical,typename XprType::ConstColXpr,typename XprType::ConstRowXpr>::type ConstSubVectorType;
+  typedef std::conditional_t<Direction==Vertical,typename XprType::ColXpr,typename XprType::RowXpr> SubVectorType;
+  typedef std::conditional_t<Direction==Vertical,typename XprType::ConstColXpr,typename XprType::ConstRowXpr> ConstSubVectorType;


 public:
-  typedef typename internal::conditional<bool(is_lvalue), SubVectorType, ConstSubVectorType>::type reference;
+  typedef std::conditional_t<bool(is_lvalue), SubVectorType, ConstSubVectorType> reference;
   typedef typename reference::PlainObject value_type;

 private:
@@ -349,12 +351,12 @@ public:
   pointer operator->() const { return (*mp_xpr).template subVector<Direction>(m_index); }
 };

-template<typename _XprType, DirectionType Direction>
-struct indexed_based_stl_iterator_traits<subvector_stl_reverse_iterator<_XprType,Direction> >
+template<typename XprType_, DirectionType Direction>
+struct indexed_based_stl_iterator_traits<subvector_stl_reverse_iterator<XprType_,Direction> >
 {
-  typedef _XprType XprType;
-  typedef subvector_stl_reverse_iterator<typename internal::remove_const<XprType>::type, Direction> non_const_iterator;
-  typedef subvector_stl_reverse_iterator<typename internal::add_const<XprType>::type, Direction> const_iterator;
+  typedef XprType_ XprType;
+  typedef subvector_stl_reverse_iterator<std::remove_const_t<XprType>, Direction> non_const_iterator;
+  typedef subvector_stl_reverse_iterator<std::add_const_t<XprType>, Direction> const_iterator;
 };

 template<typename XprType, DirectionType Direction>
@@ -368,12 +370,12 @@ protected:
   using Base::m_index;
   using Base::mp_xpr;

-  typedef typename internal::conditional<Direction==Vertical,typename XprType::ColXpr,typename XprType::RowXpr>::type SubVectorType;
-  typedef typename internal::conditional<Direction==Vertical,typename XprType::ConstColXpr,typename XprType::ConstRowXpr>::type ConstSubVectorType;
+  typedef std::conditional_t<Direction==Vertical,typename XprType::ColXpr,typename XprType::RowXpr> SubVectorType;
+  typedef std::conditional_t<Direction==Vertical,typename XprType::ConstColXpr,typename XprType::ConstRowXpr> ConstSubVectorType;


 public:
-  typedef typename internal::conditional<bool(is_lvalue), SubVectorType, ConstSubVectorType>::type reference;
+  typedef std::conditional_t<bool(is_lvalue), SubVectorType, ConstSubVectorType> reference;
   typedef typename reference::PlainObject value_type;

 private:

@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_STRIDE_H
|
||||
#define EIGEN_STRIDE_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class Stride
|
||||
@@ -31,27 +33,31 @@ namespace Eigen {
|
||||
* arguments to the constructor.
|
||||
*
|
||||
* Indeed, this class takes two template parameters:
|
||||
* \tparam _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
|
||||
* \tparam _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
|
||||
* \tparam OuterStrideAtCompileTime_ the outer stride, or Dynamic if you want to specify it at runtime.
|
||||
* \tparam InnerStrideAtCompileTime_ the inner stride, or Dynamic if you want to specify it at runtime.
|
||||
*
|
||||
* Here is an example:
|
||||
* \include Map_general_stride.cpp
|
||||
* Output: \verbinclude Map_general_stride.out
|
||||
*
|
||||
* Both strides can be negative, however, a negative stride of -1 cannot be specified at compiletime
|
||||
* Both strides can be negative. However, a negative stride of -1 cannot be specified at compile time
|
||||
* because of the ambiguity with Dynamic, which is defined as -1 (historically, negative strides were
|
||||
* not allowed).
|
||||
*
|
||||
* Note that for compile-time vectors (ColsAtCompileTime==1 or RowsAtCompileTime==1),
|
||||
* the inner stride is the pointer increment between two consecutive elements,
|
||||
* regardless of storage layout.
|
||||
*
|
||||
* \sa class InnerStride, class OuterStride, \ref TopicStorageOrders
|
||||
*/
|
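As a sketch of how these parameters are typically combined with Map (the buffer name and sizes below are illustrative, not from this commit):

#include <Eigen/Dense>

// View a 3x4 column-major matrix inside a 10-row buffer: the outer stride
// (10) is the pointer distance between consecutive columns, the inner
// stride (1) between consecutive rows within a column.
void stride_demo() {
  double data[40] = {};
  Eigen::Map<Eigen::MatrixXd, 0,
             Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>>
      view(data, 3, 4, Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>(10, 1));
  view(2, 1) = 1.0;  // writes data[1*10 + 2]
}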
||||
template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
|
||||
template<int OuterStrideAtCompileTime_, int InnerStrideAtCompileTime_>
|
||||
class Stride
|
||||
{
|
||||
public:
|
||||
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
|
||||
enum {
|
||||
InnerStrideAtCompileTime = _InnerStrideAtCompileTime,
|
||||
OuterStrideAtCompileTime = _OuterStrideAtCompileTime
|
||||
InnerStrideAtCompileTime = InnerStrideAtCompileTime_,
|
||||
OuterStrideAtCompileTime = OuterStrideAtCompileTime_
|
||||
};
|
||||
|
||||
/** Default constructor, for use when strides are fixed at compile time */
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_SWAP_H
|
||||
#define EIGEN_SWAP_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#ifndef EIGEN_TRANSPOSE_H
|
||||
#define EIGEN_TRANSPOSE_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
@@ -18,7 +20,7 @@ template<typename MatrixType>
|
||||
struct traits<Transpose<MatrixType> > : public traits<MatrixType>
|
||||
{
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
|
||||
typedef std::remove_reference_t<MatrixTypeNested> MatrixTypeNestedPlain;
|
||||
enum {
|
||||
RowsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||
ColsAtCompileTime = MatrixType::RowsAtCompileTime,
|
||||
@@ -58,7 +60,7 @@ template<typename MatrixType> class Transpose
|
||||
|
||||
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
|
||||
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
||||
typedef internal::remove_all_t<MatrixType> NestedExpression;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit EIGEN_STRONG_INLINE Transpose(MatrixType& matrix) : m_matrix(matrix) {}
|
||||
@@ -72,12 +74,12 @@ template<typename MatrixType> class Transpose
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const typename internal::remove_all<MatrixTypeNested>::type&
|
||||
const internal::remove_all_t<MatrixTypeNested>&
|
||||
nestedExpression() const { return m_matrix; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename internal::remove_reference<MatrixTypeNested>::type&
|
||||
std::remove_reference_t<MatrixTypeNested>&
|
||||
nestedExpression() { return m_matrix; }
|
||||
|
||||
/** \internal */
|
||||
@@ -130,11 +132,11 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Index outerStride() const { return derived().nestedExpression().outerStride(); }
|
||||
|
||||
typedef typename internal::conditional<
|
||||
internal::is_lvalue<MatrixType>::value,
|
||||
Scalar,
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
typedef std::conditional_t<
|
||||
internal::is_lvalue<MatrixType>::value,
|
||||
Scalar,
|
||||
const Scalar
|
||||
> ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
|
||||
@@ -178,7 +180,7 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
|
||||
* \sa transposeInPlace(), adjoint() */
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Transpose<Derived>
|
||||
typename DenseBase<Derived>::TransposeReturnType
|
||||
DenseBase<Derived>::transpose()
|
||||
{
|
||||
return TransposeReturnType(derived());
|
||||
@@ -191,7 +193,7 @@ DenseBase<Derived>::transpose()
|
||||
* \sa transposeInPlace(), adjoint() */
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename DenseBase<Derived>::ConstTransposeReturnType
|
||||
const typename DenseBase<Derived>::ConstTransposeReturnType
|
||||
DenseBase<Derived>::transpose() const
|
||||
{
|
||||
return ConstTransposeReturnType(derived());
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_TRANSPOSITIONS_H
|
||||
#define EIGEN_TRANSPOSITIONS_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<typename Derived>
|
||||
@@ -113,11 +115,11 @@ class TranspositionsBase
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
|
||||
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
|
||||
: traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_>
|
||||
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,StorageIndex_> >
|
||||
: traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,StorageIndex_> >
|
||||
{
|
||||
typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
typedef Matrix<StorageIndex_, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
typedef TranspositionsStorage StorageKind;
|
||||
};
|
||||
}
|
||||
@@ -151,8 +153,8 @@ struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageInde
|
||||
* \sa class PermutationMatrix
|
||||
*/
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
|
||||
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_>
|
||||
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,StorageIndex_> >
|
||||
{
|
||||
typedef internal::traits<Transpositions> Traits;
|
||||
public:
|
||||
@@ -199,19 +201,19 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
|
||||
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
|
||||
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,_PacketAccess> >
|
||||
: traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_, int PacketAccess_>
|
||||
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,StorageIndex_>,PacketAccess_> >
|
||||
: traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,StorageIndex_> >
|
||||
{
|
||||
typedef Map<const Matrix<_StorageIndex,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
|
||||
typedef _StorageIndex StorageIndex;
|
||||
typedef Map<const Matrix<StorageIndex_,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, PacketAccess_> IndicesType;
|
||||
typedef StorageIndex_ StorageIndex;
|
||||
typedef TranspositionsStorage StorageKind;
|
||||
};
|
||||
}
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int PacketAccess>
|
||||
class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,PacketAccess>
|
||||
: public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,PacketAccess> >
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_, int PacketAccess>
|
||||
class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,StorageIndex_>,PacketAccess>
|
||||
: public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,StorageIndex_>,PacketAccess> >
|
||||
{
|
||||
typedef internal::traits<Map> Traits;
|
||||
public:
|
||||
@@ -260,17 +262,17 @@ class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,P
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template<typename _IndicesType>
|
||||
struct traits<TranspositionsWrapper<_IndicesType> >
|
||||
: traits<PermutationWrapper<_IndicesType> >
|
||||
template<typename IndicesType_>
|
||||
struct traits<TranspositionsWrapper<IndicesType_> >
|
||||
: traits<PermutationWrapper<IndicesType_> >
|
||||
{
|
||||
typedef TranspositionsStorage StorageKind;
|
||||
};
|
||||
}
|
||||
|
||||
template<typename _IndicesType>
|
||||
template<typename IndicesType_>
|
||||
class TranspositionsWrapper
|
||||
: public TranspositionsBase<TranspositionsWrapper<_IndicesType> >
|
||||
: public TranspositionsBase<TranspositionsWrapper<IndicesType_> >
|
||||
{
|
||||
typedef internal::traits<TranspositionsWrapper> Traits;
|
||||
public:
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#ifndef EIGEN_TRIANGULARMATRIX_H
|
||||
#define EIGEN_TRIANGULARMATRIX_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
@@ -35,14 +37,13 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
|
||||
|
||||
SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime>::ret),
|
||||
SizeAtCompileTime = (internal::size_of_xpr_at_compile_time<Derived>::ret),
|
||||
/**< This is equal to the number of coefficients, i.e. the number of
|
||||
* rows times the number of columns, or to \a Dynamic if this is not
|
||||
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
|
||||
|
||||
MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime>::ret)
|
||||
MaxSizeAtCompileTime = internal::size_at_compile_time(internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime)
|
||||
|
||||
};
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
@@ -153,8 +154,8 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
*
|
||||
* \brief Expression of a triangular part in a matrix
|
||||
*
|
||||
* \param MatrixType the type of the object in which we are taking the triangular part
|
||||
* \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
|
||||
* \tparam MatrixType the type of the object in which we are taking the triangular part
|
||||
* \tparam Mode the kind of triangular matrix expression to construct. Can be #Upper,
|
||||
* #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
|
||||
* This is in fact a bit field; it must have either #Upper or #Lower,
|
||||
* and additionally it may have #UnitDiag or #ZeroDiag or neither.
|
||||
@@ -166,39 +167,39 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
* \sa MatrixBase::triangularView()
|
||||
*/
|
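A hedged usage sketch of the bit-field Mode (the matrix and right-hand side are made up): UnitLower is Lower | UnitDiag, so the stored diagonal of A is ignored and assumed to be all ones.

#include <Eigen/Dense>

void triangular_demo() {
  Eigen::Matrix3d A = Eigen::Matrix3d::Random();
  Eigen::Vector3d b = Eigen::Vector3d::Ones();
  // Solve L*x = b using the strictly lower part of A plus an implicit
  // unit diagonal (UnitDiag bit set).
  Eigen::Vector3d x = A.triangularView<Eigen::UnitLower>().solve(b);
  (void)x;
}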
||||
namespace internal {
|
||||
template<typename MatrixType, unsigned int _Mode>
|
||||
struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
|
||||
template<typename MatrixType, unsigned int Mode_>
|
||||
struct traits<TriangularView<MatrixType, Mode_> > : traits<MatrixType>
|
||||
{
|
||||
typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
|
||||
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
|
||||
typedef std::remove_reference_t<MatrixTypeNested> MatrixTypeNestedNonRef;
|
||||
typedef remove_all_t<MatrixTypeNested> MatrixTypeNestedCleaned;
|
||||
typedef typename MatrixType::PlainObject FullMatrixType;
|
||||
typedef MatrixType ExpressionType;
|
||||
enum {
|
||||
Mode = _Mode,
|
||||
Mode = Mode_,
|
||||
FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
|
||||
Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits | FlagsLvalueBit) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)))
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
template<typename _MatrixType, unsigned int _Mode, typename StorageKind> class TriangularViewImpl;
|
||||
template<typename MatrixType_, unsigned int Mode_, typename StorageKind> class TriangularViewImpl;
|
||||
|
||||
template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
: public TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind >
|
||||
template<typename MatrixType_, unsigned int Mode_> class TriangularView
|
||||
: public TriangularViewImpl<MatrixType_, Mode_, typename internal::traits<MatrixType_>::StorageKind >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind > Base;
|
||||
typedef TriangularViewImpl<MatrixType_, Mode_, typename internal::traits<MatrixType_>::StorageKind > Base;
|
||||
typedef typename internal::traits<TriangularView>::Scalar Scalar;
|
||||
typedef _MatrixType MatrixType;
|
||||
typedef MatrixType_ MatrixType;
|
||||
|
||||
protected:
|
||||
typedef typename internal::traits<TriangularView>::MatrixTypeNested MatrixTypeNested;
|
||||
typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef;
|
||||
|
||||
typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
|
||||
typedef TriangularView<typename internal::add_const<MatrixType>::type, _Mode> ConstTriangularView;
|
||||
typedef internal::remove_all_t<typename MatrixType::ConjugateReturnType> MatrixConjugateReturnType;
|
||||
typedef TriangularView<std::add_const_t<MatrixType>, Mode_> ConstTriangularView;
|
||||
|
||||
public:
|
||||
|
||||
@@ -206,7 +207,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned NestedExpression;
|
||||
|
||||
enum {
|
||||
Mode = _Mode,
|
||||
Mode = Mode_,
|
||||
Flags = internal::traits<TriangularView>::Flags,
|
||||
TransposeMode = (Mode & Upper ? Lower : 0)
|
||||
| (Mode & Lower ? Upper : 0)
|
||||
@@ -247,10 +248,10 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
*/
|
||||
template<bool Cond>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename internal::conditional<Cond,ConjugateReturnType,ConstTriangularView>::type
|
||||
inline std::conditional_t<Cond,ConjugateReturnType,ConstTriangularView>
|
||||
conjugateIf() const
|
||||
{
|
||||
typedef typename internal::conditional<Cond,ConjugateReturnType,ConstTriangularView>::type ReturnType;
|
||||
typedef std::conditional_t<Cond,ConjugateReturnType,ConstTriangularView> ReturnType;
|
||||
return ReturnType(m_matrix.template conjugateIf<Cond>());
|
||||
}
|
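A minimal sketch of what conjugateIf enables (the helper name is hypothetical): conjugating only for complex scalars, with the choice resolved at compile time rather than by a runtime branch.

#include <Eigen/Dense>

// Hypothetical helper: returns t conjugated when Scalar is complex,
// t itself otherwise; no branch survives to runtime.
template <typename TriView>
auto conjIfComplex(const TriView& t) {
  return t.template conjugateIf<
      (Eigen::NumTraits<typename TriView::Scalar>::IsComplex != 0)>();
}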
||||
|
||||
@@ -262,10 +263,10 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
|
||||
typedef TriangularView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
|
||||
/** \sa MatrixBase::transpose() */
|
||||
template<class Dummy=int>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TransposeReturnType transpose()
|
||||
inline TransposeReturnType transpose(std::enable_if_t<Eigen::internal::is_lvalue<MatrixType>::value, Dummy*> = nullptr)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
typename MatrixType::TransposeReturnType tmp(m_matrix);
|
||||
return TransposeReturnType(tmp);
|
||||
}
|
||||
@@ -342,16 +343,17 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
*
|
||||
* \sa class TriangularView, MatrixBase::triangularView()
|
||||
*/
|
||||
template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_MatrixType,_Mode,Dense>
|
||||
: public TriangularBase<TriangularView<_MatrixType, _Mode> >
|
||||
template<typename MatrixType_, unsigned int Mode_> class TriangularViewImpl<MatrixType_,Mode_,Dense>
|
||||
: public TriangularBase<TriangularView<MatrixType_, Mode_> >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef TriangularView<_MatrixType, _Mode> TriangularViewType;
|
||||
typedef TriangularView<MatrixType_, Mode_> TriangularViewType;
|
||||
|
||||
typedef TriangularBase<TriangularViewType> Base;
|
||||
typedef typename internal::traits<TriangularViewType>::Scalar Scalar;
|
||||
|
||||
typedef _MatrixType MatrixType;
|
||||
typedef MatrixType_ MatrixType;
|
||||
typedef typename MatrixType::PlainObject DenseMatrixType;
|
||||
typedef DenseMatrixType PlainObject;
|
||||
|
||||
@@ -362,7 +364,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
|
||||
typedef typename internal::traits<TriangularViewType>::StorageKind StorageKind;
|
||||
|
||||
enum {
|
||||
Mode = _Mode,
|
||||
Mode = Mode_,
|
||||
Flags = internal::traits<TriangularViewType>::Flags
|
||||
};
|
||||
|
||||
@@ -728,10 +730,10 @@ struct evaluator_traits<TriangularView<MatrixType,Mode> >
|
||||
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
struct unary_evaluator<TriangularView<MatrixType,Mode>, IndexBased>
|
||||
: evaluator<typename internal::remove_all<MatrixType>::type>
|
||||
: evaluator<internal::remove_all_t<MatrixType>>
|
||||
{
|
||||
typedef TriangularView<MatrixType,Mode> XprType;
|
||||
typedef evaluator<typename internal::remove_all<MatrixType>::type> Base;
|
||||
typedef evaluator<internal::remove_all_t<MatrixType>> Base;
|
||||
EIGEN_DEVICE_FUNC
|
||||
unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {}
|
||||
};
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#ifndef EIGEN_VECTORBLOCK_H
|
||||
#define EIGEN_VECTORBLOCK_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
@@ -66,6 +68,7 @@ template<typename VectorType, int Size> class VectorBlock
|
||||
};
|
||||
public:
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock)
|
||||
|
||||
using Base::operator=;
|
||||
|
||||
@@ -76,18 +79,14 @@ template<typename VectorType, int Size> class VectorBlock
|
||||
: Base(vector,
|
||||
IsColVector ? start : 0, IsColVector ? 0 : start,
|
||||
IsColVector ? size : 1, IsColVector ? 1 : size)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);
|
||||
}
|
||||
{ }
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
VectorBlock(VectorType& vector, Index start)
|
||||
: Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);
|
||||
}
|
||||
{ }
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#ifndef EIGEN_PARTIAL_REDUX_H
|
||||
#define EIGEN_PARTIAL_REDUX_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class PartialReduxExpr
|
||||
@@ -86,7 +88,6 @@ template<typename A,typename B> struct partial_redux_dummy_func;
|
||||
#define EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(MEMBER,COST,VECTORIZABLE,BINARYOP) \
|
||||
template <typename ResultType,typename Scalar> \
|
||||
struct member_##MEMBER { \
|
||||
EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
|
||||
typedef ResultType result_type; \
|
||||
typedef BINARYOP<Scalar,Scalar> BinaryOp; \
|
||||
template<int Size> struct Cost { enum { value = COST }; }; \
|
||||
@@ -191,7 +192,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
typedef typename ExpressionType::RealScalar RealScalar;
|
||||
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
|
||||
typedef typename internal::ref_selector<ExpressionType>::non_const_type ExpressionTypeNested;
|
||||
typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
|
||||
typedef internal::remove_all_t<ExpressionTypeNested> ExpressionTypeNestedCleaned;
|
||||
|
||||
template<template<typename OutScalar,typename InputScalar> class Functor,
|
||||
typename ReturnScalar=Scalar> struct ReturnType
|
||||
@@ -230,9 +231,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
typename ExtendedType<OtherDerived>::Type
|
||||
extendedTo(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxColsAtCompileTime==1),
|
||||
EIGEN_STATIC_ASSERT(internal::check_implication(isVertical, OtherDerived::MaxColsAtCompileTime==1),
|
||||
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1),
|
||||
EIGEN_STATIC_ASSERT(internal::check_implication(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1),
|
||||
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
|
||||
return typename ExtendedType<OtherDerived>::Type
|
||||
(other.derived(),
|
||||
@@ -253,9 +254,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
typename OppositeExtendedType<OtherDerived>::Type
|
||||
extendedToOpposite(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxColsAtCompileTime==1),
|
||||
EIGEN_STATIC_ASSERT(internal::check_implication(isHorizontal, OtherDerived::MaxColsAtCompileTime==1),
|
||||
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxRowsAtCompileTime==1),
|
||||
EIGEN_STATIC_ASSERT(internal::check_implication(isVertical, OtherDerived::MaxRowsAtCompileTime==1),
|
||||
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
|
||||
return typename OppositeExtendedType<OtherDerived>::Type
|
||||
(other.derived(),
|
||||
@@ -594,7 +595,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
return m_matrix += extendedTo(other.derived());
|
||||
}
|
||||
|
||||
/** Substracts the vector \a other to each subvector of \c *this */
|
||||
/** Subtracts the vector \a other from each subvector of \c *this */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
|
||||
@@ -604,7 +605,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
return m_matrix -= extendedTo(other.derived());
|
||||
}
|
||||
|
||||
/** Multiples each subvector of \c *this by the vector \a other */
|
||||
/** Multiplies each subvector of \c *this by the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
|
||||
|
||||
@@ -10,16 +10,23 @@
|
||||
#ifndef EIGEN_VISITOR_H
|
||||
#define EIGEN_VISITOR_H
|
||||
|
||||
#include "./InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Visitor, typename Derived, int UnrollCount, bool Vectorize=((Derived::PacketAccess!=0) && functor_traits<Visitor>::PacketAccess)>
|
||||
struct visitor_impl;
|
||||
|
||||
template<typename Visitor, typename Derived, int UnrollCount>
|
||||
struct visitor_impl
|
||||
struct visitor_impl<Visitor, Derived, UnrollCount, false>
|
||||
{
|
||||
enum {
|
||||
col = (UnrollCount-1) / Derived::RowsAtCompileTime,
|
||||
row = (UnrollCount-1) % Derived::RowsAtCompileTime
|
||||
col = Derived::IsRowMajor ? (UnrollCount-1) % Derived::ColsAtCompileTime
|
||||
: (UnrollCount-1) / Derived::RowsAtCompileTime,
|
||||
row = Derived::IsRowMajor ? (UnrollCount-1) / Derived::ColsAtCompileTime
|
||||
: (UnrollCount-1) % Derived::RowsAtCompileTime
|
||||
};
|
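For example, in the column-major branch with RowsAtCompileTime == 2 and UnrollCount == 5, this step visits col = (5-1)/2 = 2 and row = (5-1)%2 = 0, i.e. coefficient (0,2); the row-major branch mirrors the computation against ColsAtCompileTime, so the unrolled recursion always walks coefficients in storage order.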
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@@ -31,7 +38,7 @@ struct visitor_impl
|
||||
};
|
||||
|
||||
template<typename Visitor, typename Derived>
|
||||
struct visitor_impl<Visitor, Derived, 1>
|
||||
struct visitor_impl<Visitor, Derived, 1, false>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const Derived &mat, Visitor& visitor)
|
||||
@@ -42,24 +49,73 @@ struct visitor_impl<Visitor, Derived, 1>
|
||||
|
||||
// This specialization enables visitors on empty matrices at compile-time
|
||||
template<typename Visitor, typename Derived>
|
||||
struct visitor_impl<Visitor, Derived, 0> {
|
||||
struct visitor_impl<Visitor, Derived, 0, false> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const Derived &/*mat*/, Visitor& /*visitor*/)
|
||||
{}
|
||||
};
|
||||
|
||||
template<typename Visitor, typename Derived>
|
||||
struct visitor_impl<Visitor, Derived, Dynamic>
|
||||
struct visitor_impl<Visitor, Derived, Dynamic, /*Vectorize=*/false>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const Derived& mat, Visitor& visitor)
|
||||
{
|
||||
visitor.init(mat.coeff(0,0), 0, 0);
|
||||
for(Index i = 1; i < mat.rows(); ++i)
|
||||
visitor(mat.coeff(i, 0), i, 0);
|
||||
for(Index j = 1; j < mat.cols(); ++j)
|
||||
for(Index i = 0; i < mat.rows(); ++i)
|
||||
visitor(mat.coeff(i, j), i, j);
|
||||
if (Derived::IsRowMajor) {
|
||||
for(Index i = 1; i < mat.cols(); ++i) {
|
||||
visitor(mat.coeff(0, i), 0, i);
|
||||
}
|
||||
for(Index j = 1; j < mat.rows(); ++j) {
|
||||
for(Index i = 0; i < mat.cols(); ++i) {
|
||||
visitor(mat.coeff(j, i), j, i);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for(Index i = 1; i < mat.rows(); ++i) {
|
||||
visitor(mat.coeff(i, 0), i, 0);
|
||||
}
|
||||
for(Index j = 1; j < mat.cols(); ++j) {
|
||||
for(Index i = 0; i < mat.rows(); ++i) {
|
||||
visitor(mat.coeff(i, j), i, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Visitor, typename Derived, int UnrollSize>
|
||||
struct visitor_impl<Visitor, Derived, UnrollSize, /*Vectorize=*/true>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const Derived& mat, Visitor& visitor)
|
||||
{
|
||||
const Index PacketSize = packet_traits<Scalar>::size;
|
||||
visitor.init(mat.coeff(0,0), 0, 0);
|
||||
if (Derived::IsRowMajor) {
|
||||
for(Index i = 0; i < mat.rows(); ++i) {
|
||||
Index j = i == 0 ? 1 : 0;
|
||||
for(; j+PacketSize-1 < mat.cols(); j += PacketSize) {
|
||||
Packet p = mat.packet(i, j);
|
||||
visitor.packet(p, i, j);
|
||||
}
|
||||
for(; j < mat.cols(); ++j)
|
||||
visitor(mat.coeff(i, j), i, j);
|
||||
}
|
||||
} else {
|
||||
for(Index j = 0; j < mat.cols(); ++j) {
|
||||
Index i = j == 0 ? 1 : 0;
|
||||
for(; i+PacketSize-1 < mat.rows(); i += PacketSize) {
|
||||
Packet p = mat.packet(i, j);
|
||||
visitor.packet(p, i, j);
|
||||
}
|
||||
for(; i < mat.rows(); ++i)
|
||||
visitor(mat.coeff(i, j), i, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -68,28 +124,38 @@ template<typename XprType>
|
||||
class visitor_evaluator
|
||||
{
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef internal::evaluator<XprType> Evaluator;
|
||||
|
||||
enum {
|
||||
PacketAccess = Evaluator::Flags & PacketAccessBit,
|
||||
IsRowMajor = XprType::IsRowMajor,
|
||||
RowsAtCompileTime = XprType::RowsAtCompileTime,
|
||||
CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost
|
||||
ColsAtCompileTime = XprType::ColsAtCompileTime,
|
||||
CoeffReadCost = Evaluator::CoeffReadCost
|
||||
};
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) { }
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
|
||||
typedef std::remove_const_t<typename XprType::PacketReturnType> PacketReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_xpr.rows(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_xpr.cols(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_xpr.size(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{ return m_evaluator.coeff(row, col); }
|
||||
EIGEN_DEVICE_FUNC PacketReturnType packet(Index row, Index col) const
|
||||
{ return m_evaluator.template packet<Unaligned,PacketReturnType>(row, col); }
|
||||
|
||||
protected:
|
||||
internal::evaluator<XprType> m_evaluator;
|
||||
Evaluator m_evaluator;
|
||||
const XprType &m_xpr;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** Applies the visitor \a visitor to all coefficients of the matrix or vector.
|
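A minimal sketch of the visitor protocol these implementations drive (names here are illustrative): init() receives the first coefficient and its coordinates, operator() every remaining one.

#include <Eigen/Dense>
#include <iostream>

struct SumVisitor {
  double sum = 0;
  void init(const double& v, Eigen::Index, Eigen::Index) { sum = v; }
  void operator()(const double& v, Eigen::Index, Eigen::Index) { sum += v; }
};

int main() {
  Eigen::Matrix2d m;
  m << 1, 2, 3, 4;
  SumVisitor vis;
  m.visit(vis);                  // applies the visitor to every coefficient
  std::cout << vis.sum << "\n";  // prints 10
}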
||||
@@ -152,123 +218,131 @@ struct coeff_visitor
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Visitor computing the min coefficient with its value and coordinates
|
||||
*
|
||||
* \sa DenseBase::minCoeff(Index*, Index*)
|
||||
*/
|
||||
template <typename Derived, int NaNPropagation>
|
||||
struct min_coeff_visitor : coeff_visitor<Derived>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
if(value < this->res)
|
||||
{
|
||||
this->res = value;
|
||||
this->row = i;
|
||||
this->col = j;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Derived>
|
||||
struct min_coeff_visitor<Derived, PropagateNumbers> : coeff_visitor<Derived>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
if((numext::isnan)(this->res) || (!(numext::isnan)(value) && value < this->res))
|
||||
{
|
||||
this->res = value;
|
||||
this->row = i;
|
||||
this->col = j;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Derived>
|
||||
struct min_coeff_visitor<Derived, PropagateNaN> : coeff_visitor<Derived>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
if((numext::isnan)(value) || value < this->res)
|
||||
{
|
||||
this->res = value;
|
||||
this->row = i;
|
||||
this->col = j;
|
||||
}
|
||||
}
|
||||
template<typename Scalar, int NaNPropagation, bool is_min=true>
|
||||
struct minmax_compare {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
static EIGEN_DEVICE_FUNC inline bool compare(Scalar a, Scalar b) { return a < b; }
|
||||
static EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& p) { return predux_min<NaNPropagation>(p);}
|
||||
};
|
||||
|
||||
template<typename Scalar, int NaNPropagation>
|
||||
struct functor_traits<min_coeff_visitor<Scalar, NaNPropagation> > {
|
||||
struct minmax_compare<Scalar, NaNPropagation, false> {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
static EIGEN_DEVICE_FUNC inline bool compare(Scalar a, Scalar b) { return a > b; }
|
||||
static EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& p) { return predux_max<NaNPropagation>(p);}
|
||||
};
|
||||
|
||||
template <typename Derived, bool is_min, int NaNPropagation>
|
||||
struct minmax_coeff_visitor : coeff_visitor<Derived>
|
||||
{
|
||||
using Scalar = typename Derived::Scalar;
|
||||
using Packet = typename packet_traits<Scalar>::type;
|
||||
using Comparator = minmax_compare<Scalar, NaNPropagation, is_min>;
|
||||
|
||||
EIGEN_DEVICE_FUNC inline
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
if(Comparator::compare(value, this->res)) {
|
||||
this->res = value;
|
||||
this->row = i;
|
||||
this->col = j;
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline
|
||||
void packet(const Packet& p, Index i, Index j) {
|
||||
const Index PacketSize = packet_traits<Scalar>::size;
|
||||
Scalar value = Comparator::predux(p);
|
||||
if (Comparator::compare(value, this->res)) {
|
||||
const Packet range = preverse(plset<Packet>(Scalar(1)));
|
||||
Packet mask = pcmp_eq(pset1<Packet>(value), p);
|
||||
Index max_idx = PacketSize - static_cast<Index>(predux_max(pand(range, mask)));
|
||||
this->res = value;
|
||||
this->row = Derived::IsRowMajor ? i : i + max_idx;
|
||||
this->col = Derived::IsRowMajor ? j + max_idx : j;
|
||||
}
|
||||
}
|
||||
};
|
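To see how the lane index is recovered: with PacketSize == 4 and the extremum in lane 2 of p, plset(1) yields [1,2,3,4] and preverse turns it into [4,3,2,1]; the equality mask keeps that reversed ramp only in matching lanes, predux_max returns the largest survivor (here 2, belonging to the first matching lane), and max_idx = 4 - 2 = 2 is the lane offset added to the row or column.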
||||
|
||||
// Suppress NaN. The only case in which we return NaN is if the matrix is all NaN,
|
||||
// in which case the location row=0, col=0 is returned.
|
||||
template <typename Derived, bool is_min>
|
||||
struct minmax_coeff_visitor<Derived, is_min, PropagateNumbers> : coeff_visitor<Derived>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
using Packet = typename packet_traits<Scalar>::type;
|
||||
using Comparator = minmax_compare<Scalar, PropagateNumbers, is_min>;
|
||||
|
||||
EIGEN_DEVICE_FUNC inline
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
if ((!(numext::isnan)(value) && (numext::isnan)(this->res)) || Comparator::compare(value, this->res)) {
|
||||
this->res = value;
|
||||
this->row = i;
|
||||
this->col = j;
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline
|
||||
void packet(const Packet& p, Index i, Index j) {
|
||||
const Index PacketSize = packet_traits<Scalar>::size;
|
||||
Scalar value = Comparator::predux(p);
|
||||
if ((!(numext::isnan)(value) && (numext::isnan)(this->res)) || Comparator::compare(value, this->res)) {
|
||||
const Packet range = preverse(plset<Packet>(Scalar(1)));
|
||||
/* mask will be zero for NaNs, so they will be ignored. */
|
||||
Packet mask = pcmp_eq(pset1<Packet>(value), p);
|
||||
Index max_idx = PacketSize - static_cast<Index>(predux_max(pand(range, mask)));
|
||||
this->res = value;
|
||||
this->row = Derived::IsRowMajor ? i : i + max_idx;
|
||||
this->col = Derived::IsRowMajor ? j + max_idx : j;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// Propagate NaN. If the matrix contains NaN, the location of the first NaN will be returned in
|
||||
// row and col.
|
||||
template <typename Derived, bool is_min>
|
||||
struct minmax_coeff_visitor<Derived, is_min, PropagateNaN> : coeff_visitor<Derived>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
using Packet = typename packet_traits<Scalar>::type;
|
||||
using Comparator = minmax_compare<Scalar, PropagateNaN, is_min>;
|
||||
|
||||
EIGEN_DEVICE_FUNC inline
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
const bool value_is_nan = (numext::isnan)(value);
|
||||
if ((value_is_nan && !(numext::isnan)(this->res)) || Comparator::compare(value, this->res)) {
|
||||
this->res = value;
|
||||
this->row = i;
|
||||
this->col = j;
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline
|
||||
void packet(const Packet& p, Index i, Index j) {
|
||||
const Index PacketSize = packet_traits<Scalar>::size;
|
||||
Scalar value = Comparator::predux(p);
|
||||
const bool value_is_nan = (numext::isnan)(value);
|
||||
if ((value_is_nan && !(numext::isnan)(this->res)) || Comparator::compare(value, this->res)) {
|
||||
const Packet range = preverse(plset<Packet>(Scalar(1)));
|
||||
// If the value is NaN, pick the first position of a NaN, otherwise pick the first extremal value.
|
||||
Packet mask = value_is_nan ? pnot(pcmp_eq(p, p)) : pcmp_eq(pset1<Packet>(value), p);
|
||||
Index max_idx = PacketSize - static_cast<Index>(predux_max(pand(range, mask)));
|
||||
this->res = value;
|
||||
this->row = Derived::IsRowMajor ? i : i + max_idx;
|
||||
this->col = Derived::IsRowMajor ? j + max_idx : j;
|
||||
}
|
||||
}
|
||||
};
|
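A sketch of the user-visible difference between the two policies (values are illustrative):

#include <Eigen/Dense>
#include <limits>

void nan_policy_demo() {
  Eigen::Vector3f v(1.f, std::numeric_limits<float>::quiet_NaN(), -2.f);
  float a = v.minCoeff<Eigen::PropagateNumbers>();  // -2.f: NaN is ignored
  float b = v.minCoeff<Eigen::PropagateNaN>();      // NaN: NaN wins
  (void)a; (void)b;
}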
||||
|
||||
template<typename Scalar, bool is_min, int NaNPropagation>
|
||||
struct functor_traits<minmax_coeff_visitor<Scalar, is_min, NaNPropagation> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Visitor computing the max coefficient with its value and coordinates
|
||||
*
|
||||
* \sa DenseBase::maxCoeff(Index*, Index*)
|
||||
*/
|
||||
template <typename Derived, int NaNPropagation>
|
||||
struct max_coeff_visitor : coeff_visitor<Derived>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
if(value > this->res)
|
||||
{
|
||||
this->res = value;
|
||||
this->row = i;
|
||||
this->col = j;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Derived>
|
||||
struct max_coeff_visitor<Derived, PropagateNumbers> : coeff_visitor<Derived>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
if((numext::isnan)(this->res) || (!(numext::isnan)(value) && value > this->res))
|
||||
{
|
||||
this->res = value;
|
||||
this->row = i;
|
||||
this->col = j;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Derived>
|
||||
struct max_coeff_visitor<Derived, PropagateNaN> : coeff_visitor<Derived>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
if((numext::isnan)(value) || value > this->res)
|
||||
{
|
||||
this->res = value;
|
||||
this->row = i;
|
||||
this->col = j;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar, int NaNPropagation>
|
||||
struct functor_traits<max_coeff_visitor<Scalar, NaNPropagation> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = true
|
||||
};
|
||||
};
|
||||
|
||||
@@ -293,7 +367,7 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
||||
{
|
||||
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
||||
|
||||
internal::min_coeff_visitor<Derived, NaNPropagation> minVisitor;
|
||||
internal::minmax_coeff_visitor<Derived, true, NaNPropagation> minVisitor;
|
||||
this->visit(minVisitor);
|
||||
*rowId = minVisitor.row;
|
||||
if (colId) *colId = minVisitor.col;
|
||||
@@ -319,7 +393,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
|
||||
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
||||
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
internal::min_coeff_visitor<Derived, NaNPropagation> minVisitor;
|
||||
internal::minmax_coeff_visitor<Derived, true, NaNPropagation> minVisitor;
|
||||
this->visit(minVisitor);
|
||||
*index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row);
|
||||
return minVisitor.res;
|
||||
@@ -344,7 +418,7 @@ DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
|
||||
{
|
||||
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
||||
|
||||
internal::max_coeff_visitor<Derived, NaNPropagation> maxVisitor;
|
||||
internal::minmax_coeff_visitor<Derived, false, NaNPropagation> maxVisitor;
|
||||
this->visit(maxVisitor);
|
||||
*rowPtr = maxVisitor.row;
|
||||
if (colPtr) *colPtr = maxVisitor.col;
|
||||
@@ -370,7 +444,7 @@ DenseBase<Derived>::maxCoeff(IndexType* index) const
|
||||
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
||||
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
internal::max_coeff_visitor<Derived, NaNPropagation> maxVisitor;
|
||||
internal::minmax_coeff_visitor<Derived, false, NaNPropagation> maxVisitor;
|
||||
this->visit(maxVisitor);
|
||||
*index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row;
|
||||
return maxVisitor.res;
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#ifndef EIGEN_COMPLEX_AVX_H
|
||||
#define EIGEN_COMPLEX_AVX_H
|
||||
|
||||
#include "../../InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
@@ -99,7 +101,9 @@ template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<fl
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
|
||||
{
|
||||
return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from)));
|
||||
const float re = std::real(from);
|
||||
const float im = std::imag(from);
|
||||
return Packet4cf(_mm256_set_ps(im, re, im, re, im, re, im, re));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
|
||||
@@ -167,15 +171,12 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const P
|
||||
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
|
||||
}
|
||||
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
|
||||
{
|
||||
Packet4cf num = pmul(a, pconj(b));
|
||||
__m256 tmp = _mm256_mul_ps(b.v, b.v);
|
||||
__m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
|
||||
__m256 denom = _mm256_add_ps(tmp, tmp2);
|
||||
return Packet4cf(_mm256_div_ps(num.v, denom));
|
||||
return pdiv_complex(a, b);
|
||||
}
|
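pdiv_complex centralizes the identity the removed code spelled out by hand, a/b = a*conj(b) / |b|^2; a scalar sketch of the same computation:

#include <complex>

// What the vectorized path applies lane-wise: multiply by the conjugate,
// then divide by the squared modulus of the denominator.
std::complex<float> complex_div(std::complex<float> a, std::complex<float> b) {
  const float denom = b.real() * b.real() + b.imag() * b.imag();
  const std::complex<float> num = a * std::conj(b);
  return {num.real() / denom, num.imag() / denom};
}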
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
|
||||
@@ -321,10 +322,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
|
||||
{
|
||||
Packet2cd num = pmul(a, pconj(b));
|
||||
__m256d tmp = _mm256_mul_pd(b.v, b.v);
|
||||
__m256d denom = _mm256_hadd_pd(tmp, tmp);
|
||||
return Packet2cd(_mm256_div_pd(num.v, denom));
|
||||
return pdiv_complex(a, b);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
|
||||
|
||||
@@ -14,52 +14,78 @@
|
||||
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
||||
*/
|
||||
|
||||
#include "../../InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
psin<Packet8f>(const Packet8f& _x) {
|
||||
return psin_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
pcos<Packet8f>(const Packet8f& _x) {
|
||||
return pcos_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
pasin<Packet8f>(const Packet8f& _x) {
|
||||
return pasin_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
pacos<Packet8f>(const Packet8f& _x) {
|
||||
return pacos_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
patan<Packet8f>(const Packet8f& _x) {
|
||||
return patan_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
|
||||
patan<Packet4d>(const Packet4d& _x) {
|
||||
return patan_double(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
plog<Packet8f>(const Packet8f& _x) {
|
||||
return plog_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
|
||||
plog<Packet4d>(const Packet4d& _x) {
|
||||
return plog_double(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
plog2<Packet8f>(const Packet8f& _x) {
|
||||
return plog2_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
|
||||
plog2<Packet4d>(const Packet4d& _x) {
|
||||
return plog2_double(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet8f plog1p<Packet8f>(const Packet8f& _x) {
|
||||
return generic_plog1p(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet8f pexpm1<Packet8f>(const Packet8f& _x) {
|
||||
return generic_expm1(_x);
|
||||
}
|
||||
@@ -68,110 +94,59 @@ Packet8f pexpm1<Packet8f>(const Packet8f& _x) {
|
||||
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
|
||||
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
pexp<Packet8f>(const Packet8f& _x) {
|
||||
return pexp_float(_x);
|
||||
}
|
||||
|
||||
// Hyperbolic Tangent function.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
ptanh<Packet8f>(const Packet8f& _x) {
|
||||
return internal::generic_fast_tanh_float(_x);
|
||||
}
|
||||
|
||||
// Exponential function for doubles.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
|
||||
pexp<Packet4d>(const Packet4d& _x) {
|
||||
return pexp_double(_x);
|
||||
}
|
||||
|
||||
// Functions for sqrt.
|
||||
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
||||
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
||||
// exact solution. It does not handle +inf or denormalized numbers correctly.
|
||||
// The main advantage of this approach is not just speed, but also the fact that
|
||||
// it can be inlined and pipelined with other computations, further reducing its
|
||||
// effective latency. This is similar to Quake3's fast inverse square root.
|
||||
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
||||
#if EIGEN_FAST_MATH
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet8f psqrt<Packet8f>(const Packet8f& _x) {
|
||||
Packet8f minus_half_x = pmul(_x, pset1<Packet8f>(-0.5f));
|
||||
Packet8f denormal_mask = pandnot(
|
||||
pcmp_lt(_x, pset1<Packet8f>((std::numeric_limits<float>::min)())),
|
||||
pcmp_lt(_x, pzero(_x)));
|
||||
|
||||
// Compute approximate reciprocal sqrt.
|
||||
Packet8f x = _mm256_rsqrt_ps(_x);
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(minus_half_x, pmul(x,x), pset1<Packet8f>(1.5f)));
|
||||
// Flush results for denormals to zero.
|
||||
return pandnot(pmul(_x,x), denormal_mask);
|
||||
}
|
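A scalar sketch of the fast-math recipe above (denormal flushing omitted): estimate y0 ~ 1/sqrt(x) with the rsqrt intrinsic, refine it with one Newton step, then recover sqrt(x) as x * y.

// One Newton step on 1/sqrt(x): y' = y * (1.5 - 0.5 * x * y * y),
// followed by sqrt(x) = x * (1/sqrt(x)).
float fast_sqrt_sketch(float x, float y0) {
  const float y = y0 * (1.5f - 0.5f * x * y0 * y0);
  return x * y;
}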
||||
|
||||
#else
|
||||
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
// Notice that for newer processors, it is counterproductive to use Newton
|
||||
// iteration for square root. In particular, Skylake and Zen2 processors
|
||||
// have approximately doubled throughput of the _mm_sqrt_ps instruction
|
||||
// compared to their predecessors.
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet8f psqrt<Packet8f>(const Packet8f& _x) {
|
||||
return _mm256_sqrt_ps(_x);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4d psqrt<Packet4d>(const Packet4d& _x) {
|
||||
return _mm256_sqrt_pd(_x);
|
||||
}
|
||||
|
||||
|
||||
// Even on Skylake, using Newton iteration is a win for reciprocal square root.
|
||||
#if EIGEN_FAST_MATH
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet8f neg_half = pmul(_x, p8f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
Packet8f lt_min_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
|
||||
Packet8f inf_mask = _mm256_cmp_ps(_x, p8f_inf, _CMP_EQ_OQ);
|
||||
Packet8f not_normal_finite_mask = _mm256_or_ps(lt_min_mask, inf_mask);
|
||||
|
||||
// Compute an approximate result using the rsqrt intrinsic.
|
||||
Packet8f y_approx = _mm256_rsqrt_ps(_x);
|
||||
|
||||
// Do a single step of Newton-Raphson iteration to improve the approximation.
|
||||
// This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).
|
||||
// It is essential to evaluate the inner term like this because forming
|
||||
// y_n^2 may over- or underflow.
|
||||
Packet8f y_newton = pmul(y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p8f_one_point_five));
|
||||
|
||||
// Select the result of the Newton-Raphson step for positive normal arguments.
|
||||
// For other arguments, choose the output of the intrinsic. This will
|
||||
// return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(x) = +inf if
|
||||
// x is zero or a positive denormalized float (equivalent to flushing positive
|
||||
// denormalized inputs to zero).
|
||||
return pselect<Packet8f>(not_normal_finite_mask, y_approx, y_newton);
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet8f prsqrt<Packet8f>(const Packet8f& a) {
|
||||
// _mm256_rsqrt_ps returns -inf for negative denormals.
|
||||
// _mm512_rsqrt**_ps returns -NaN for negative denormals. We may want
|
||||
// consistency here.
|
||||
// const Packet8f rsqrt = pselect(pcmp_lt(a, pzero(a)),
|
||||
// pset1<Packet8f>(-NumTraits<float>::quiet_NaN()),
|
||||
// _mm256_rsqrt_ps(a));
|
||||
return generic_rsqrt_newton_step<Packet8f, /*Steps=*/1>::run(a, _mm256_rsqrt_ps(a));
|
||||
}
|
||||
|
||||
#else
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
|
||||
return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(_x));
|
||||
template<> EIGEN_STRONG_INLINE Packet8f preciprocal<Packet8f>(const Packet8f& a) {
|
||||
return generic_reciprocal_newton_step<Packet8f, /*Steps=*/1>::run(a, _mm256_rcp_ps(a));
|
||||
}
|
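The scalar analogue of the reciprocal refinement (one Newton step on the rough _mm256_rcp_ps estimate): y' = y * (2 - x * y) converges quadratically toward 1/x.

float reciprocal_newton_step(float x, float y) {
  return y * (2.0f - x * y);
}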
||||
|
||||
#endif
|
||||
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4d prsqrt<Packet4d>(const Packet4d& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
|
||||
return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(_x));
|
||||
}
|
||||
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, psin)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos)
|
||||
@@ -183,6 +158,7 @@ F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, preciprocal)
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) {
|
||||
@@ -207,6 +183,7 @@ BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, preciprocal)
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8bf pfrexp(const Packet8bf& a, Packet8bf& exponent) {
|
||||
|
||||
@@ -10,6 +10,8 @@
#ifndef EIGEN_PACKET_MATH_AVX_H
#define EIGEN_PACKET_MATH_AVX_H

#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

@@ -29,28 +31,29 @@ namespace internal {
#endif

typedef __m256  Packet8f;
typedef __m256i Packet8i;
typedef eigen_packet_wrapper<__m256i, 0> Packet8i;
typedef __m256d Packet4d;
#ifndef EIGEN_VECTORIZE_AVX512FP16
typedef eigen_packet_wrapper<__m128i, 2> Packet8h;
#endif
typedef eigen_packet_wrapper<__m128i, 3> Packet8bf;

#ifdef EIGEN_VECTORIZE_AVX2
// Start from 3 to be compatible with AVX512
typedef eigen_packet_wrapper<__m256i, 3> Packet4l;
#endif

template<> struct is_arithmetic<__m256>  { enum { value = true }; };
template<> struct is_arithmetic<__m256i> { enum { value = true }; };
template<> struct is_arithmetic<__m256d> { enum { value = true }; };
template<> struct is_arithmetic<Packet8i> { enum { value = true }; };
#ifndef EIGEN_VECTORIZE_AVX512FP16
template<> struct is_arithmetic<Packet8h> { enum { value = true }; };
#endif
template<> struct is_arithmetic<Packet8bf> { enum { value = true }; };

#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
  const Packet8f p8f_##NAME = pset1<Packet8f>(X)

#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
  const Packet4d p4d_##NAME = pset1<Packet4d>(X)

#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
  const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))

#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
  const Packet8i p8i_##NAME = pset1<Packet8i>(X)
#ifdef EIGEN_VECTORIZE_AVX2
template<> struct is_arithmetic<Packet4l> { enum { value = true }; };
#endif

// Use the packet_traits defined in AVX512/PacketMath.h instead if we're going
// to leverage AVX512 instructions.
@@ -67,8 +70,12 @@ template<> struct packet_traits<float> : default_packet_traits

    HasCmp = 1,
    HasDiv = 1,
    HasReciprocal = EIGEN_FAST_MATH,
    HasSin = EIGEN_FAST_MATH,
    HasCos = EIGEN_FAST_MATH,
    HasACos = 1,
    HasASin = 1,
    HasATan = 1,
    HasLog = 1,
    HasLog1p = 1,
    HasExpm1 = 1,
@@ -102,6 +109,7 @@ template<> struct packet_traits<double> : default_packet_traits
    HasExp = 1,
    HasSqrt = 1,
    HasRsqrt = 1,
    HasATan = 1,
    HasBlend = 1,
    HasRound = 1,
    HasFloor = 1,
@@ -196,38 +204,74 @@ struct packet_traits<bfloat16> : default_packet_traits {
    HasNdtri = 1
  };
};

template<> struct packet_traits<int> : default_packet_traits
{
  typedef Packet8i type;
  typedef Packet4i half;
  enum {
    Vectorizable = 1,
    AlignedOnScalar = 1,
    HasCmp = 1,
    HasDiv = 1,
    size=8
  };
};

#ifdef EIGEN_VECTORIZE_AVX2
template<> struct packet_traits<int64_t> : default_packet_traits
{
  typedef Packet4l type;
  // There is no half-size packet for the current Packet4l.
  // TODO: support a half packet as on the SSE path.
  typedef Packet4l half;
  enum {
    Vectorizable = 1,
    AlignedOnScalar = 1,
    HasCmp = 1,
    size=4
  };
};
#endif

#endif

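// Hypothetical usage sketch (not part of this commit): generic kernels read
// packet_traits to pick the packet type and width for a scalar type, then use
// the wrappers defined in this file. Names follow the surrounding header.
// template <typename Scalar>
// void scale_in_place_sketch(Scalar* data, Scalar a, Index n) {
//   typedef typename packet_traits<Scalar>::type Packet;
//   const Index step = packet_traits<Scalar>::size;
//   const Packet pa = pset1<Packet>(a);
//   Index i = 0;
//   for (; i + step <= n; i += step) pstoreu(data + i, pmul(ploadu<Packet>(data + i), pa));
//   for (; i < n; ++i) data[i] *= a;  // scalar tail
// }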
template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };

/* Proper support for integers is only provided by AVX2. In the meantime, we'll
   use SSE instructions and packets to deal with integers.
template<> struct packet_traits<int> : default_packet_traits
{
  typedef Packet8i type;
  enum {
    Vectorizable = 1,
    AlignedOnScalar = 1,
    size=8
  };
};
*/

template<> struct unpacket_traits<Packet8f> {
  typedef float type;
  typedef Packet4f half;
  typedef Packet8i integer_packet;
  typedef uint8_t mask_t;
  enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true, masked_store_available=true};
  enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true, masked_store_available=true
#ifdef EIGEN_VECTORIZE_AVX512
    , masked_fpops_available=true
#endif
  };
};
template<> struct unpacket_traits<Packet4d> {
  typedef double type;
  typedef Packet2d half;
  enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false};
};
template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32, vectorizable=false, masked_load_available=false, masked_store_available=false}; };
template<> struct unpacket_traits<Packet8bf> { typedef bfloat16 type; typedef Packet8bf half; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; };
template<> struct unpacket_traits<Packet8i> {
  typedef int type;
  typedef Packet4i half;
  enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false};
};
#ifdef EIGEN_VECTORIZE_AVX2
template<> struct unpacket_traits<Packet4l> {
  typedef int64_t type;
  typedef Packet4l half;
  enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false};
};
#endif
template<> struct unpacket_traits<Packet8bf> {
  typedef bfloat16 type;
  typedef Packet8bf half;
  enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};
};

// Helper function for bit packing snippet of low precision comparison.
// It packs the flags from 16x16 to 8x16.
@@ -236,6 +280,210 @@ EIGEN_STRONG_INLINE __m128i Pack16To8(Packet8f rf) {
                         _mm256_extractf128_si256(_mm256_castps_si256(rf), 1));
}

#ifdef EIGEN_VECTORIZE_AVX2
template <>
EIGEN_STRONG_INLINE Packet4l pset1<Packet4l>(const int64_t& from) {
  return _mm256_set1_epi64x(from);
}
template <>
EIGEN_STRONG_INLINE Packet4l pzero(const Packet4l& /*a*/) {
  return _mm256_setzero_si256();
}
template <>
EIGEN_STRONG_INLINE Packet4l peven_mask(const Packet4l& /*a*/) {
  return _mm256_set_epi64x(0ll, -1ll, 0ll, -1ll);
}
template <>
EIGEN_STRONG_INLINE Packet4l pload1<Packet4l>(const int64_t* from) {
  return _mm256_set1_epi64x(*from);
}
template <>
EIGEN_STRONG_INLINE Packet4l padd<Packet4l>(const Packet4l& a, const Packet4l& b) {
  return _mm256_add_epi64(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet4l plset<Packet4l>(const int64_t& a) {
  return padd(pset1<Packet4l>(a), Packet4l(_mm256_set_epi64x(3ll, 2ll, 1ll, 0ll)));
}
template <>
EIGEN_STRONG_INLINE Packet4l psub<Packet4l>(const Packet4l& a, const Packet4l& b) {
  return _mm256_sub_epi64(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet4l pnegate(const Packet4l& a) {
  return psub(pzero(a), a);
}
template <>
EIGEN_STRONG_INLINE Packet4l pconj(const Packet4l& a) {
  return a;
}
template <>
EIGEN_STRONG_INLINE Packet4l pcmp_le(const Packet4l& a, const Packet4l& b) {
  return _mm256_xor_si256(_mm256_cmpgt_epi64(a, b), _mm256_set1_epi32(-1));
}
template <>
EIGEN_STRONG_INLINE Packet4l pcmp_lt(const Packet4l& a, const Packet4l& b) {
  return _mm256_cmpgt_epi64(b, a);
}
template <>
EIGEN_STRONG_INLINE Packet4l pcmp_eq(const Packet4l& a, const Packet4l& b) {
  return _mm256_cmpeq_epi64(a, b);
}
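// Scalar sketch (not part of this commit) of the pcmp_le trick above: AVX2
// only provides a 64-bit greater-than compare, so a <= b is computed as
// NOT(a > b) by XORing the compare mask with all-ones.
// inline int64_t cmp_le_mask_sketch(int64_t a, int64_t b) {
//   int64_t gt = (a > b) ? int64_t(-1) : int64_t(0);  // _mm256_cmpgt_epi64 per lane
//   return gt ^ int64_t(-1);                          // flip: all-ones when a <= b
// }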
template <>
EIGEN_STRONG_INLINE Packet4l ptrue<Packet4l>(const Packet4l& a) {
  return _mm256_cmpeq_epi64(a, a);
}
template <>
EIGEN_STRONG_INLINE Packet4l pand<Packet4l>(const Packet4l& a, const Packet4l& b) {
  return _mm256_and_si256(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet4l por<Packet4l>(const Packet4l& a, const Packet4l& b) {
  return _mm256_or_si256(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet4l pxor<Packet4l>(const Packet4l& a, const Packet4l& b) {
  return _mm256_xor_si256(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet4l pandnot<Packet4l>(const Packet4l& a, const Packet4l& b) {
  return _mm256_andnot_si256(b, a);
}
template <int N>
EIGEN_STRONG_INLINE Packet4l plogical_shift_right(Packet4l a) {
  return _mm256_srli_epi64(a, N);
}
template <int N>
EIGEN_STRONG_INLINE Packet4l plogical_shift_left(Packet4l a) {
  return _mm256_slli_epi64(a, N);
}
#ifdef EIGEN_VECTORIZE_AVX512FP16
template <int N>
EIGEN_STRONG_INLINE Packet4l parithmetic_shift_right(Packet4l a) { return _mm256_srai_epi64(a, N); }
#else
template <int N>
EIGEN_STRONG_INLINE std::enable_if_t<(N == 0), Packet4l> parithmetic_shift_right(Packet4l a) {
  return a;
}
template <int N>
EIGEN_STRONG_INLINE std::enable_if_t<(N > 0) && (N < 32), Packet4l> parithmetic_shift_right(Packet4l a) {
  __m256i hi_word = _mm256_srai_epi32(a, N);
  __m256i lo_word = _mm256_srli_epi64(a, N);
  return _mm256_blend_epi32(hi_word, lo_word, 0b01010101);
}
template <int N>
EIGEN_STRONG_INLINE std::enable_if_t<(N >= 32) && (N < 63), Packet4l> parithmetic_shift_right(Packet4l a) {
  __m256i hi_word = _mm256_srai_epi32(a, 31);
  __m256i lo_word = _mm256_shuffle_epi32(_mm256_srai_epi32(a, N - 32), (shuffle_mask<1, 1, 3, 3>::mask));
  return _mm256_blend_epi32(hi_word, lo_word, 0b01010101);
}
template <int N>
EIGEN_STRONG_INLINE std::enable_if_t<(N == 63), Packet4l> parithmetic_shift_right(Packet4l a) {
  return _mm256_shuffle_epi32(_mm256_srai_epi32(a, 31), (shuffle_mask<1, 1, 3, 3>::mask));
}
template <int N>
EIGEN_STRONG_INLINE std::enable_if_t<(N < 0) || (N > 63), Packet4l> parithmetic_shift_right(Packet4l a) {
  return parithmetic_shift_right<int(N & 63)>(a);
}
#endif
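// Scalar sketch (not part of this commit) of the blend-based emulation above
// for 0 < N < 32: the high 32-bit word needs a sign-propagating shift, while
// the low word of the result falls out of the 64-bit logical shift.
// inline int64_t sar64_via_32bit_sketch(int64_t a, int N) {  // assumes 0 < N < 32
//   uint64_t lo = static_cast<uint64_t>(a) >> N;             // _mm256_srli_epi64
//   int32_t hi = static_cast<int32_t>(a >> 32) >> N;         // _mm256_srai_epi32 on the high word
//   return static_cast<int64_t>(static_cast<uint64_t>(static_cast<uint32_t>(lo)) |
//                               (static_cast<uint64_t>(static_cast<uint32_t>(hi)) << 32));
// }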
template <>
EIGEN_STRONG_INLINE Packet4l pload<Packet4l>(const int64_t* from) {
  EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from));
}
template <>
EIGEN_STRONG_INLINE Packet4l ploadu<Packet4l>(const int64_t* from) {
  EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));
}
// Loads 2 int64_ts from memory and returns the packet {a0, a0, a1, a1}
template <>
EIGEN_STRONG_INLINE Packet4l ploaddup<Packet4l>(const int64_t* from) {
  const Packet4l a = _mm256_castsi128_si256(_mm_loadu_si128(reinterpret_cast<const __m128i*>(from)));
  return _mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 1, 0, 1, 2, 3, 2, 3));
}
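// Usage sketch (assumption, not part of this commit): the permutevar index
// pattern {0,1,0,1,2,3,2,3} repeats the two 32-bit halves of each int64_t, so
// loading int64_t src[2] = {7, 9} with ploaddup<Packet4l>(src) produces the
// lanes {7, 7, 9, 9}.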
template <>
EIGEN_STRONG_INLINE void pstore<int64_t>(int64_t* to, const Packet4l& from) {
  EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to), from);
}
template <>
EIGEN_STRONG_INLINE void pstoreu<int64_t>(int64_t* to, const Packet4l& from) {
  EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from);
}
template <>
EIGEN_DEVICE_FUNC inline Packet4l pgather<int64_t, Packet4l>(const int64_t* from, Index stride) {
  return _mm256_set_epi64x(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]);
}
template <>
EIGEN_DEVICE_FUNC inline void pscatter<int64_t, Packet4l>(int64_t* to, const Packet4l& from, Index stride) {
  __m128i low = _mm256_extractf128_si256(from, 0);
  to[stride * 0] = _mm_extract_epi64(low, 0);
  to[stride * 1] = _mm_extract_epi64(low, 1);

  __m128i high = _mm256_extractf128_si256(from, 1);
  to[stride * 2] = _mm_extract_epi64(high, 0);
  to[stride * 3] = _mm_extract_epi64(high, 1);
}
template <>
EIGEN_STRONG_INLINE void pstore1<Packet4l>(int64_t* to, const int64_t& a) {
  Packet4l pa = pset1<Packet4l>(a);
  pstore(to, pa);
}
template <>
EIGEN_STRONG_INLINE int64_t pfirst<Packet4l>(const Packet4l& a) {
  return _mm_cvtsi128_si64(_mm256_castsi256_si128(a));
}
template <>
EIGEN_STRONG_INLINE int64_t predux<Packet4l>(const Packet4l& a) {
  __m128i r = _mm_add_epi64(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1));
  return _mm_extract_epi64(r, 0) + _mm_extract_epi64(r, 1);
}
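// Scalar equivalent (sketch, not part of this commit) of the horizontal sum
// above: fold the two 128-bit halves, then add the two surviving 64-bit lanes.
// inline int64_t predux4_scalar_sketch(const int64_t v[4]) {
//   return (v[0] + v[2]) + (v[1] + v[3]);  // same association as the SIMD fold
// }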
#define MM256_SHUFFLE_EPI64(A, B, M) _mm256_shuffle_pd(_mm256_castsi256_pd(A), _mm256_castsi256_pd(B), M)
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4l, 4>& kernel) {
  __m256d T0 = MM256_SHUFFLE_EPI64(kernel.packet[0], kernel.packet[1], 15);
  __m256d T1 = MM256_SHUFFLE_EPI64(kernel.packet[0], kernel.packet[1], 0);
  __m256d T2 = MM256_SHUFFLE_EPI64(kernel.packet[2], kernel.packet[3], 15);
  __m256d T3 = MM256_SHUFFLE_EPI64(kernel.packet[2], kernel.packet[3], 0);

  kernel.packet[1] = _mm256_castpd_si256(_mm256_permute2f128_pd(T0, T2, 32));
  kernel.packet[3] = _mm256_castpd_si256(_mm256_permute2f128_pd(T0, T2, 49));
  kernel.packet[0] = _mm256_castpd_si256(_mm256_permute2f128_pd(T1, T3, 32));
  kernel.packet[2] = _mm256_castpd_si256(_mm256_permute2f128_pd(T1, T3, 49));
}
template <>
EIGEN_STRONG_INLINE Packet4l pmin<Packet4l>(const Packet4l& a, const Packet4l& b) {
  __m256i cmp = _mm256_cmpgt_epi64(a, b);
  __m256i a_min = _mm256_andnot_si256(cmp, a);
  __m256i b_min = _mm256_and_si256(cmp, b);
  return Packet4l(_mm256_or_si256(a_min, b_min));
}
template <>
EIGEN_STRONG_INLINE Packet4l pmax<Packet4l>(const Packet4l& a, const Packet4l& b) {
  __m256i cmp = _mm256_cmpgt_epi64(a, b);
  __m256i a_max = _mm256_and_si256(cmp, a);
  __m256i b_max = _mm256_andnot_si256(cmp, b);
  return Packet4l(_mm256_or_si256(a_max, b_max));
}
template <>
EIGEN_STRONG_INLINE Packet4l pabs<Packet4l>(const Packet4l& a) {
  Packet4l pz = pzero<Packet4l>(a);
  Packet4l cmp = _mm256_cmpgt_epi64(a, pz);
  return psub(cmp, pxor(a, cmp));
}
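// Scalar sketch (not part of this commit) of the branch-free pabs above: with
// mask m = (a > 0) ? -1 : 0, the expression m - (a ^ m) equals a for positive
// inputs and -a otherwise.
// inline int64_t abs_via_mask_sketch(int64_t a) {
//   int64_t m = (a > 0) ? int64_t(-1) : int64_t(0);  // _mm256_cmpgt_epi64(a, 0)
//   return m - (a ^ m);  // a > 0: -1 - ~a == a;  a <= 0: 0 - a == -a
// }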
template <>
EIGEN_STRONG_INLINE Packet4l pmul<Packet4l>(const Packet4l& a, const Packet4l& b) {
  // A full 64-bit multiply requires AVX-512, so build it from 32-bit multiplications.
  __m256i upper32_a = _mm256_srli_epi64(a, 32);
  __m256i upper32_b = _mm256_srli_epi64(b, 32);

  // upper * lower cross products
  __m256i mul1 = _mm256_mul_epu32(upper32_a, b);
  __m256i mul2 = _mm256_mul_epu32(upper32_b, a);
  // lower * lower; the upper * upper partial product would land past bit 63 and can be dropped.
  __m256i mul3 = _mm256_mul_epu32(a, b);

  __m256i high = _mm256_slli_epi64(_mm256_add_epi64(mul1, mul2), 32);
  return _mm256_add_epi64(high, mul3);
}
#endif

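// Scalar cross-check (sketch, not part of this commit) of the schoolbook
// product in pmul<Packet4l>: since only the low 64 bits of the result are
// kept, the upper*upper partial product is discarded.
// inline uint64_t mul64_via_32_sketch(uint64_t a, uint64_t b) {
//   uint64_t a_lo = static_cast<uint32_t>(a), a_hi = a >> 32;
//   uint64_t b_lo = static_cast<uint32_t>(b), b_hi = b >> 32;
//   uint64_t cross = a_hi * b_lo + b_hi * a_lo;  // mul1 + mul2
//   return (cross << 32) + a_lo * b_lo;          // high + mul3
// }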
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float&  from) { return _mm256_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
@@ -256,10 +504,17 @@ template<> EIGEN_STRONG_INLINE Packet4d peven_mask(const Packet4d& /*a*/) { retu
template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }

template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }

template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
#ifdef EIGEN_VECTORIZE_AVX512
template <>
EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b, uint8_t umask) {
  __mmask16 mask = static_cast<__mmask16>(umask & 0x00FF);
  return _mm512_castps512_ps256(_mm512_maskz_add_ps(
      mask,
      _mm512_castps256_ps512(a),
      _mm512_castps256_ps512(b)));
}
#endif
template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(const Packet8i& a, const Packet8i& b) {
#ifdef EIGEN_VECTORIZE_AVX2
@@ -271,6 +526,10 @@ template<> EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(const Packet8i& a, const
#endif
}

template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return padd(pset1<Packet8f>(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return padd(pset1<Packet4d>(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
template<> EIGEN_STRONG_INLINE Packet8i plset<Packet8i>(const int& a) { return padd(pset1<Packet8i>(a), (Packet8i)_mm256_set_epi32(7,6,5,4,3,2,1,0)); }

template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8i psub<Packet8i>(const Packet8i& a, const Packet8i& b) {
@@ -285,11 +544,17 @@ template<> EIGEN_STRONG_INLINE Packet8i psub<Packet8i>(const Packet8i& a, const

template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
{
  return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
  const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));
  return _mm256_xor_ps(a, mask);
}
template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
{
  return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
  const Packet4d mask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x8000000000000000ULL));
  return _mm256_xor_pd(a, mask);
}
template<> EIGEN_STRONG_INLINE Packet8i pnegate(const Packet8i& a)
{
  return psub(pzero(a), a);
}

template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
@@ -310,36 +575,58 @@ template<> EIGEN_STRONG_INLINE Packet8i pmul<Packet8i>(const Packet8i& a, const

template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, const Packet8i& /*b*/)
{ eigen_assert(false && "packet integer division is not supported by AVX");
  return pset1<Packet8i>(0);
}

template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& a, const Packet8i& b)
{
#ifdef EIGEN_VECTORIZE_AVX512
  return _mm512_cvttpd_epi32(_mm512_div_pd(_mm512_cvtepi32_pd(a), _mm512_cvtepi32_pd(b)));
#else
  Packet4i lo = pdiv<Packet4i>(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0));
  Packet4i hi = pdiv<Packet4i>(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1));
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
#endif
}

#ifdef EIGEN_VECTORIZE_FMA
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
  // Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
  // and even register spilling with clang>=6.0 (bug 1637).
  // Gcc stupidly generates a vfmadd132ps instruction.
  // So let's enforce it to generate a vfmadd231ps instruction since the most common use
  // case is to accumulate the result of the product.
  Packet8f res = c;
  __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
  return res;
#else
  return _mm256_fmadd_ps(a,b,c);
#endif
template <>
EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
  return _mm256_fmadd_ps(a, b, c);
}
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
  // see above
  Packet4d res = c;
  __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
  return res;
#else
  return _mm256_fmadd_pd(a,b,c);
#endif
template <>
EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
  return _mm256_fmadd_pd(a, b, c);
}

template <>
EIGEN_STRONG_INLINE Packet8f pmsub(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
  return _mm256_fmsub_ps(a, b, c);
}

template <>
EIGEN_STRONG_INLINE Packet4d pmsub(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
  return _mm256_fmsub_pd(a, b, c);
}

template <>
EIGEN_STRONG_INLINE Packet8f pnmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
  return _mm256_fnmadd_ps(a, b, c);
}

template <>
EIGEN_STRONG_INLINE Packet4d pnmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
  return _mm256_fnmadd_pd(a, b, c);
}

template <>
EIGEN_STRONG_INLINE Packet8f pnmsub(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
  return _mm256_fnmsub_ps(a, b, c);
}

template <>
EIGEN_STRONG_INLINE Packet4d pnmsub(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
  return _mm256_fnmsub_pd(a, b, c);
}

#endif

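// Reference sketch (not part of this commit) of the four fused forms wrapped
// above:
//   pmadd(a,b,c)  =  a*b + c     pmsub(a,b,c)  =  a*b - c
//   pnmadd(a,b,c) = -a*b + c     pnmsub(a,b,c) = -a*b - c
// e.g. inline float pnmadd_scalar_sketch(float a, float b, float c) { return -(a * b) + c; }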
template<> EIGEN_STRONG_INLINE Packet8f pcmp_le(const Packet8f& a, const Packet8f& b) { return _mm256_cmp_ps(a,b,_CMP_LE_OQ); }
@@ -352,7 +639,26 @@ template<> EIGEN_STRONG_INLINE Packet4d pcmp_lt(const Packet4d& a, const Packet4
template<> EIGEN_STRONG_INLINE Packet4d pcmp_lt_or_nan(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a, b, _CMP_NGE_UQ); }
template<> EIGEN_STRONG_INLINE Packet4d pcmp_eq(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a,b,_CMP_EQ_OQ); }


template<> EIGEN_STRONG_INLINE Packet8i pcmp_le(const Packet8i& a, const Packet8i& b) {
#ifdef EIGEN_VECTORIZE_AVX2
  return _mm256_xor_si256(_mm256_cmpgt_epi32(a,b), _mm256_set1_epi32(-1));
#else
  __m128i lo = _mm_cmpgt_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0));
  lo = _mm_xor_si128(lo, _mm_set1_epi32(-1));
  __m128i hi = _mm_cmpgt_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1));
  hi = _mm_xor_si128(hi, _mm_set1_epi32(-1));
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
#endif
}
template<> EIGEN_STRONG_INLINE Packet8i pcmp_lt(const Packet8i& a, const Packet8i& b) {
#ifdef EIGEN_VECTORIZE_AVX2
  return _mm256_cmpgt_epi32(b,a);
#else
  __m128i lo = _mm_cmpgt_epi32(_mm256_extractf128_si256(b, 0), _mm256_extractf128_si256(a, 0));
  __m128i hi = _mm_cmpgt_epi32(_mm256_extractf128_si256(b, 1), _mm256_extractf128_si256(a, 1));
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
#endif
}
template<> EIGEN_STRONG_INLINE Packet8i pcmp_eq(const Packet8i& a, const Packet8i& b) {
#ifdef EIGEN_VECTORIZE_AVX2
  return _mm256_cmpeq_epi32(a,b);
@@ -388,6 +694,15 @@ template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const
  return _mm256_min_pd(b,a);
#endif
}
template<> EIGEN_STRONG_INLINE Packet8i pmin<Packet8i>(const Packet8i& a, const Packet8i& b) {
#ifdef EIGEN_VECTORIZE_AVX2
  return _mm256_min_epi32(a, b);
#else
  __m128i lo = _mm_min_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0));
  __m128i hi = _mm_min_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1));
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
#endif
}

template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) {
#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
@@ -411,6 +726,21 @@ template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const
  return _mm256_max_pd(b,a);
#endif
}
template<> EIGEN_STRONG_INLINE Packet8i pmax<Packet8i>(const Packet8i& a, const Packet8i& b) {
#ifdef EIGEN_VECTORIZE_AVX2
  return _mm256_max_epi32(a, b);
#else
  __m128i lo = _mm_max_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0));
  __m128i hi = _mm_max_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1));
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
#endif
}

#ifdef EIGEN_VECTORIZE_AVX2
template<> EIGEN_STRONG_INLINE Packet8i psign(const Packet8i& a) {
  return _mm256_sign_epi32(_mm256_set1_epi32(1), a);
}
#endif

// Add specializations for min/max with prescribed NaN propagation.
template<>
@@ -583,11 +913,16 @@ template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { E
template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }

template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from, uint8_t umask) {
#ifdef EIGEN_VECTORIZE_AVX512
  __mmask16 mask = static_cast<__mmask16>(umask & 0x00FF);
  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_castps512_ps256(_mm512_maskz_loadu_ps(mask, from));
#else
  Packet8i mask = _mm256_set1_epi8(static_cast<char>(umask));
  const Packet8i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe);
  mask = por<Packet8i>(mask, bit_mask);
  mask = pcmp_eq<Packet8i>(mask, _mm256_set1_epi32(0xffffffff));
  EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_maskload_ps(from, mask);
#endif
}
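// Per-lane sketch (not part of this commit) of the mask expansion above: lane
// i of bit_mask has every bit set except bit i, so after OR-ing in the
// broadcast umask, the lane compares equal to all-ones exactly when bit i of
// umask is set.
// inline int32_t lane_mask_from_umask_sketch(uint8_t umask, int lane) {  // 0 <= lane < 8
//   uint32_t broadcast = 0x01010101u * umask;  // one 32-bit lane of _mm256_set1_epi8(umask)
//   uint32_t bit_mask = ~(1u << lane);         // the per-lane constant from bit_mask
//   return ((broadcast | bit_mask) == 0xffffffffu) ? -1 : 0;
// }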

// Loads 4 floats from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, a3}
@@ -605,12 +940,26 @@ template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
  // then we can perform a consistent permutation on the global register to get everything in shape:
  return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
}
// Loads 2 doubles from memory a returns the packet {a0, a0 a1, a1}
// Loads 2 doubles from memory and returns the packet {a0, a0, a1, a1}
template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
{
  Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);
  return  _mm256_permute_pd(tmp, 3<<2);
}
// Loads 4 integers from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, a3}
template<> EIGEN_STRONG_INLINE Packet8i ploaddup<Packet8i>(const int* from)
{
#ifdef EIGEN_VECTORIZE_AVX2
  const Packet8i a = _mm256_castsi128_si256(ploadu<Packet4i>(from));
  return _mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 0, 1, 1, 2, 2, 3, 3));
#else
  __m256 tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
  // mimic an "inplace" permutation of the lower 128bits using a blend
  tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
  // then we can perform a consistent permutation on the global register to get everything in shape:
  return _mm256_castps_si256(_mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2)));
#endif
}

// Loads 2 floats from memory and returns the packet {a0, a0, a0, a0, a1, a1, a1, a1}
template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
@@ -618,6 +967,10 @@ template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
  Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
  return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
}
template<> EIGEN_STRONG_INLINE Packet8i ploadquad<Packet8i>(const int* from)
{
  return _mm256_insertf128_si256(_mm256_set1_epi32(*from), _mm_set1_epi32(*(from+1)), 1);
}

template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
@@ -628,11 +981,16 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d&
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }

template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from, uint8_t umask) {
#ifdef EIGEN_VECTORIZE_AVX512
  __mmask16 mask = static_cast<__mmask16>(umask & 0x00FF);
  EIGEN_DEBUG_UNALIGNED_STORE return _mm512_mask_storeu_ps(to, mask, _mm512_castps256_ps512(from));
#else
  Packet8i mask = _mm256_set1_epi8(static_cast<char>(umask));
  const Packet8i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe);
  mask = por<Packet8i>(mask, bit_mask);
  mask = pcmp_eq<Packet8i>(mask, _mm256_set1_epi32(0xffffffff));
  EIGEN_DEBUG_UNALIGNED_STORE return _mm256_maskstore_ps(to, mask, from);
#endif
}

// NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available
@@ -646,6 +1004,11 @@ template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const dou
{
  return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
}
template<> EIGEN_DEVICE_FUNC inline Packet8i pgather<int, Packet8i>(const int* from, Index stride)
{
  return _mm256_set_epi32(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
                          from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
}

template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, Index stride)
{
@@ -670,6 +1033,20 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to,
  to[stride*2] = _mm_cvtsd_f64(high);
  to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet8i>(int* to, const Packet8i& from, Index stride)
{
  __m128i low = _mm256_extractf128_si256(from, 0);
  to[stride*0] = _mm_extract_epi32(low, 0);
  to[stride*1] = _mm_extract_epi32(low, 1);
  to[stride*2] = _mm_extract_epi32(low, 2);
  to[stride*3] = _mm_extract_epi32(low, 3);

  __m128i high = _mm256_extractf128_si256(from, 1);
  to[stride*4] = _mm_extract_epi32(high, 0);
  to[stride*5] = _mm_extract_epi32(high, 1);
  to[stride*6] = _mm_extract_epi32(high, 2);
  to[stride*7] = _mm_extract_epi32(high, 3);
}

template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
{
@@ -720,6 +1097,17 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
  return _mm256_permute_pd(swap_halves,5);
#endif
}
template<> EIGEN_STRONG_INLINE Packet8i preverse(const Packet8i& a)
{
  return _mm256_castps_si256(preverse(_mm256_castsi256_ps(a)));
}

#ifdef EIGEN_VECTORIZE_AVX2
template<> EIGEN_STRONG_INLINE Packet4l preverse(const Packet4l& a)
{
  return _mm256_castpd_si256(preverse(_mm256_castsi256_pd(a)));
}
#endif

// pabs should be ok
template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
@@ -732,6 +1120,23 @@ template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
  const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
  return _mm256_and_pd(a,mask);
}
template<> EIGEN_STRONG_INLINE Packet8i pabs(const Packet8i& a)
{
#ifdef EIGEN_VECTORIZE_AVX2
  return _mm256_abs_epi32(a);
#else
  __m128i lo = _mm_abs_epi32(_mm256_extractf128_si256(a, 0));
  __m128i hi = _mm_abs_epi32(_mm256_extractf128_si256(a, 1));
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
#endif
}

template<> EIGEN_STRONG_INLINE Packet8h  psignbit(const Packet8h& a)  { return _mm_srai_epi16(a, 15); }
template<> EIGEN_STRONG_INLINE Packet8bf psignbit(const Packet8bf& a) { return _mm_srai_epi16(a, 15); }
template<> EIGEN_STRONG_INLINE Packet8f  psignbit(const Packet8f& a)  { return _mm256_castsi256_ps(parithmetic_shift_right<31>((Packet8i)_mm256_castps_si256(a))); }
#ifdef EIGEN_VECTORIZE_AVX2
template<> EIGEN_STRONG_INLINE Packet4d  psignbit(const Packet4d& a)  { return _mm256_castsi256_pd(parithmetic_shift_right<63>((Packet4l)_mm256_castpd_si256(a))); }
#endif

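// Scalar sketch (not part of this commit) of psignbit above: an arithmetic
// right shift smears the sign bit across the whole lane, yielding all-ones
// for negative values (including -0.0f) and all-zeros otherwise.
// inline int32_t signbit_mask_sketch(float x) {
//   int32_t bits = numext::bit_cast<int32_t>(x);
//   return bits >> 31;  // -1 if the sign bit is set, else 0
// }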
template<> EIGEN_STRONG_INLINE Packet8f pfrexp<Packet8f>(const Packet8f& a, Packet8f& exponent) {
  return pfrexp_generic(a,exponent);
@@ -803,11 +1208,19 @@ template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
{
  return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
}
template<> EIGEN_STRONG_INLINE int predux<Packet8i>(const Packet8i& a)
{
  return predux(Packet4i(_mm_add_epi32(_mm256_castsi256_si128(a),_mm256_extractf128_si256(a,1))));
}

template<> EIGEN_STRONG_INLINE Packet4f predux_half_dowto4<Packet8f>(const Packet8f& a)
{
  return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
}
template<> EIGEN_STRONG_INLINE Packet4i predux_half_dowto4<Packet8i>(const Packet8i& a)
{
  return _mm_add_epi32(_mm256_castsi256_si128(a),_mm256_extractf128_si256(a,1));
}

template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
{
@@ -856,7 +1269,12 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)

template<> EIGEN_STRONG_INLINE bool predux_any(const Packet8f& x)
{
  return _mm256_movemask_ps(x)!=0;
  return _mm256_movemask_ps(x) != 0;
}

template<> EIGEN_STRONG_INLINE bool predux_any(const Packet8i& x)
{
  return _mm256_movemask_ps(_mm256_castsi256_ps(x)) != 0;
}

EIGEN_DEVICE_FUNC inline void
@@ -905,6 +1323,66 @@ ptranspose(PacketBlock<Packet8f,4>& kernel) {
  kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
}

#define MM256_SHUFFLE_EPI32(A, B, M) \
  _mm256_castps_si256(_mm256_shuffle_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B), M))

#ifndef EIGEN_VECTORIZE_AVX2
#define MM256_UNPACKLO_EPI32(A, B) \
  _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B)))
#define MM256_UNPACKHI_EPI32(A, B) \
  _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B)))
#else
#define MM256_UNPACKLO_EPI32(A, B) _mm256_unpacklo_epi32(A, B)
#define MM256_UNPACKHI_EPI32(A, B) _mm256_unpackhi_epi32(A, B)
#endif


EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8i,8>& kernel) {
  __m256i T0 = MM256_UNPACKLO_EPI32(kernel.packet[0], kernel.packet[1]);
  __m256i T1 = MM256_UNPACKHI_EPI32(kernel.packet[0], kernel.packet[1]);
  __m256i T2 = MM256_UNPACKLO_EPI32(kernel.packet[2], kernel.packet[3]);
  __m256i T3 = MM256_UNPACKHI_EPI32(kernel.packet[2], kernel.packet[3]);
  __m256i T4 = MM256_UNPACKLO_EPI32(kernel.packet[4], kernel.packet[5]);
  __m256i T5 = MM256_UNPACKHI_EPI32(kernel.packet[4], kernel.packet[5]);
  __m256i T6 = MM256_UNPACKLO_EPI32(kernel.packet[6], kernel.packet[7]);
  __m256i T7 = MM256_UNPACKHI_EPI32(kernel.packet[6], kernel.packet[7]);
  __m256i S0 = MM256_SHUFFLE_EPI32(T0,T2,_MM_SHUFFLE(1,0,1,0));
  __m256i S1 = MM256_SHUFFLE_EPI32(T0,T2,_MM_SHUFFLE(3,2,3,2));
  __m256i S2 = MM256_SHUFFLE_EPI32(T1,T3,_MM_SHUFFLE(1,0,1,0));
  __m256i S3 = MM256_SHUFFLE_EPI32(T1,T3,_MM_SHUFFLE(3,2,3,2));
  __m256i S4 = MM256_SHUFFLE_EPI32(T4,T6,_MM_SHUFFLE(1,0,1,0));
  __m256i S5 = MM256_SHUFFLE_EPI32(T4,T6,_MM_SHUFFLE(3,2,3,2));
  __m256i S6 = MM256_SHUFFLE_EPI32(T5,T7,_MM_SHUFFLE(1,0,1,0));
  __m256i S7 = MM256_SHUFFLE_EPI32(T5,T7,_MM_SHUFFLE(3,2,3,2));
  kernel.packet[0] = _mm256_permute2f128_si256(S0, S4, 0x20);
  kernel.packet[1] = _mm256_permute2f128_si256(S1, S5, 0x20);
  kernel.packet[2] = _mm256_permute2f128_si256(S2, S6, 0x20);
  kernel.packet[3] = _mm256_permute2f128_si256(S3, S7, 0x20);
  kernel.packet[4] = _mm256_permute2f128_si256(S0, S4, 0x31);
  kernel.packet[5] = _mm256_permute2f128_si256(S1, S5, 0x31);
  kernel.packet[6] = _mm256_permute2f128_si256(S2, S6, 0x31);
  kernel.packet[7] = _mm256_permute2f128_si256(S3, S7, 0x31);
}

EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8i,4>& kernel) {
  __m256i T0 = MM256_UNPACKLO_EPI32(kernel.packet[0], kernel.packet[1]);
  __m256i T1 = MM256_UNPACKHI_EPI32(kernel.packet[0], kernel.packet[1]);
  __m256i T2 = MM256_UNPACKLO_EPI32(kernel.packet[2], kernel.packet[3]);
  __m256i T3 = MM256_UNPACKHI_EPI32(kernel.packet[2], kernel.packet[3]);

  __m256i S0 = MM256_SHUFFLE_EPI32(T0,T2,_MM_SHUFFLE(1,0,1,0));
  __m256i S1 = MM256_SHUFFLE_EPI32(T0,T2,_MM_SHUFFLE(3,2,3,2));
  __m256i S2 = MM256_SHUFFLE_EPI32(T1,T3,_MM_SHUFFLE(1,0,1,0));
  __m256i S3 = MM256_SHUFFLE_EPI32(T1,T3,_MM_SHUFFLE(3,2,3,2));

  kernel.packet[0] = _mm256_permute2f128_si256(S0, S1, 0x20);
  kernel.packet[1] = _mm256_permute2f128_si256(S2, S3, 0x20);
  kernel.packet[2] = _mm256_permute2f128_si256(S0, S1, 0x31);
  kernel.packet[3] = _mm256_permute2f128_si256(S2, S3, 0x31);
}

EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4d,4>& kernel) {
  __m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
@@ -919,21 +1397,37 @@ ptranspose(PacketBlock<Packet4d,4>& kernel) {
}

template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
#ifdef EIGEN_VECTORIZE_AVX2
  const __m256i zero = _mm256_setzero_si256();
  const __m256i select = _mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  __m256i false_mask = _mm256_cmpeq_epi32(zero, select);
  return _mm256_blendv_ps(thenPacket, elsePacket, _mm256_castsi256_ps(false_mask));
#else
  const __m256 zero = _mm256_setzero_ps();
  const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
  return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
#endif
}

template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
#ifdef EIGEN_VECTORIZE_AVX2
  const __m256i zero = _mm256_setzero_si256();
  const __m256i select = _mm256_set_epi64x(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  __m256i false_mask = _mm256_cmpeq_epi64(select, zero);
  return _mm256_blendv_pd(thenPacket, elsePacket, _mm256_castsi256_pd(false_mask));
#else
  const __m256d zero = _mm256_setzero_pd();
  const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
  return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
#endif
}
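// Scalar semantics sketch (not part of this commit): pblend picks thenPacket
// where the selector entry is nonzero and elsePacket where it is zero,
// matching the cmpeq-against-zero mask fed to blendv above.
// inline double pblend_scalar_sketch(double selector, double then_v, double else_v) {
//   return (selector == 0.0) ? else_v : then_v;
// }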

// Packet math for Eigen::half

#ifndef EIGEN_VECTORIZE_AVX512FP16
template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet8h half; };
#endif

template<> EIGEN_STRONG_INLINE Packet8h pset1<Packet8h>(const Eigen::half& from) {
  return _mm_set1_epi16(numext::bit_cast<numext::uint16_t>(from));
@@ -989,18 +1483,9 @@ EIGEN_STRONG_INLINE Packet8f half2float(const Packet8h& a) {
#ifdef EIGEN_HAS_FP16_C
  return _mm256_cvtph_ps(a);
#else
  EIGEN_ALIGN32 Eigen::half aux[8];
  pstore(aux, a);
  float f0(aux[0]);
  float f1(aux[1]);
  float f2(aux[2]);
  float f3(aux[3]);
  float f4(aux[4]);
  float f5(aux[5]);
  float f6(aux[6]);
  float f7(aux[7]);

  return _mm256_set_ps(f7, f6, f5, f4, f3, f2, f1, f0);
  Eigen::internal::Packet8f pp = _mm256_castsi256_ps(_mm256_insertf128_si256(
      _mm256_castsi128_si256(half2floatsse(a)), half2floatsse(_mm_srli_si128(a, 8)), 1));
  return pp;
#endif
}

@@ -1008,17 +1493,9 @@ EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) {
#ifdef EIGEN_HAS_FP16_C
  return _mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC);
#else
  EIGEN_ALIGN32 float aux[8];
  pstore(aux, a);
  const numext::uint16_t s0 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[0]));
  const numext::uint16_t s1 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[1]));
  const numext::uint16_t s2 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[2]));
  const numext::uint16_t s3 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[3]));
  const numext::uint16_t s4 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[4]));
  const numext::uint16_t s5 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[5]));
  const numext::uint16_t s6 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[6]));
  const numext::uint16_t s7 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[7]));
  return _mm_set_epi16(s7, s6, s5, s4, s3, s2, s1, s0);
  __m128i lo = float2half(_mm256_extractf128_ps(a, 0));
  __m128i hi = float2half(_mm256_extractf128_ps(a, 1));
  return _mm_packus_epi32(lo, hi);
#endif
}

@@ -1097,6 +1574,7 @@ template<> EIGEN_STRONG_INLINE Packet8h pnegate(const Packet8h& a) {
  return _mm_xor_si128(a, sign_mask);
}

#ifndef EIGEN_VECTORIZE_AVX512FP16
template<> EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
  Packet8f af = half2float(a);
  Packet8f bf = half2float(b);
@@ -1124,6 +1602,7 @@ template<> EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const
  Packet8f rf = pdiv(af, bf);
  return float2half(rf);
}
#endif

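// Pattern sketch (not part of this commit): without native half arithmetic,
// each Packet8h operation widens to Packet8f, computes in float, and rounds
// back, exactly as the padd/psub/pmul/pdiv wrappers above do.
// inline Eigen::half half_add_via_float_sketch(Eigen::half a, Eigen::half b) {
//   return Eigen::half(static_cast<float>(a) + static_cast<float>(b));
// }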
template<> EIGEN_STRONG_INLINE Packet8h pgather<Eigen::half, Packet8h>(const Eigen::half* from, Index stride)
{
@@ -1152,11 +1631,14 @@ template<> EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet8h>(Eigen::half*
  to[stride*7] = aux[7];
}


#ifndef EIGEN_VECTORIZE_AVX512FP16
template<> EIGEN_STRONG_INLINE Eigen::half predux<Packet8h>(const Packet8h& a) {
  Packet8f af = half2float(a);
  float reduced = predux<Packet8f>(af);
  return Eigen::half(reduced);
}
#endif

template<> EIGEN_STRONG_INLINE Eigen::half predux_max<Packet8h>(const Packet8h& a) {
  Packet8f af = half2float(a);
@@ -1272,7 +1754,6 @@ EIGEN_STRONG_INLINE Packet8f Bf16ToF32(const Packet8bf& a) {

// Convert float to bfloat16 according to round-to-nearest-even/denormals algorithm.
EIGEN_STRONG_INLINE Packet8bf F32ToBf16(const Packet8f& a) {
  Packet8bf r;

  __m256i input = _mm256_castps_si256(a);


@@ -10,6 +10,8 @@
#ifndef EIGEN_TYPE_CASTING_AVX_H
#define EIGEN_TYPE_CASTING_AVX_H

#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

@@ -10,6 +10,8 @@
#ifndef EIGEN_COMPLEX_AVX512_H
#define EIGEN_COMPLEX_AVX512_H

#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
@@ -97,7 +99,9 @@ template<> EIGEN_STRONG_INLINE Packet8cf ploadu<Packet8cf>(const std::complex<fl

template<> EIGEN_STRONG_INLINE Packet8cf pset1<Packet8cf>(const std::complex<float>& from)
{
  return Packet8cf(_mm512_castpd_ps(pload1<Packet8d>((const double*)(const void*)&from)));
  const float re = std::real(from);
  const float im = std::imag(from);
  return Packet8cf(_mm512_set_ps(im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re));
}

template<> EIGEN_STRONG_INLINE Packet8cf ploaddup<Packet8cf>(const std::complex<float>* from)
@@ -157,11 +161,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet8cf,Packet16f)

template<> EIGEN_STRONG_INLINE Packet8cf pdiv<Packet8cf>(const Packet8cf& a, const Packet8cf& b)
{
  Packet8cf num = pmul(a, pconj(b));
  __m512 tmp = _mm512_mul_ps(b.v, b.v);
  __m512 tmp2 = _mm512_shuffle_ps(tmp,tmp,0xB1);
  __m512 denom = _mm512_add_ps(tmp, tmp2);
  return Packet8cf(_mm512_div_ps(num.v, denom));
  return pdiv_complex(a, b);
}
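// Algebra sketch (not part of this commit) underlying both the removed code
// and pdiv_complex: a/b = (a * conj(b)) / |b|^2, where the removed code
// computed |b|^2 lane-wise via the shuffled self-product.
// inline std::complex<float> cdiv_sketch(std::complex<float> a, std::complex<float> b) {
//   std::complex<float> num = a * std::conj(b);
//   float denom = b.real() * b.real() + b.imag() * b.imag();
//   return std::complex<float>(num.real() / denom, num.imag() / denom);
// }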

template<> EIGEN_STRONG_INLINE Packet8cf pcplxflip<Packet8cf>(const Packet8cf& x)
@@ -253,11 +253,7 @@ template<> EIGEN_STRONG_INLINE Packet4cd ploadu<Packet4cd>(const std::complex<do

template<> EIGEN_STRONG_INLINE Packet4cd pset1<Packet4cd>(const std::complex<double>& from)
{
#ifdef EIGEN_VECTORIZE_AVX512DQ
  return Packet4cd(_mm512_broadcast_f64x2(pset1<Packet1cd>(from).v));
#else
  return Packet4cd(_mm512_castps_pd(_mm512_broadcast_f32x4( _mm_castpd_ps(pset1<Packet1cd>(from).v))));
#endif
}

template<> EIGEN_STRONG_INLINE Packet4cd ploaddup<Packet4cd>(const std::complex<double>* from) {
@@ -309,47 +305,11 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet4cd>(const
         Packet2cd(_mm512_extractf64x4_pd(a.v,1))));
}

template<> struct conj_helper<Packet4cd, Packet4cd, false,true>
{
  EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const
  { return padd(pmul(x,y),c); }

  EIGEN_STRONG_INLINE Packet4cd pmul(const Packet4cd& a, const Packet4cd& b) const
  {
    return internal::pmul(a, pconj(b));
  }
};

template<> struct conj_helper<Packet4cd, Packet4cd, true,false>
{
  EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const
  { return padd(pmul(x,y),c); }

  EIGEN_STRONG_INLINE Packet4cd pmul(const Packet4cd& a, const Packet4cd& b) const
  {
    return internal::pmul(pconj(a), b);
  }
};

template<> struct conj_helper<Packet4cd, Packet4cd, true,true>
{
  EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const
  { return padd(pmul(x,y),c); }

  EIGEN_STRONG_INLINE Packet4cd pmul(const Packet4cd& a, const Packet4cd& b) const
  {
    return pconj(internal::pmul(a, b));
  }
};

EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cd,Packet8d)

template<> EIGEN_STRONG_INLINE Packet4cd pdiv<Packet4cd>(const Packet4cd& a, const Packet4cd& b)
{
  Packet4cd num = pmul(a, pconj(b));
  __m512d tmp = _mm512_mul_pd(b.v, b.v);
  __m512d denom = padd(_mm512_permute_pd(tmp,0x55), tmp);
  return Packet4cd(_mm512_div_pd(num.v, denom));
  return pdiv_complex(a, b);
}

template<> EIGEN_STRONG_INLINE Packet4cd pcplxflip<Packet4cd>(const Packet4cd& x)

1235
libs/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -10,39 +10,40 @@
#ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
#define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_

#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

// Disable the code for older versions of gcc that don't support many of the required avx512 intrinsics.
#if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923
#if EIGEN_HAS_AVX512_MATH

#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
#define EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
  const Packet16f p16f_##NAME = pset1<Packet16f>(X)

#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
#define EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
  const Packet16f p16f_##NAME = preinterpret<Packet16f,Packet16i>(pset1<Packet16i>(X))

#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
#define EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
  const Packet8d p8d_##NAME = pset1<Packet8d>(X)

#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
#define EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
  const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))

#define _EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \
#define EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \
  const Packet16bf p16bf_##NAME = pset1<Packet16bf>(X)

#define _EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \
#define EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \
  const Packet16bf p16bf_##NAME = preinterpret<Packet16bf,Packet16i>(pset1<Packet16i>(X))

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
plog<Packet16f>(const Packet16f& _x) {
  return plog_float(_x);
}

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
plog<Packet8d>(const Packet8d& _x) {
  return plog_double(_x);
}
@@ -51,13 +52,13 @@ F16_PACKET_FUNCTION(Packet16f, Packet16h, plog)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog)

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
plog2<Packet16f>(const Packet16f& _x) {
  return plog2_float(_x);
}

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
plog2<Packet8d>(const Packet8d& _x) {
  return plog2_double(_x);
}
@@ -69,23 +70,23 @@ BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2)
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
// "exp(x) = 2^m*exp(r)" where r is in the range [-ln(2)/2, ln(2)/2].
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
pexp<Packet16f>(const Packet16f& _x) {
  _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
  _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
  _EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
  EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
  EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
  EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);

  _EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
  _EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
  EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
  EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);

  _EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
  EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);

  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);

  // Clamp x.
  Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
@@ -96,7 +97,7 @@ pexp<Packet16f>(const Packet16f& _x) {

  // Get r = x - m*ln(2). Note that we can do this without losing more than one
  // ulp precision due to the FMA instruction.
  _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
  EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
  Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
  Packet16f r2 = pmul(r, r);
  Packet16f r3 = pmul(r2, r);
@@ -120,7 +121,7 @@ pexp<Packet16f>(const Packet16f& _x) {
}

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
pexp<Packet8d>(const Packet8d& _x) {
  return pexp_double(_x);
}
@@ -154,49 +155,18 @@ EIGEN_STRONG_INLINE Packet16bf pldexp(const Packet16bf& a, const Packet16bf& exp
  return F32ToBf16(pldexp<Packet16f>(Bf16ToF32(a), Bf16ToF32(exponent)));
}

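// Scalar sketch (not part of this commit) of the range reduction used by pexp
// above: with m = floor(x/ln(2) + 1/2) and r = x - m*ln(2), exp(x) = 2^m *
// exp(r), and r is small enough for the cephes polynomial to stay accurate.
// inline double exp_range_reduced_sketch(double x) {
//   double m = std::floor(x * 1.4426950408889634 + 0.5);  // x / ln(2), rounded
//   double r = x - m * 0.6931471805599453;                // remainder in [-ln(2)/2, ln(2)/2]
//   return std::ldexp(std::exp(r), static_cast<int>(m));  // 2^m * exp(r)
// }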
// Functions for sqrt.
|
||||
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
||||
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
||||
// exact solution. The main advantage of this approach is not just speed, but
|
||||
// also the fact that it can be inlined and pipelined with other computations,
|
||||
// further reducing its effective latency.
|
||||
#if EIGEN_FAST_MATH
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
psqrt<Packet16f>(const Packet16f& _x) {
|
||||
Packet16f neg_half = pmul(_x, pset1<Packet16f>(-.5f));
|
||||
__mmask16 denormal_mask = _mm512_kand(
|
||||
_mm512_cmp_ps_mask(_x, pset1<Packet16f>((std::numeric_limits<float>::min)()),
|
||||
_CMP_LT_OQ),
|
||||
_mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_GE_OQ));
|
||||
|
||||
Packet16f x = _mm512_rsqrt14_ps(_x);
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet16f>(1.5f)));
|
||||
|
||||
// Flush results for denormals to zero.
|
||||
return _mm512_mask_blend_ps(denormal_mask, pmul(_x,x), _mm512_setzero_ps());
|
||||
return generic_sqrt_newton_step<Packet16f>::run(_x, _mm512_rsqrt14_ps(_x));
|
||||
}
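// Illustrative scalar model of the fast-math sqrt path above (assumed
// helper, not Eigen API): sqrt(x) = x * rsqrt(x), with one Newton step
// y' = y * (1.5 + (-0.5*x) * y * y) refining the hardware rsqrt estimate,
// and inputs below the smallest normal float flushed to zero.
#include <cmath>
#include <limits>

inline float fast_sqrt_model(float x) {
  bool denormal = x >= 0.0f && x < (std::numeric_limits<float>::min)();
  float y = 1.0f / std::sqrt(x);        // stands in for _mm512_rsqrt14_ps
  float neg_half = -0.5f * x;
  y = y * (neg_half * (y * y) + 1.5f);  // one Newton-Raphson step
  return denormal ? 0.0f : x * y;       // flush denormal inputs to zero
}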

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
psqrt<Packet8d>(const Packet8d& _x) {
  Packet8d neg_half = pmul(_x, pset1<Packet8d>(-.5));
  __mmask16 denormal_mask = _mm512_kand(
      _mm512_cmp_pd_mask(_x, pset1<Packet8d>((std::numeric_limits<double>::min)()),
                         _CMP_LT_OQ),
      _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_GE_OQ));

  Packet8d x = _mm512_rsqrt14_pd(_x);

  // Do a single step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));

  // Do a second step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));

  return _mm512_mask_blend_pd(denormal_mask, pmul(_x,x), _mm512_setzero_pd());
  // Double requires 2 Newton-Raphson steps for convergence.
  return generic_sqrt_newton_step<Packet8d, /*Steps=*/2>::run(_x, _mm512_rsqrt14_pd(_x));
}

#else
template <>
@@ -223,40 +193,9 @@ EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
#elif EIGEN_FAST_MATH

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
prsqrt<Packet16f>(const Packet16f& _x) {
  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
  _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
  _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);

  Packet16f neg_half = pmul(_x, p16f_minus_half);

  // Identify infinite, negative and denormal arguments.
  __mmask16 inf_mask = _mm512_cmp_ps_mask(_x, p16f_inf, _CMP_EQ_OQ);
  __mmask16 not_pos_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LE_OQ);
  __mmask16 not_finite_pos_mask = not_pos_mask | inf_mask;

  // Compute an approximate result using the rsqrt intrinsic, forcing +inf
  // for denormals for consistency with AVX and SSE implementations.
  Packet16f y_approx = _mm512_rsqrt14_ps(_x);

  // Do a single step of Newton-Raphson iteration to improve the approximation.
  // This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).
  // It is essential to evaluate the inner term like this because forming
  // y_n^2 may over- or underflow.
  Packet16f y_newton = pmul(y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p16f_one_point_five));

  // Select the result of the Newton-Raphson step for positive finite arguments.
  // For other arguments, choose the output of the intrinsic. This will
  // return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
  return _mm512_mask_blend_ps(not_finite_pos_mask, y_newton, y_approx);
}
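// Hedged scalar form of the Newton-Raphson step described above (assumed
// helper, not Eigen API). Note the grouping: y_n * ((-0.5*x) * y_n) is
// formed before the final multiply, because computing y_n * y_n first can
// overflow or underflow when x is near the extremes of the float range.
inline float rsqrt_newton_step(float x, float y_n) {
  float neg_half_x = -0.5f * x;
  return y_n * (y_n * (neg_half_x * y_n) + 1.5f);  // y_{n+1}
}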

#else

template <>
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
  _EIGEN_DECLARE_CONST_Packet16f(one, 1.0f);
  return _mm512_div_ps(p16f_one, _mm512_sqrt_ps(x));
  return generic_rsqrt_newton_step<Packet16f, /*Steps=*/1>::run(_x, _mm512_rsqrt14_ps(_x));
}
#endif

@@ -266,51 +205,28 @@ BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)
// prsqrt for double.
#if EIGEN_FAST_MATH
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
prsqrt<Packet8d>(const Packet8d& _x) {
  _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
  _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
  _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);

  Packet8d neg_half = pmul(_x, p8d_minus_half);

  // Identify infinite, negative and denormal arguments.
  __mmask8 inf_mask = _mm512_cmp_pd_mask(_x, p8d_inf, _CMP_EQ_OQ);
  __mmask8 not_pos_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LE_OQ);
  __mmask8 not_finite_pos_mask = not_pos_mask | inf_mask;

  // Compute an approximate result using the rsqrt intrinsic, forcing +inf
  // for denormals for consistency with AVX and SSE implementations.
#if defined(EIGEN_VECTORIZE_AVX512ER)
  Packet8d y_approx = _mm512_rsqrt28_pd(_x);
#else
  Packet8d y_approx = _mm512_rsqrt14_pd(_x);
#endif
  // Do one or two steps of Newton-Raphson to improve the approximation, depending on the
  // starting accuracy (either 2^-14 or 2^-28, depending on whether AVX512ER is available).
  // The Newton-Raphson algorithm has quadratic convergence and roughly doubles the number
  // of correct digits for each step.
  // This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).
  // It is essential to evaluate the inner term like this because forming
  // y_n^2 may over- or underflow.
  Packet8d y_newton = pmul(y_approx, pmadd(neg_half, pmul(y_approx, y_approx), p8d_one_point_five));
#if !defined(EIGEN_VECTORIZE_AVX512ER)
  y_newton = pmul(y_newton, pmadd(y_newton, pmul(neg_half, y_newton), p8d_one_point_five));
#endif
  // Select the result of the Newton-Raphson step for positive finite arguments.
  // For other arguments, choose the output of the intrinsic. This will
  // return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
  return _mm512_mask_blend_pd(not_finite_pos_mask, y_newton, y_approx);
#ifdef EIGEN_VECTORIZE_AVX512ER
  return generic_rsqrt_newton_step<Packet8d, /*Steps=*/1>::run(_x, _mm512_rsqrt28_pd(_x));
#else
  return generic_rsqrt_newton_step<Packet8d, /*Steps=*/2>::run(_x, _mm512_rsqrt14_pd(_x));
#endif
}
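// Rough model of the step-count choice above (assumed helper, not Eigen
// API): each Newton-Raphson iteration roughly doubles the correct bits, so
// a 2^-28 estimate (rsqrt28, AVX512ER) reaches double precision in one step
// while a 2^-14 estimate (rsqrt14) needs two.
constexpr int newton_steps_needed(int initial_bits, int target_bits) {
  int steps = 0;
  for (int bits = initial_bits; bits < target_bits; bits *= 2) ++steps;
  return steps;
}
static_assert(newton_steps_needed(28, 52) == 1, "AVX512ER rsqrt28 path");
static_assert(newton_steps_needed(14, 52) == 2, "rsqrt14 path");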

template<> EIGEN_STRONG_INLINE Packet16f preciprocal<Packet16f>(const Packet16f& a) {
#ifdef EIGEN_VECTORIZE_AVX512ER
  return _mm512_rcp28_ps(a);
#else
template <>
EIGEN_STRONG_INLINE Packet8d prsqrt<Packet8d>(const Packet8d& x) {
  _EIGEN_DECLARE_CONST_Packet8d(one, 1.0f);
  return _mm512_div_pd(p8d_one, _mm512_sqrt_pd(x));
  return generic_reciprocal_newton_step<Packet16f, /*Steps=*/1>::run(a, _mm512_rcp14_ps(a));
#endif
}

F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal)
#endif
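// Hedged scalar model of the reciprocal refinement above (assumed helper,
// not Eigen API): one Newton step on f(y) = 1/y - a gives
// y' = y * (2 - a*y), roughly doubling the accurate bits of the hardware
// rcp estimate.
inline float reciprocal_newton_step(float a, float y) {
  return y * (2.0f - a * y);
}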

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet16f plog1p<Packet16f>(const Packet16f& _x) {
  return generic_plog1p(_x);
}
@@ -318,7 +234,7 @@ Packet16f plog1p<Packet16f>(const Packet16f& _x) {
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p)

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet16f pexpm1<Packet16f>(const Packet16f& _x) {
  return generic_expm1(_x);
}
@@ -326,23 +242,47 @@ Packet16f pexpm1<Packet16f>(const Packet16f& _x) {
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)

#endif

#endif // EIGEN_HAS_AVX512_MATH


template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
psin<Packet16f>(const Packet16f& _x) {
  return psin_float(_x);
}

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
pcos<Packet16f>(const Packet16f& _x) {
  return pcos_float(_x);
}

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
pacos<Packet16f>(const Packet16f& _x) {
  return pacos_float(_x);
}

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
pasin<Packet16f>(const Packet16f& _x) {
  return pasin_float(_x);
}

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
patan<Packet16f>(const Packet16f& _x) {
  return patan_float(_x);
}

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
patan<Packet8d>(const Packet8d& _x) {
  return patan_double(_x);
}

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
ptanh<Packet16f>(const Packet16f& _x) {
  return internal::generic_fast_tanh_float(_x);
}

File diff suppressed because it is too large
877 libs/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h Normal file
@@ -0,0 +1,877 @@

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
//
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H
#define EIGEN_PACKET_MATH_FP16_AVX512_H

#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

// Disable the code for older versions of gcc that don't support many of the required AVX512 math intrinsics.
#if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923 || EIGEN_COMP_ICC >= 1900
#define EIGEN_HAS_AVX512_MATH 1
#else
#define EIGEN_HAS_AVX512_MATH 0
#endif

typedef __m512h Packet32h;
typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
typedef eigen_packet_wrapper<__m128i, 2> Packet8h;
|
||||
|
||||
template <>
|
||||
struct is_arithmetic<Packet8h> {
|
||||
enum { value = true };
|
||||
};
|
||||
|
||||
template <>
|
||||
struct packet_traits<half> : default_packet_traits {
|
||||
typedef Packet32h type;
|
||||
typedef Packet16h half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 32,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasCmp = 1,
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 1,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasConj = 1,
|
||||
HasSetLinear = 0,
|
||||
// These ones should be implemented in future
|
||||
HasLog = EIGEN_HAS_AVX512_MATH,
|
||||
HasLog1p = EIGEN_HAS_AVX512_MATH,
|
||||
HasExp = EIGEN_HAS_AVX512_MATH,
|
||||
HasExpm1 = EIGEN_HAS_AVX512_MATH,
|
||||
HasSqrt = EIGEN_HAS_AVX512_MATH,
|
||||
HasRsqrt = EIGEN_HAS_AVX512_MATH,
|
||||
HasBessel = 0, // EIGEN_HAS_AVX512_MATH,
|
||||
HasNdtri = 0, // EIGEN_HAS_AVX512_MATH,
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
HasCos = EIGEN_FAST_MATH,
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasErf = 0, // EIGEN_FAST_MATH,
|
||||
HasBlend = 0,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1,
|
||||
HasRint = 1
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<Packet32h> {
|
||||
typedef Eigen::half type;
|
||||
typedef Packet16h half;
|
||||
enum {
|
||||
size = 32,
|
||||
alignment = Aligned64,
|
||||
vectorizable = true,
|
||||
masked_load_available = false,
|
||||
masked_store_available = false
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<Packet16h> {
|
||||
typedef Eigen::half type;
|
||||
typedef Packet8h half;
|
||||
enum {
|
||||
size = 16,
|
||||
alignment = Aligned32,
|
||||
vectorizable = true,
|
||||
masked_load_available = false,
|
||||
masked_store_available = false
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<Packet8h> {
|
||||
typedef Eigen::half type;
|
||||
typedef Packet8h half;
|
||||
enum {
|
||||
size = 8,
|
||||
alignment = Aligned16,
|
||||
vectorizable = true,
|
||||
masked_load_available = false,
|
||||
masked_store_available = false
|
||||
};
|
||||
};
|
||||
|
||||
// Memory functions
|
||||
|
||||
// pset1
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) {
|
||||
return _mm512_set1_ph(static_cast<_Float16>(from));
|
||||
}
|
||||
|
||||
// pset1frombits
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) {
|
||||
return _mm512_castsi512_ph(_mm512_set1_epi16(from));
|
||||
}
|
||||
|
||||
// pfirst
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) {
|
||||
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
||||
return half_impl::raw_uint16_to_half(
|
||||
static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0)));
|
||||
#else
|
||||
Eigen::half dest[32];
|
||||
_mm512_storeu_ph(dest, from);
|
||||
return dest[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
// pload
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from);
|
||||
}
|
||||
|
||||
// ploadu
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from);
|
||||
}
|
||||
|
||||
// pstore
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from);
|
||||
}
|
||||
|
||||
// pstoreu
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from);
|
||||
}
|
||||
|
||||
// ploaddup
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) {
|
||||
__m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from));
|
||||
return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6,
|
||||
5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0),
|
||||
a);
|
||||
}
|
||||
|
||||
// ploadquad
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) {
|
||||
__m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from));
|
||||
return _mm512_permutexvar_ph(
|
||||
_mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0),
|
||||
a);
|
||||
}

// pabs

template <>
EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) {
  return _mm512_abs_ph(a);
}

// psignbit

template <>
EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) {
  return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15));
}
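// Hedged scalar model of psignbit above (assumed helper, not Eigen API): an
// arithmetic right shift by 15 broadcasts a half's sign bit into all 16
// bits, yielding 0xFFFF for negative values (including -0.0) and 0x0000
// otherwise.
#include <cstdint>

inline std::uint16_t signbit_mask_fp16(std::uint16_t half_bits) {
  return static_cast<std::uint16_t>(static_cast<std::int16_t>(half_bits) >> 15);
}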
|
||||
|
||||
// pmin
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) {
|
||||
return _mm512_min_ph(a, b);
|
||||
}
|
||||
|
||||
// pmax
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) {
|
||||
return _mm512_max_ph(a, b);
|
||||
}
|
||||
|
||||
// plset
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) {
|
||||
return _mm512_add_ph(_mm512_set1_ph(a),
|
||||
_mm512_set_ph(31.0f, 30.0f, 29.0f, 28.0f, 27.0f, 26.0f, 25.0f, 24.0f, 23.0f, 22.0f, 21.0f, 20.0f,
|
||||
19.0f, 18.0f, 17.0f, 16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f,
|
||||
7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
|
||||
}
|
||||
|
||||
// por
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) {
|
||||
return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
|
||||
}
|
||||
|
||||
// pxor
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) {
|
||||
return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
|
||||
}
|
||||
|
||||
// pand
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) {
|
||||
return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
|
||||
}
|
||||
|
||||
// pandnot
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) {
|
||||
return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a)));
|
||||
}
|
||||
|
||||
// pselect
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) {
|
||||
__mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);
|
||||
return _mm512_mask_blend_ph(mask32, a, b);
|
||||
}
|
||||
|
||||
// pcmp_eq
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) {
|
||||
__mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ);
|
||||
return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
|
||||
}
|
||||
|
||||
// pcmp_le
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) {
|
||||
__mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ);
|
||||
return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
|
||||
}
|
||||
|
||||
// pcmp_lt
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) {
|
||||
__mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ);
|
||||
return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
|
||||
}
|
||||
|
||||
// pcmp_lt_or_nan
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) {
|
||||
__mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ);
|
||||
return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, 0xffffu));
|
||||
}
|
||||
|
||||
// padd
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) {
|
||||
return _mm512_add_ph(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
|
||||
return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
|
||||
return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
|
||||
}
|
||||
|
||||
// psub
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) {
|
||||
return _mm512_sub_ph(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {
|
||||
return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {
|
||||
return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
|
||||
}
|
||||
|
||||
// pmul
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) {
|
||||
return _mm512_mul_ph(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
|
||||
return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
|
||||
return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
|
||||
}
|
||||
|
||||
// pdiv
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) {
|
||||
return _mm512_div_ph(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {
|
||||
return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {
|
||||
return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
|
||||
}

// pround

template <>
EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) {
  // Work-around for default std::round rounding mode.

  // Mask for the sign bit
  const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u));
  // The largest half-precision float less than 0.5
  const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu));

  return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO);
}
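// Hedged scalar model of the rounding work-around above, shown in float
// (assumed helper, not Eigen API; the packet code uses the analogous
// half-precision constant 0x37FF): add the largest value below 0.5 carrying
// the input's sign, then truncate toward zero. Round-to-nearest in the
// addition makes exact .5 cases land on the next integer, reproducing
// std::round's round-half-away-from-zero.
#include <cmath>

inline float round_half_away_model(float a) {
  const float prev0dot5 = std::nextafter(0.5f, 0.0f);  // largest float < 0.5
  return std::trunc(a + std::copysign(prev0dot5, a));
}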
|
||||
|
||||
// print
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) {
|
||||
return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
// pceil
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) {
|
||||
return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF);
|
||||
}
|
||||
|
||||
// pfloor
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) {
|
||||
return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF);
|
||||
}
|
||||
|
||||
// predux
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) {
|
||||
return (half)_mm512_reduce_add_ph(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) {
|
||||
return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) {
|
||||
return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a));
|
||||
}

// predux_half_dowto4
template <>
EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
  __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0));
  __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1));

  return Packet16h(padd<Packet16h>(lowHalf, highHalf));
#else
  Eigen::half data[32];
  _mm512_storeu_ph(data, a);

  __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data));
  __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16));

  return Packet16h(padd<Packet16h>(lowHalf, highHalf));
#endif
}
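// Hedged scalar model of the halving reduction above (assumed helper, not
// Eigen API): fold the upper half of a vector onto the lower half so the
// remaining sum can proceed at half the register width.
#include <cstddef>

inline void fold_upper_half(const float* in, float* out, std::size_t n) {
  for (std::size_t i = 0; i < n / 2; ++i)
    out[i] = in[i] + in[i + n / 2];  // out has n/2 elements
}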
|
||||
|
||||
// predux_max
|
||||
|
||||
// predux_min
|
||||
|
||||
// predux_mul
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
|
||||
// pmadd
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
|
||||
return _mm512_fmadd_ph(a, b, c);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
|
||||
return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
|
||||
return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
|
||||
}
|
||||
|
||||
// pmsub
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
|
||||
return _mm512_fmsub_ph(a, b, c);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
|
||||
return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
|
||||
return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
|
||||
}
|
||||
|
||||
// pnmadd
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
|
||||
return _mm512_fnmadd_ph(a, b, c);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
|
||||
return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
|
||||
return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
|
||||
}
|
||||
|
||||
// pnmsub
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
|
||||
return _mm512_fnmsub_ph(a, b, c);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
|
||||
return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
|
||||
return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// pnegate
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) {
|
||||
return _mm512_sub_ph(_mm512_set1_ph(0.0), a);
|
||||
}
|
||||
|
||||
// pconj
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
// psqrt
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) {
|
||||
return _mm512_sqrt_ph(a);
|
||||
}
|
||||
|
||||
// prsqrt
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) {
|
||||
return _mm512_rsqrt_ph(a);
|
||||
}
|
||||
|
||||
// preciprocal
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) {
|
||||
return _mm512_rcp_ph(a);
|
||||
}
|
||||
|
||||
// ptranspose
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) {
|
||||
__m512i t[32];
|
||||
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < 16; i++) {
|
||||
t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
|
||||
t[2 * i + 1] =
|
||||
_mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
|
||||
}
|
||||
|
||||
__m512i p[32];
|
||||
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < 8; i++) {
|
||||
p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]);
|
||||
p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]);
|
||||
p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]);
|
||||
p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]);
|
||||
}
|
||||
|
||||
__m512i q[32];
|
||||
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < 4; i++) {
|
||||
q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]);
|
||||
q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]);
|
||||
q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]);
|
||||
q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]);
|
||||
q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]);
|
||||
q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]);
|
||||
q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]);
|
||||
q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]);
|
||||
}
|
||||
|
||||
__m512i f[32];
|
||||
|
||||
#define PACKET32H_TRANSPOSE_HELPER(X, Y) \
|
||||
do { \
|
||||
f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X); \
|
||||
f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \
|
||||
f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \
|
||||
f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \
|
||||
f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \
|
||||
f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \
|
||||
f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \
|
||||
f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \
|
||||
} while (false);
|
||||
|
||||
PACKET32H_TRANSPOSE_HELPER(0, 0);
|
||||
PACKET32H_TRANSPOSE_HELPER(1, 1);
|
||||
PACKET32H_TRANSPOSE_HELPER(2, 2);
|
||||
PACKET32H_TRANSPOSE_HELPER(3, 3);
|
||||
|
||||
PACKET32H_TRANSPOSE_HELPER(1, 0);
|
||||
PACKET32H_TRANSPOSE_HELPER(2, 0);
|
||||
PACKET32H_TRANSPOSE_HELPER(3, 0);
|
||||
PACKET32H_TRANSPOSE_HELPER(2, 1);
|
||||
PACKET32H_TRANSPOSE_HELPER(3, 1);
|
||||
PACKET32H_TRANSPOSE_HELPER(3, 2);
|
||||
|
||||
PACKET32H_TRANSPOSE_HELPER(0, 1);
|
||||
PACKET32H_TRANSPOSE_HELPER(0, 2);
|
||||
PACKET32H_TRANSPOSE_HELPER(0, 3);
|
||||
PACKET32H_TRANSPOSE_HELPER(1, 2);
|
||||
PACKET32H_TRANSPOSE_HELPER(1, 3);
|
||||
PACKET32H_TRANSPOSE_HELPER(2, 3);
|
||||
|
||||
#undef PACKET32H_TRANSPOSE_HELPER
|
||||
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < 32; i++) {
|
||||
a.packet[i] = _mm512_castsi512_ph(f[i]);
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) {
|
||||
__m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3;
|
||||
t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
|
||||
t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
|
||||
t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
|
||||
t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
|
||||
|
||||
p0 = _mm512_unpacklo_epi32(t0, t2);
|
||||
p1 = _mm512_unpackhi_epi32(t0, t2);
|
||||
p2 = _mm512_unpacklo_epi32(t1, t3);
|
||||
p3 = _mm512_unpackhi_epi32(t1, t3);
|
||||
|
||||
a0 = p0;
|
||||
a1 = p1;
|
||||
a2 = p2;
|
||||
a3 = p3;
|
||||
|
||||
a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1);
|
||||
a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0);
|
||||
|
||||
a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2);
|
||||
a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0);
|
||||
|
||||
a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3);
|
||||
a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0);
|
||||
|
||||
a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2);
|
||||
a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1);
|
||||
|
||||
a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3);
|
||||
a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2);
|
||||
|
||||
a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3);
|
||||
a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1);
|
||||
|
||||
a.packet[0] = _mm512_castsi512_ph(a0);
|
||||
a.packet[1] = _mm512_castsi512_ph(a1);
|
||||
a.packet[2] = _mm512_castsi512_ph(a2);
|
||||
a.packet[3] = _mm512_castsi512_ph(a3);
|
||||
}
|
||||
|
||||
// preverse
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) {
|
||||
return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
||||
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
|
||||
a);
|
||||
}
|
||||
|
||||
// pscatter
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) {
|
||||
EIGEN_ALIGN64 half aux[32];
|
||||
pstore(aux, from);
|
||||
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int i = 0; i < 32; i++) {
|
||||
to[stride * i] = aux[i];
|
||||
}
|
||||
}
|
||||
|
||||
// pgather
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) {
|
||||
return _mm512_castsi512_ph(_mm512_set_epi16(
|
||||
from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x,
|
||||
from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x,
|
||||
from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x,
|
||||
from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x,
|
||||
from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x,
|
||||
from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x,
|
||||
from[1 * stride].x, from[0 * stride].x));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&);
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&);
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&);
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&);
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&);
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&);
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&);
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&);
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&);
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&);

EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {
  __m512d result = _mm512_undefined_pd();
  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0);
  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1);
  return _mm512_castpd_ph(result);
}

EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {
  a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0));
  b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1));
}
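// Hedged sketch of the split/compute/combine pattern used by the math
// functions below (assumed helper, not Eigen API): a 32-half packet is
// processed as two Packet16h halves through the existing 16-lane kernels
// and the results are concatenated again.
inline Packet32h unary_via_halves(const Packet32h& a, Packet16h (*op)(const Packet16h&)) {
  Packet16h low, high;
  extract2Packet16h(a, low, high);
  return combine2Packet16h(op(low), op(high));
}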
|
||||
|
||||
// psin
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) {
|
||||
Packet16h low;
|
||||
Packet16h high;
|
||||
extract2Packet16h(a, low, high);
|
||||
|
||||
Packet16h lowOut = psin(low);
|
||||
Packet16h highOut = psin(high);
|
||||
|
||||
return combine2Packet16h(lowOut, highOut);
|
||||
}
|
||||
|
||||
// pcos
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) {
|
||||
Packet16h low;
|
||||
Packet16h high;
|
||||
extract2Packet16h(a, low, high);
|
||||
|
||||
Packet16h lowOut = pcos(low);
|
||||
Packet16h highOut = pcos(high);
|
||||
|
||||
return combine2Packet16h(lowOut, highOut);
|
||||
}
|
||||
|
||||
// plog
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) {
|
||||
Packet16h low;
|
||||
Packet16h high;
|
||||
extract2Packet16h(a, low, high);
|
||||
|
||||
Packet16h lowOut = plog(low);
|
||||
Packet16h highOut = plog(high);
|
||||
|
||||
return combine2Packet16h(lowOut, highOut);
|
||||
}
|
||||
|
||||
// plog2
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) {
|
||||
Packet16h low;
|
||||
Packet16h high;
|
||||
extract2Packet16h(a, low, high);
|
||||
|
||||
Packet16h lowOut = plog2(low);
|
||||
Packet16h highOut = plog2(high);
|
||||
|
||||
return combine2Packet16h(lowOut, highOut);
|
||||
}
|
||||
|
||||
// plog1p
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) {
|
||||
Packet16h low;
|
||||
Packet16h high;
|
||||
extract2Packet16h(a, low, high);
|
||||
|
||||
Packet16h lowOut = plog1p(low);
|
||||
Packet16h highOut = plog1p(high);
|
||||
|
||||
return combine2Packet16h(lowOut, highOut);
|
||||
}
|
||||
|
||||
// pexp
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) {
|
||||
Packet16h low;
|
||||
Packet16h high;
|
||||
extract2Packet16h(a, low, high);
|
||||
|
||||
Packet16h lowOut = pexp(low);
|
||||
Packet16h highOut = pexp(high);
|
||||
|
||||
return combine2Packet16h(lowOut, highOut);
|
||||
}
|
||||
|
||||
// pexpm1
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) {
|
||||
Packet16h low;
|
||||
Packet16h high;
|
||||
extract2Packet16h(a, low, high);
|
||||
|
||||
Packet16h lowOut = pexpm1(low);
|
||||
Packet16h highOut = pexpm1(high);
|
||||
|
||||
return combine2Packet16h(lowOut, highOut);
|
||||
}
|
||||
|
||||
// ptanh
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) {
|
||||
Packet16h low;
|
||||
Packet16h high;
|
||||
extract2Packet16h(a, low, high);
|
||||
|
||||
Packet16h lowOut = ptanh(low);
|
||||
Packet16h highOut = ptanh(high);
|
||||
|
||||
return combine2Packet16h(lowOut, highOut);
|
||||
}
|
||||
|
||||
// pfrexp
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {
|
||||
Packet16h low;
|
||||
Packet16h high;
|
||||
extract2Packet16h(a, low, high);
|
||||
|
||||
Packet16h exp1 = _mm256_undefined_si256();
|
||||
Packet16h exp2 = _mm256_undefined_si256();
|
||||
|
||||
Packet16h lowOut = pfrexp(low, exp1);
|
||||
Packet16h highOut = pfrexp(high, exp2);
|
||||
|
||||
exponent = combine2Packet16h(exp1, exp2);
|
||||
|
||||
return combine2Packet16h(lowOut, highOut);
|
||||
}
|
||||
|
||||
// pldexp
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {
|
||||
Packet16h low;
|
||||
Packet16h high;
|
||||
extract2Packet16h(a, low, high);
|
||||
|
||||
Packet16h exp1;
|
||||
Packet16h exp2;
|
||||
extract2Packet16h(exponent, exp1, exp2);
|
||||
|
||||
Packet16h lowOut = pldexp(low, exp1);
|
||||
Packet16h highOut = pldexp(high, exp2);
|
||||
|
||||
return combine2Packet16h(lowOut, highOut);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PACKET_MATH_FP16_AVX512_H
1185 libs/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h Normal file
File diff suppressed because it is too large
1212 libs/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc Normal file
File diff suppressed because it is too large
@@ -10,6 +10,8 @@
#ifndef EIGEN_TYPE_CASTING_AVX512_H
#define EIGEN_TYPE_CASTING_AVX512_H

#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
|
||||
@@ -30,6 +32,56 @@ template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet16i>(cons
|
||||
return _mm512_castsi512_ps(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8d preinterpret<Packet8d, Packet16f>(const Packet16f& a) {
|
||||
return _mm512_castps_pd(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet8d>(const Packet8d& a) {
|
||||
return _mm512_castpd_ps(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f preinterpret<Packet8f, Packet16f>(const Packet16f& a) {
|
||||
return _mm512_castps512_ps256(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet16f>(const Packet16f& a) {
|
||||
return _mm512_castps512_ps128(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4d preinterpret<Packet4d, Packet8d>(const Packet8d& a) {
|
||||
return _mm512_castpd512_pd256(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet8d>(const Packet8d& a) {
|
||||
return _mm512_castpd512_pd128(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet8f>(const Packet8f& a) {
|
||||
return _mm512_castps256_ps512(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet4f>(const Packet4f& a) {
|
||||
return _mm512_castps128_ps512(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8d preinterpret<Packet8d, Packet4d>(const Packet4d& a) {
|
||||
return _mm512_castpd256_pd512(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8d preinterpret<Packet8d, Packet2d>(const Packet2d& a) {
|
||||
return _mm512_castpd128_pd512(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet16f>(const Packet16f& a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8d preinterpret<Packet8d, Packet8d>(const Packet8d& a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX512FP16
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<half, float> {
|
||||
enum {
|
||||
@@ -56,6 +108,8 @@ template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packe
|
||||
return float2half(a);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<bfloat16, float> {
|
||||
enum {
|
||||
@@ -82,6 +136,77 @@ template<> EIGEN_STRONG_INLINE Packet16bf pcast<Packet16f, Packet16bf>(const Pac
|
||||
return F32ToBf16(a);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_AVX512FP16
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<half, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 2
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, half> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 2,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||

template <>
EIGEN_STRONG_INLINE Packet16f pcast<Packet32h, Packet16f>(const Packet32h& a) {
  // Discard second-half of input.
  Packet16h low = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(a), 0));
  return _mm512_cvtxph_ps(_mm256_castsi256_ph(low));
}


template <>
EIGEN_STRONG_INLINE Packet32h pcast<Packet16f, Packet32h>(const Packet16f& a, const Packet16f& b) {
  __m512d result = _mm512_undefined_pd();
  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(_mm512_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC)), 0);
  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(_mm512_cvtps_ph(b, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC)), 1);
  return _mm512_castpd_ph(result);
}

template <>
EIGEN_STRONG_INLINE Packet8f pcast<Packet16h, Packet8f>(const Packet16h& a) {
  // Discard second-half of input.
  Packet8h low = _mm_castps_si128(_mm256_extractf32x4_ps(_mm256_castsi256_ps(a), 0));
  return _mm256_cvtxph_ps(_mm_castsi128_ph(low));
}


template <>
EIGEN_STRONG_INLINE Packet16h pcast<Packet8f, Packet16h>(const Packet8f& a, const Packet8f& b) {
  __m256d result = _mm256_undefined_pd();
  result = _mm256_insertf64x2(result, _mm_castsi128_pd(_mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC)), 0);
  result = _mm256_insertf64x2(result, _mm_castsi128_pd(_mm256_cvtps_ph(b, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC)), 1);
  return _mm256_castpd_si256(result);
}

template <>
EIGEN_STRONG_INLINE Packet4f pcast<Packet8h, Packet4f>(const Packet8h& a) {
  Packet8f full = _mm256_cvtxph_ps(_mm_castsi128_ph(a));
  // Discard second-half of input.
  return _mm256_extractf32x4_ps(full, 0);
}


template <>
EIGEN_STRONG_INLINE Packet8h pcast<Packet4f, Packet8h>(const Packet4f& a, const Packet4f& b) {
  __m256 result = _mm256_undefined_ps();
  result = _mm256_insertf128_ps(result, a, 0);
  result = _mm256_insertf128_ps(result, b, 1);
  return _mm256_cvtps_ph(result, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC);
}
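// Hedged scalar model of the 2:1 cast pairs above (assumed helper, not
// Eigen API): a half packet holds twice as many lanes as a float packet of
// the same width, so narrowing consumes two float packets per half packet
// (pcast(a, b)) while widening reads only the low half of its input.
#include <cstddef>

template <std::size_t N>
inline void narrow_two_to_one(const float (&a)[N], const float (&b)[N], float (&dst)[2 * N]) {
  for (std::size_t i = 0; i < N; ++i) dst[i] = a[i];      // low lanes from a
  for (std::size_t i = 0; i < N; ++i) dst[N + i] = b[i];  // high lanes from b
}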
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
@@ -11,6 +11,8 @@
#ifndef EIGEN_COMPLEX32_ALTIVEC_H
#define EIGEN_COMPLEX32_ALTIVEC_H

#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
|
||||
@@ -100,6 +102,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSqrt = 1,
|
||||
#ifdef __VSX__
|
||||
HasBlend = 1,
|
||||
#endif
|
||||
@@ -112,53 +115,99 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
Packet2cf res;
|
||||
#ifdef __VSX__
|
||||
// Load a single std::complex<float> from memory and duplicate
|
||||
//
|
||||
// Using pload would read past the end of the reference in this case
|
||||
// Using vec_xl_len + vec_splat, generates poor assembly
|
||||
__asm__ ("lxvdsx %x0,%y1" : "=wa" (res.v) : "Z" (from));
|
||||
#else
|
||||
if((std::ptrdiff_t(&from) % 16) == 0)
|
||||
res.v = pload<Packet4f>((const float *)&from);
|
||||
else
|
||||
res.v = ploadu<Packet4f>((const float *)&from);
|
||||
res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { return Packet2cf(pload<Packet4f>((const float *) from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { return Packet2cf(ploadu<Packet4f>((const float*) from)); }
|
||||
template<> EIGEN_ALWAYS_INLINE Packet2cf pload_partial<Packet2cf>(const std::complex<float>* from, const Index n, const Index offset)
|
||||
{
|
||||
return Packet2cf(pload_partial<Packet4f>((const float *) from, n * 2, offset * 2));
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE Packet2cf ploadu_partial<Packet2cf>(const std::complex<float>* from, const Index n)
|
||||
{
|
||||
return Packet2cf(ploadu_partial<Packet4f>((const float*) from, n * 2));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
|
||||
template<> EIGEN_ALWAYS_INLINE void pstore_partial <std::complex<float> >(std::complex<float> * to, const Packet2cf& from, const Index n, const Index offset) { pstore_partial((float*)to, from.v, n * 2, offset * 2); }
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<std::complex<float> >(std::complex<float> * to, const Packet2cf& from, const Index n) { pstoreu_partial((float*)to, from.v, n * 2); }

EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex<float>* from0, const std::complex<float>* from1)
EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex<float>& from0, const std::complex<float>& from1)
{
  Packet4f res0, res1;
#ifdef __VSX__
  __asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (*from0));
  __asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (*from1));
  // Load two std::complex<float> from memory and combine
  __asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (from0));
  __asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (from1));
#ifdef _BIG_ENDIAN
  __asm__ ("xxpermdi %x0, %x1, %x2, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1));
#else
  __asm__ ("xxpermdi %x0, %x2, %x1, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1));
#endif
#else
  *reinterpret_cast<std::complex<float> *>(&res0) = *from0;
  *reinterpret_cast<std::complex<float> *>(&res1) = *from1;
  *reinterpret_cast<std::complex<float> *>(&res0) = from0;
  *reinterpret_cast<std::complex<float> *>(&res1) = from1;
  res0 = vec_perm(res0, res1, p16uc_TRANSPOSE64_HI);
#endif
  return Packet2cf(res0);
}
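// Hedged scalar model of pload2 above (assumed helper, not Eigen API): two
// independent std::complex<float> values are packed into one 4-float vector
// as (re0, im0, re1, im1).
#include <complex>

inline void pack2_model(const std::complex<float>& from0,
                        const std::complex<float>& from1, float (&out)[4]) {
  out[0] = from0.real(); out[1] = from0.imag();
  out[2] = from1.real(); out[3] = from1.imag();
}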
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
||||
template<> EIGEN_ALWAYS_INLINE Packet2cf pload_ignore<Packet2cf>(const std::complex<float>* from)
|
||||
{
|
||||
EIGEN_ALIGN16 std::complex<float> af[2];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
return pload<Packet2cf>(af);
|
||||
Packet2cf res;
|
||||
res.v = pload_ignore<Packet4f>(reinterpret_cast<const float*>(from));
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
|
||||
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pgather_complex_size2(const Scalar* from, Index stride, const Index n = 2)
|
||||
{
|
||||
EIGEN_ALIGN16 std::complex<float> af[2];
|
||||
pstore<std::complex<float> >((std::complex<float> *) af, from);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
eigen_assert(n <= unpacket_traits<Packet>::size && "number of elements will gather past end of packet");
|
||||
EIGEN_ALIGN16 Scalar af[2];
|
||||
for (Index i = 0; i < n; i++) {
|
||||
af[i] = from[i*stride];
|
||||
}
|
||||
return pload_ignore<Packet>(af);
|
||||
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
{
  return pgather_complex_size2<std::complex<float>, Packet2cf>(from, stride);
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet2cf pgather_partial<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride, const Index n)
{
  return pgather_complex_size2<std::complex<float>, Packet2cf>(from, stride, n);
}
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_complex_size2(Scalar* to, const Packet& from, Index stride, const Index n = 2)
{
  eigen_assert(n <= unpacket_traits<Packet>::size && "number of elements will scatter past end of packet");
  EIGEN_ALIGN16 Scalar af[2];
  pstore<Scalar>((Scalar *) af, from);
  for (Index i = 0; i < n; i++) {
    to[i*stride] = af[i];
  }
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
{
  pscatter_complex_size2<std::complex<float>, Packet2cf>(to, from, stride);
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_partial<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride, const Index n)
{
  pscatter_complex_size2<std::complex<float>, Packet2cf>(to, from, stride, n);
}
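// Hedged scalar model of the partial gather helper above (assumed helper,
// not Eigen API): move up to n strided complex elements through a small
// staging buffer, leaving the remaining packet lanes unspecified.
#include <complex>

inline void gather_partial_model(const std::complex<float>* from, long stride,
                                 long n, std::complex<float> (&buf)[2]) {
  for (long i = 0; i < n; ++i) buf[i] = from[i * stride];  // requires n <= 2
}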
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); }
|
||||
@@ -184,7 +233,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Pack
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
|
||||
{
|
||||
Packet4f rev_a;
|
||||
rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);
|
||||
rev_a = vec_sld(a.v, a.v, 8);
|
||||
return Packet2cf(rev_a);
|
||||
}
|
||||
|
||||
@@ -210,10 +259,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
Packet2cf res = pmul(a, pconj(b));
|
||||
Packet4f s = pmul<Packet4f>(b.v, b.v);
|
||||
return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
|
||||
return pdiv_complex(a, b);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
|
||||
@@ -223,8 +269,13 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
||||
{
|
||||
#ifdef __VSX__
|
||||
Packet4f tmp = reinterpret_cast<Packet4f>(vec_mergeh(reinterpret_cast<Packet2d>(kernel.packet[0].v), reinterpret_cast<Packet2d>(kernel.packet[1].v)));
|
||||
kernel.packet[1].v = reinterpret_cast<Packet4f>(vec_mergel(reinterpret_cast<Packet2d>(kernel.packet[0].v), reinterpret_cast<Packet2d>(kernel.packet[1].v)));
|
||||
#else
|
||||
Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
|
||||
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
|
||||
#endif
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
@@ -320,6 +371,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
    HasAbs2 = 0,
    HasMin = 0,
    HasMax = 0,
    HasSqrt = 1,
    HasSetLinear = 0
  };
};
@@ -328,17 +380,35 @@ template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type

template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
template<> EIGEN_ALWAYS_INLINE Packet1cd pload_partial<Packet1cd>(const std::complex<double>* from, const Index n, const Index offset)
{
  return Packet1cd(pload_partial<Packet2d>((const double*)from, n * 2, offset * 2));
}
template<> EIGEN_ALWAYS_INLINE Packet1cd ploadu_partial<Packet1cd>(const std::complex<double>* from, const Index n)
{
  return Packet1cd(ploadu_partial<Packet2d>((const double*)from, n * 2));
}
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstoreu((double*)to, from.v); }
template<> EIGEN_ALWAYS_INLINE void pstore_partial <std::complex<double> >(std::complex<double> * to, const Packet1cd& from, const Index n, const Index offset) { pstore_partial((double*)to, from.v, n * 2, offset * 2); }
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<std::complex<double> >(std::complex<double> * to, const Packet1cd& from, const Index n) { pstoreu_partial((double*)to, from.v, n * 2); }

template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }

template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index)
{
  return pload<Packet1cd>(from);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet1cd pgather_partial<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index, const Index)
{
  return pload<Packet1cd>(from);
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index)
{
  pstore<std::complex<double> >(to, from);
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_partial<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index, const Index)
{
  pstore<std::complex<double> >(to, from);
}
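// Note: the *_partial variants above forward to the Packet2d versions with n
// and offset doubled, because each std::complex<double> occupies two doubles.
// Sketch of the equivalence being relied on:
//
//   std::complex<double> buf[1];
//   pstore_partial(buf, p, /*n=*/1, /*offset=*/0);
//   // == pstore_partial(reinterpret_cast<double*>(buf), p.v, /*n=*/2, /*offset=*/0);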
@@ -359,7 +429,7 @@ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::c

template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{
  EIGEN_ALIGN16 std::complex<double> res[2];
  EIGEN_ALIGN16 std::complex<double> res[1];
  pstore<std::complex<double> >(res, a);

  return res[0];
@@ -375,10 +445,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)

template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // TODO optimize it for AltiVec
  Packet1cd res = pmul(a,pconj(b));
  Packet2d s = pmul<Packet2d>(b.v, b.v);
  return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
  return pdiv_complex(a, b);
}

EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
@@ -388,8 +455,8 @@ EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)

EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
{
  Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
  kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
  Packet2d tmp = vec_mergeh(kernel.packet[0].v, kernel.packet[1].v);
  kernel.packet[1].v = vec_mergel(kernel.packet[0].v, kernel.packet[1].v);
  kernel.packet[0].v = tmp;
}

@@ -12,73 +12,117 @@
#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H

#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f plog<Packet4f>(const Packet4f& _x)
{
  return plog_float(_x);
}

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pexp<Packet4f>(const Packet4f& _x)
{
  return pexp_float(_x);
}

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f psin<Packet4f>(const Packet4f& _x)
{
  return psin_float(_x);
}

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pcos<Packet4f>(const Packet4f& _x)
{
  return pcos_float(_x);
}

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pacos<Packet4f>(const Packet4f& _x)
{
  return pacos_float(_x);
}

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pasin<Packet4f>(const Packet4f& _x)
{
  return pasin_float(_x);
}

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f patan<Packet4f>(const Packet4f& _x)
{
  return patan_float(_x);
}

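// Note: each kernel above is the element-wise packet counterpart of the
// scalar math function. A hedged usage sketch (the packet helpers named here
// are real Eigen internals; the tiny harness itself is only illustrative):
//
//   float in[4] = {1.f, 2.f, 4.f, 8.f}, out[4];
//   Packet4f p = ploadu<Packet4f>(in);
//   pstoreu(out, plog<Packet4f>(p));   // out[i] ~= std::log(in[i]) for each lane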
#ifdef __VSX__
#ifndef EIGEN_COMP_CLANG
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f prsqrt<Packet4f>(const Packet4f& x)
{
  return vec_rsqrt(x);
}
#endif

#ifdef __VSX__
#ifndef EIGEN_COMP_CLANG
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d prsqrt<Packet2d>(const Packet2d& x)
{
  return vec_rsqrt(x);
}

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d patan<Packet2d>(const Packet2d& _x)
{
  return patan_double(_x);
}
#endif

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f psqrt<Packet4f>(const Packet4f& x)
{
  return vec_sqrt(x);
}

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d psqrt<Packet2d>(const Packet2d& x)
{
  return vec_sqrt(x);
}

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d pexp<Packet2d>(const Packet2d& _x)
{
  return pexp_double(_x);
}

template<> EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf> (const Packet8bf& a){
  BF16_TO_F32_UNARY_OP_WRAPPER(psqrt<Packet4f>, a);
}

#ifndef EIGEN_COMP_CLANG
template<> EIGEN_STRONG_INLINE Packet8bf prsqrt<Packet8bf> (const Packet8bf& a){
  BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt<Packet4f>, a);
}
#endif
#else
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f psqrt<Packet4f>(const Packet4f& x)
{
  Packet4f a;
  for (Index i = 0; i < packet_traits<float>::size; i++) {
    a[i] = numext::sqrt(x[i]);
  }
  return a;
}
#endif

// Hyperbolic Tangent function.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f
ptanh<Packet4f>(const Packet4f& x) {
  return internal::generic_fast_tanh_float(x);
}

File diff suppressed because it is too large
@@ -5,33 +5,41 @@
#define EIGEN_POWER_PREFETCH(p)
#endif

#ifdef _ARCH_PWR9
#define USE_PARTIAL_PACKETS
#endif

#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>
EIGEN_STRONG_INLINE void gemm_extra_col(
template<typename Scalar, typename Packet, typename DataMapper, const Index accRows, const Index accCols>
EIGEN_ALWAYS_INLINE void gemm_extra_row(
  const DataMapper& res,
  const Scalar* lhs_base,
  const Scalar* rhs_base,
  Index depth,
  Index strideA,
  Index offsetA,
  Index strideB,
  Index row,
  Index col,
  Index rows,
  Index remaining_rows,
  Index remaining_cols,
  const Packet& pAlpha);
  const Packet& pAlpha,
  const Packet& pMask);

template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows, const Index accCols>
EIGEN_STRONG_INLINE void gemm_extra_row(
template<typename Scalar, typename Packet, typename DataMapper, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
EIGEN_STRONG_INLINE void gemm_extra_cols(
  const DataMapper& res,
  const Scalar* lhs_base,
  const Scalar* rhs_base,
  const Scalar* blockA,
  const Scalar* blockB,
  Index depth,
  Index strideA,
  Index offsetA,
  Index row,
  Index strideB,
  Index offsetB,
  Index col,
  Index rows,
  Index cols,
@@ -39,25 +47,11 @@ EIGEN_STRONG_INLINE void gemm_extra_row(
  const Packet& pAlpha,
  const Packet& pMask);

template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols>
EIGEN_STRONG_INLINE void gemm_unrolled_col(
  const DataMapper& res,
  const Scalar* lhs_base,
  const Scalar* rhs_base,
  Index depth,
  Index strideA,
  Index offsetA,
  Index& row,
  Index rows,
  Index col,
  Index remaining_cols,
  const Packet& pAlpha);

template<typename Packet>
EIGEN_ALWAYS_INLINE Packet bmask(const int remaining_rows);
EIGEN_ALWAYS_INLINE Packet bmask(const Index remaining_rows);
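// Note: bmask is only declared here; it is assumed to build a per-lane blend
// mask whose first remaining_rows lanes are all-ones, so partial rows can be
// scaled and stored safely. A scalar model of that assumption:
//
//   for (Index i = 0; i < PacketSize; i++)
//     mask[i] = (i < remaining_rows) ? all_ones : 0;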

template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
EIGEN_STRONG_INLINE void gemm_complex_extra_col(
template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
EIGEN_ALWAYS_INLINE void gemm_complex_extra_row(
  const DataMapper& res,
  const Scalar* lhs_base,
  const Scalar* rhs_base,
@@ -66,22 +60,22 @@ EIGEN_STRONG_INLINE void gemm_complex_extra_col(
  Index offsetA,
  Index strideB,
  Index row,
  Index col,
  Index rows,
  Index remaining_rows,
  Index remaining_cols,
  const Packet& pAlphaReal,
  const Packet& pAlphaImag);
  const Packet& pAlphaImag,
  const Packet& pMask);

template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
EIGEN_STRONG_INLINE void gemm_complex_extra_row(
template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
EIGEN_STRONG_INLINE void gemm_complex_extra_cols(
  const DataMapper& res,
  const Scalar* lhs_base,
  const Scalar* rhs_base,
  const Scalar* blockA,
  const Scalar* blockB,
  Index depth,
  Index strideA,
  Index offsetA,
  Index strideB,
  Index row,
  Index offsetB,
  Index col,
  Index rows,
  Index cols,
@@ -90,132 +84,133 @@ EIGEN_STRONG_INLINE void gemm_complex_extra_row(
  const Packet& pAlphaImag,
  const Packet& pMask);

template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
EIGEN_STRONG_INLINE void gemm_complex_unrolled_col(
  const DataMapper& res,
  const Scalar* lhs_base,
  const Scalar* rhs_base,
  Index depth,
  Index strideA,
  Index offsetA,
  Index strideB,
  Index& row,
  Index rows,
  Index col,
  Index remaining_cols,
  const Packet& pAlphaReal,
  const Packet& pAlphaImag);

template<typename Scalar, typename Packet>
EIGEN_ALWAYS_INLINE Packet ploadLhs(const Scalar* lhs);

template<typename DataMapper, typename Packet, typename Index, const Index accCols, int N, int StorageOrder>
EIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,4>& acc, const DataMapper& res, Index row, Index col);

template<typename DataMapper, typename Packet, typename Index, const Index accCols, int N, int StorageOrder>
EIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,8>& acc, const DataMapper& res, Index row, Index col);

template<typename Packet>
EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,4>& acc, PacketBlock<Packet,4>& accZ, const Packet& pAlpha);
EIGEN_ALWAYS_INLINE Packet ploadLhs(const __UNPACK_TYPE__(Packet)* lhs);

template<typename DataMapper, typename Packet, const Index accCols, int StorageOrder, bool Complex, int N, bool full = true>
EIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,N*(Complex?2:1)>& acc, const DataMapper& res, Index row, Index col);

template<typename DataMapper, typename Packet, int N>
EIGEN_ALWAYS_INLINE void bstore(PacketBlock<Packet,N>& acc, const DataMapper& res, Index row);

#ifdef USE_PARTIAL_PACKETS
template<typename DataMapper, typename Packet, const Index accCols, bool Complex, Index N, bool full = true>
EIGEN_ALWAYS_INLINE void bload_partial(PacketBlock<Packet,N*(Complex?2:1)>& acc, const DataMapper& res, Index row, Index elements);

template<typename DataMapper, typename Packet, Index N>
EIGEN_ALWAYS_INLINE void bstore_partial(PacketBlock<Packet,N>& acc, const DataMapper& res, Index row, Index elements);
#endif

template<typename Packet, int N>
EIGEN_ALWAYS_INLINE void bscalec(PacketBlock<Packet,N>& aReal, PacketBlock<Packet,N>& aImag, const Packet& bReal, const Packet& bImag, PacketBlock<Packet,N>& cReal, PacketBlock<Packet,N>& cImag);
EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ, const Packet& pAlpha);

const static Packet16uc p16uc_SETCOMPLEX32_FIRST = { 0, 1, 2, 3,
                                                     16, 17, 18, 19,
                                                     4, 5, 6, 7,
                                                     20, 21, 22, 23};
template<typename Packet, int N, bool mask>
EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ, const Packet& pAlpha, const Packet& pMask);

const static Packet16uc p16uc_SETCOMPLEX32_SECOND = { 8, 9, 10, 11,
                                                      24, 25, 26, 27,
                                                      12, 13, 14, 15,
                                                      28, 29, 30, 31};
//[a,b],[ai,bi] = [a,ai] - This is equivalent to p16uc_GETREAL64
const static Packet16uc p16uc_SETCOMPLEX64_FIRST = { 0, 1, 2, 3, 4, 5, 6, 7,
                                                     16, 17, 18, 19, 20, 21, 22, 23};
template<typename Packet, int N, bool mask>
EIGEN_ALWAYS_INLINE void bscalec(PacketBlock<Packet,N>& aReal, PacketBlock<Packet,N>& aImag, const Packet& bReal, const Packet& bImag, PacketBlock<Packet,N>& cReal, PacketBlock<Packet,N>& cImag, const Packet& pMask);

//[a,b],[ai,bi] = [b,bi] - This is equivalent to p16uc_GETIMAG64
const static Packet16uc p16uc_SETCOMPLEX64_SECOND = { 8, 9, 10, 11, 12, 13, 14, 15,
                                                      24, 25, 26, 27, 28, 29, 30, 31};
template<typename Packet, typename Packetc, int N, bool full>
EIGEN_ALWAYS_INLINE void bcouple(PacketBlock<Packet,N>& taccReal, PacketBlock<Packet,N>& taccImag, PacketBlock<Packetc,N*2>& tRes, PacketBlock<Packetc, N>& acc1, PacketBlock<Packetc, N>& acc2);
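// Note: the SETCOMPLEX permute vectors above interleave a packet of real
// parts with a packet of imaginary parts back into (re,im) pairs. For the
// 64-bit variant the byte indices select, schematically:
//
//   re = [r0, r1], im = [i0, i1]
//   SETCOMPLEX64_FIRST  -> [r0, i0]   // first complex value
//   SETCOMPLEX64_SECOND -> [r1, i1]   // second complex value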
#define MICRO_NORMAL(iter) \
  (accCols == accCols2) || (unroll_factor != (iter + 1))

// Grab two decoupled real/imaginary PacketBlocks and return two coupled (real/imaginary pairs) PacketBlocks.
template<typename Packet, typename Packetc>
EIGEN_ALWAYS_INLINE void bcouple_common(PacketBlock<Packet,4>& taccReal, PacketBlock<Packet,4>& taccImag, PacketBlock<Packetc, 4>& acc1, PacketBlock<Packetc, 4>& acc2)
{
  acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_FIRST);
  acc1.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX32_FIRST);
  acc1.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX32_FIRST);
  acc1.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX32_FIRST);
#define MICRO_UNROLL_ITER1(func, N) \
  switch (remaining_rows) { \
    default: \
      func(N, 0) \
      break; \
    case 1: \
      func(N, 1) \
      break; \
    case 2: \
      if (sizeof(Scalar) == sizeof(float)) { \
        func(N, 2) \
      } \
      break; \
    case 3: \
      if (sizeof(Scalar) == sizeof(float)) { \
        func(N, 3) \
      } \
      break; \
  }

  acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_SECOND);
  acc2.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX32_SECOND);
  acc2.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX32_SECOND);
  acc2.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX32_SECOND);
}
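// Note: MICRO_UNROLL_ITER1 converts the runtime remaining_rows value into a
// compile-time template argument by switching over the possible values; cases
// 2 and 3 are guarded by sizeof(Scalar) == sizeof(float) because a double (or
// complex<double>) packet holds only 2 scalars, so at most one row can remain
// there. Schematic expansion of MICRO_UNROLL_ITER1(f, 4):
//
//   switch (remaining_rows) { default: f(4, 0) break; case 1: f(4, 1) break; /* ... */ }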
#ifdef USE_PARTIAL_PACKETS
#define MICRO_UNROLL_ITER(func, N) \
  if (remaining_rows) { \
    func(N, true); \
  } else { \
    func(N, false); \
  }

template<typename Packet, typename Packetc>
EIGEN_ALWAYS_INLINE void bcouple(PacketBlock<Packet,4>& taccReal, PacketBlock<Packet,4>& taccImag, PacketBlock<Packetc,8>& tRes, PacketBlock<Packetc, 4>& acc1, PacketBlock<Packetc, 4>& acc2)
{
  bcouple_common<Packet, Packetc>(taccReal, taccImag, acc1, acc2);
#define MICRO_NORMAL_PARTIAL(iter) \
  full || (unroll_factor != (iter + 1))
#else
#define MICRO_UNROLL_ITER(func, N) MICRO_UNROLL_ITER1(func, N)
#endif

  acc1.packet[0] = padd<Packetc>(tRes.packet[0], acc1.packet[0]);
  acc1.packet[1] = padd<Packetc>(tRes.packet[1], acc1.packet[1]);
  acc1.packet[2] = padd<Packetc>(tRes.packet[2], acc1.packet[2]);
  acc1.packet[3] = padd<Packetc>(tRes.packet[3], acc1.packet[3]);
#define MICRO_COMPLEX_UNROLL_ITER(func, N) MICRO_UNROLL_ITER1(func, N)

  acc2.packet[0] = padd<Packetc>(tRes.packet[4], acc2.packet[0]);
  acc2.packet[1] = padd<Packetc>(tRes.packet[5], acc2.packet[1]);
  acc2.packet[2] = padd<Packetc>(tRes.packet[6], acc2.packet[2]);
  acc2.packet[3] = padd<Packetc>(tRes.packet[7], acc2.packet[3]);
}
#define MICRO_NORMAL_COLS(iter, a, b) ((MICRO_NORMAL(iter)) ? a : b)

template<typename Packet, typename Packetc>
EIGEN_ALWAYS_INLINE void bcouple_common(PacketBlock<Packet,1>& taccReal, PacketBlock<Packet,1>& taccImag, PacketBlock<Packetc, 1>& acc1, PacketBlock<Packetc, 1>& acc2)
{
  acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_FIRST);
#define MICRO_LOAD1(lhs_ptr, iter) \
  if (unroll_factor > iter) { \
    lhsV##iter = ploadLhs<Packet>(lhs_ptr##iter); \
    lhs_ptr##iter += MICRO_NORMAL_COLS(iter, accCols, accCols2); \
  } else { \
    EIGEN_UNUSED_VARIABLE(lhsV##iter); \
  }

  acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_SECOND);
}
#define MICRO_LOAD_ONE(iter) MICRO_LOAD1(lhs_ptr, iter)

template<typename Packet, typename Packetc>
EIGEN_ALWAYS_INLINE void bcouple(PacketBlock<Packet,1>& taccReal, PacketBlock<Packet,1>& taccImag, PacketBlock<Packetc,2>& tRes, PacketBlock<Packetc, 1>& acc1, PacketBlock<Packetc, 1>& acc2)
{
  bcouple_common<Packet, Packetc>(taccReal, taccImag, acc1, acc2);
#define MICRO_COMPLEX_LOAD_ONE(iter) \
  if (!LhsIsReal && (unroll_factor > iter)) { \
    lhsVi##iter = ploadLhs<Packet>(lhs_ptr_real##iter + MICRO_NORMAL_COLS(iter, imag_delta, imag_delta2)); \
  } else { \
    EIGEN_UNUSED_VARIABLE(lhsVi##iter); \
  } \
  MICRO_LOAD1(lhs_ptr_real, iter) \

  acc1.packet[0] = padd<Packetc>(tRes.packet[0], acc1.packet[0]);
#define MICRO_SRC_PTR1(lhs_ptr, advRows, iter) \
  if (unroll_factor > iter) { \
    lhs_ptr##iter = lhs_base + (row+(iter*accCols))*strideA*advRows - MICRO_NORMAL_COLS(iter, 0, (accCols-accCols2)*offsetA); \
  } else { \
    EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \
  }

  acc2.packet[0] = padd<Packetc>(tRes.packet[1], acc2.packet[0]);
}
#define MICRO_SRC_PTR_ONE(iter) MICRO_SRC_PTR1(lhs_ptr, 1, iter)

template<>
EIGEN_ALWAYS_INLINE void bcouple_common<Packet2d, Packet1cd>(PacketBlock<Packet2d,4>& taccReal, PacketBlock<Packet2d,4>& taccImag, PacketBlock<Packet1cd, 4>& acc1, PacketBlock<Packet1cd, 4>& acc2)
{
  acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_FIRST);
  acc1.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX64_FIRST);
  acc1.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX64_FIRST);
  acc1.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX64_FIRST);
#define MICRO_COMPLEX_SRC_PTR_ONE(iter) MICRO_SRC_PTR1(lhs_ptr_real, advanceRows, iter)

  acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_SECOND);
  acc2.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX64_SECOND);
  acc2.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX64_SECOND);
  acc2.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX64_SECOND);
}
#define MICRO_PREFETCH1(lhs_ptr, iter) \
  if (unroll_factor > iter) { \
    EIGEN_POWER_PREFETCH(lhs_ptr##iter); \
  }

template<>
EIGEN_ALWAYS_INLINE void bcouple_common<Packet2d, Packet1cd>(PacketBlock<Packet2d,1>& taccReal, PacketBlock<Packet2d,1>& taccImag, PacketBlock<Packet1cd, 1>& acc1, PacketBlock<Packet1cd, 1>& acc2)
{
  acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_FIRST);
#define MICRO_PREFETCH_ONE(iter) MICRO_PREFETCH1(lhs_ptr, iter)

  acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_SECOND);
}
#define MICRO_COMPLEX_PREFETCH_ONE(iter) MICRO_PREFETCH1(lhs_ptr_real, iter)

#ifdef USE_PARTIAL_PACKETS
#define MICRO_UPDATE_MASK
#else
#define MICRO_UPDATE_MASK EIGEN_UNUSED_VARIABLE(pMask);
#endif

#define MICRO_UPDATE \
  if (accCols == accCols2) { \
    MICRO_UPDATE_MASK \
    EIGEN_UNUSED_VARIABLE(offsetA); \
    row += unroll_factor*accCols; \
  }

#define MICRO_COMPLEX_UPDATE \
  MICRO_UPDATE \
  if(LhsIsReal || (accCols == accCols2)) { \
    EIGEN_UNUSED_VARIABLE(imag_delta2); \
  }

// This is necessary because ploadRhs for double returns a pair of vectors when MMA is enabled.
template<typename Scalar, typename Packet>
EIGEN_ALWAYS_INLINE Packet ploadRhs(const Scalar* rhs)
{
  return ploadu<Packet>(rhs);
}

} // end namespace internal
} // end namespace Eigen

File diff suppressed because it is too large
@@ -0,0 +1,385 @@
#ifndef EIGEN_MATRIX_PRODUCT_MMA_BFLOAT16_ALTIVEC_H
#define EIGEN_MATRIX_PRODUCT_MMA_BFLOAT16_ALTIVEC_H

namespace Eigen {

namespace internal {

EIGEN_STRONG_INLINE void pgerMMAbfloat16(__vector_quad* acc, const Packet8bf& a, const Packet8bf& b, int maskX, int maskY)
{
  switch(maskX){
    case 0b1111:
      switch(maskY){
        case 0b1111:
          __builtin_mma_xvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val));
          break;
        case 0b0011:
          __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b1111, 0b11, 0b11);
          break;
        case 0b0001:
          __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b1111, 0b1, 0b11);
          break;
        case 0b0111:
          __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b1111, 0b111, 0b11);
          break;
      }
      break;
    case 0b0011:
      switch(maskY){
        case 0b1111:
          __builtin_mma_xvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val));
          break;
        case 0b0011:
          __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b11, 0b11, 0b11);
          break;
        case 0b0001:
          __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b11, 0b1, 0b11);
          break;
        case 0b0111:
          __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b11, 0b111, 0b11);
          break;
      }
      break;
    case 0b0001:
      switch(maskY){
        case 0b1111:
          __builtin_mma_xvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val));
          break;
        case 0b0011:
          __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b1, 0b11, 0b11);
          break;
        case 0b0001:
          __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b1, 0b1, 0b11);
          break;
        case 0b0111:
          __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b1, 0b111, 0b11);
          break;
      }
      break;
    case 0b0111:
      switch(maskY){
        case 0b1111:
          __builtin_mma_xvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val));
          break;
        case 0b0011:
          __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b111, 0b11, 0b11);
          break;
        case 0b0001:
          __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b111, 0b1, 0b11);
          break;
        case 0b0111:
          __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b111, 0b111, 0b11);
          break;
      }
      break;
  }
}
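// Note: the masked __builtin_mma_pmxvbf16ger2pp form takes its lane masks as
// compile-time immediates, which is presumably why pgerMMAbfloat16 dispatches
// through nested switches instead of forwarding maskX/maskY directly; the
// all-lanes pair falls back to the unmasked xvbf16ger2pp. A scalar model
// (assumption) of one bf16 rank-2 update on the 4x4 f32 accumulator, before
// masking:
//
//   for (int i = 0; i < 4; i++)
//     for (int j = 0; j < 4; j++)
//       acc[i][j] += a[2*i]*b[2*j] + a[2*i+1]*b[2*j+1];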

EIGEN_STRONG_INLINE void scaleAndStore(float* result, float* acc, Packet4f pAlpha)
{
  Packet4f result_block = ploadu<Packet4f>(result);
  Packet4f packet_pmadd = pmadd(pload<Packet4f>(acc), pAlpha, result_block);
  pstoreu(result, packet_pmadd);
}
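// Note: scaleAndStore implements the GEMM write-back result = result + acc*alpha
// for one 4-float slice, using a single fused multiply-add per lane:
//
//   out = pmadd(acc, alpha, out);   // acc*alpha + out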

template<int num_packets, bool zero>
EIGEN_STRONG_INLINE Packet8bf loadLhsBfloat16(const bfloat16* indexA)
{
  Packet8bf lhs1 = ploadu<Packet8bf>(indexA);
  Packet8bf lhs2;
  const int packet_size = 8; // we fit 8 bfloat16 values in a 128-bit register
  if(zero){
    lhs2 = pset1<Packet8bf>(Eigen::bfloat16(0));
  }
  else lhs2 = ploadu<Packet8bf>(indexA + num_packets*packet_size);
  return vec_mergeh(lhs1.m_val, lhs2.m_val);
}
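// Note: vec_mergeh interleaves the high halves of its two inputs element-wise,
// so the result pairs lhs1's elements with lhs2's:
//
//   lhs1 = [a0..a7], lhs2 = [b0..b7]
//   vec_mergeh -> [a0, b0, a1, b1, a2, b2, a3, b3]
//
// With zero == true the b elements are bfloat16(0), which pads odd depths.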

template<bool zero>
EIGEN_STRONG_INLINE Packet8bf loadLhsBfloat16ExtraRows(const bfloat16* indexA, Index strideA, Index row, int extra_rows)
{
  EIGEN_ALIGN16 bfloat16 lhs_array[8] = {Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0)};
  int count = 0;
  const bfloat16* idxA = indexA + row*strideA;
  for(int row_count = 0; row_count < extra_rows; row_count++){
    lhs_array[count++] = *idxA;
    if(!zero) lhs_array[count] = *(idxA+1);
    count++;
    idxA += strideA;
  }
  return pload<Packet8bf>(lhs_array);
}

template<bool zero>
EIGEN_STRONG_INLINE Packet8bf loadRhsBfloat16(const bfloat16* baseB, Index strideB, int i, int k)
{
  const bfloat16* indexB = baseB + strideB*4*i + (k*4);
  Packet8bf rhs1 = ploadu<Packet8bf>(indexB);
  if(zero){
    Packet8bf rhs2 = pset1<Packet8bf>(Eigen::bfloat16(0));
    return vec_mergeh(rhs1.m_val, rhs2.m_val);
  }
  // r = vec_perm(a, b, c)
  // Let v be the concatenation of a and b.
  // Each byte of r is selected by using the least-significant 5 bits of the corresponding byte of c as an index into v.
  // We need these elements from rhs: 0, 4, 1, 5, 2, 6, 3, 7
  Packet16uc c = {0x0u, 0x1u, 0x8u, 0x9u, 0x2u, 0x3u, 0xAu, 0xB, 0x4, 0x5, 0xCu, 0xDu, 0x6u, 0x7u, 0xEu, 0xFu};
  return vec_perm(rhs1.m_val, rhs1.m_val, c);
}
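// Note: a worked example of the permute above. Each bfloat16 element spans
// 2 bytes, so element k lives in bytes {2k, 2k+1}. The byte-index vector
// c = {0,1, 8,9, 2,3, 10,11, 4,5, 12,13, 6,7, 14,15} therefore picks elements
// {0, 4, 1, 5, 2, 6, 3, 7} of rhs1, pairing element e with element e+4 as the
// even/odd bf16 inputs consumed by each ger2pp lane.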

template<bool zero>
EIGEN_STRONG_INLINE Packet8bf loadRhsBfloat16ExtraCols(const bfloat16* blockB, Index strideB, Index offsetB, Index col, int i, int k, int extra_cols)
{
  EIGEN_ALIGN16 bfloat16 rhs_vector[8] = {Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0)};
  const bfloat16* indexB = blockB + ((col+4*i)*strideB)+k+offsetB;
  for(int c = 0; c < extra_cols; c++){
    rhs_vector[2*c] = *indexB;
    if(!zero) rhs_vector[2*c+1] = *(indexB+1);
    indexB += strideB;
  }
  return pload<Packet8bf>(rhs_vector);
}

template<int num_acc, int num_packets, bool zero, bool rhs_extra_cols, bool lhs_extra_rows>
EIGEN_STRONG_INLINE void KLoop
(
  const bfloat16* indexA,
  const bfloat16* indexB,
  __vector_quad (&quad_acc)[num_acc],
  Index strideA,
  Index strideB,
  Index offsetB,
  Index k,
  Index row,
  Index col,
  int extra_rows,
  int extra_cols,
  int mask_rows = 0xF,
  int mask_cols = 0xF
)
{
  Packet8bf lhs;
  Packet8bf rhs[num_acc];
  if(lhs_extra_rows) lhs = loadLhsBfloat16ExtraRows<zero>(indexA+k, strideA, row, extra_rows);
  else lhs = loadLhsBfloat16<num_packets, zero>(indexA + k*num_packets*8); // a packet of bfloat16 has 8 elements
  for(int i = 0; i < num_acc; i++){
    if(!rhs_extra_cols)
      rhs[i] = loadRhsBfloat16<zero>(indexB, strideB, i, k);
    else{
      rhs[i] = loadRhsBfloat16ExtraCols<zero>(indexB, strideB, offsetB, col, i, k, extra_cols);
    }
    pgerMMAbfloat16(&(quad_acc[i]), rhs[i], lhs, mask_cols, mask_rows);
  }
}
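// Note: each KLoop call feeds one depth *pair* into the rank-2 bf16 update
// (ger2 multiplies even/odd element pairs), so the main loop advances k by 2.
// An odd depth is handled by a first call with zero == true, which pads the
// missing partner element with bfloat16(0) so the padded slot contributes
// nothing to the accumulator.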

template<const int num_acc, const int standard_block_size, const int num_packets, bool rhsExtraCols = false, bool lhsExtraRows = false>
void colLoopBody(Index* p_col, Index row, Index depth, Index cols, Index rows, int offset_row, int block_index, Packet4f pAlpha, const bfloat16* indexA, Index strideA, const bfloat16* blockB, Index strideB, Index offsetB, float* result, int extra_cols = 0, int extra_rows = 0, int mask_cols = 0xF, int mask_rows = 0xF)
{
  int col = *p_col;
  int count;
  int max, step, bound;
  const bfloat16* indexB;

  if(num_acc == 1) bound = 0;
  else bound = 1;

  if(rhsExtraCols){
    count = 0;
    max = 1;
    step = 1;
    indexB = blockB;
  }
  else{
    count = col;
    step = num_acc * 4; // each accumulator has 4 elements
    max = cols/step;
    indexB = blockB + 4*offsetB + strideB*col;
  }

  while(count/step + bound < max){
    Index k = 0;
    EIGEN_ALIGN32 float acc[num_acc][4][4];
    __vector_quad quad_acc[num_acc];

    for(int i = 0; i < num_acc; i++)
      __builtin_mma_xxsetaccz(&(quad_acc[i]));

    if(depth%2 != 0){
      KLoop<num_acc, num_packets, true, rhsExtraCols, lhsExtraRows>(indexA, indexB, quad_acc, strideA, strideB, offsetB, k, row, col, extra_rows, extra_cols, mask_rows, mask_cols);
      k = 1;
    }
    for(; k/2 < depth/2; k += 2){
      KLoop<num_acc, num_packets, false, rhsExtraCols, lhsExtraRows>(indexA, indexB, quad_acc, strideA, strideB, offsetB, k, row, col, extra_rows, extra_cols, mask_rows, mask_cols);
    }
    for(int i = 0; i < num_acc; i++){
      __builtin_mma_disassemble_acc((void*)acc[i], &(quad_acc[i]));
      if(lhsExtraRows){
        for(int x = 0; x < extra_cols; x++){
          for(int y = 0; y < extra_rows; y++){
            result[((col+i*4)+x)*rows + row + y] += acc[i][x][y]*(pAlpha[0]);
          }
        }
      }
      else{
        if(rhsExtraCols){
          for(int x = 0; x < cols-col; x++){
            scaleAndStore(result + ((col+i*4)+x)*rows + row + offset_row, acc[i][x], pAlpha);
          }
        }
        else{
          for(int x = 0; x < 4; x++){
            scaleAndStore(result + ((col+i*4)+x)*rows + (block_index*16) + offset_row, acc[i][x], pAlpha);
          }
        }
      }
    }
    count += step;
    if(!rhsExtraCols) {
      indexB += strideB*step;
      col += step;
    }
  }
  *p_col = col;
}
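// Note: colLoopBody walks the column dimension in chunks of step = num_acc*4
// columns, one 4-wide MMA accumulator per chunk. For num_acc > 1, bound = 1
// appears to make the wide instantiation stop one chunk early, leaving the
// tail columns to the progressively narrower instantiations (<4>...<1>, then
// the masked rhsExtraCols call) that the caller chains after it.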

template<typename Index, typename Packet, typename RhsPacket, typename DataMapper, const Index accRows, const Index accCols>
void gemmMMAbfloat16(const DataMapper& res, const bfloat16* blockA, const bfloat16* blockB, Index rows, Index depth, Index cols, bfloat16 alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)
{

  if(rows == 0 || cols == 0 || depth == 0) return;
  const Packet4f pAlpha = pset1<Packet4f>(Eigen::bfloat16_impl::bfloat16_to_float(alpha));
  ei_declare_aligned_stack_constructed_variable(float, result, cols*rows, 0);

  for(int j = 0; j < cols; j++){
    for(int i = 0; i < rows; i++){
      result[j*rows + i] = res(i,j);
    }
  }

  Index row = 0;
  Index col = 0;

  if( strideA == -1 ) strideA = depth;
  if( strideB == -1 ) strideB = depth;
  // Packing is done in blocks. There are 3 possible block sizes (see the schematic note below):
  //  - blocks of 8 columns with 16 elements (8x16), stored col-major;
  //  - blocks of 8 columns with 8 elements (8x8), stored col-major, used when the remaining rows are at least 8 but fewer than 16;
  //  - blocks of 8 columns with fewer than 8 elements, stored row-major, used when fewer than 8 rows remain.
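  // Note: schematic of the three LHS packing shapes listed above (the layout
  // is inferred from the indexing below, so treat it as an assumption):
  //
  //   rows >= 16 : 8x16 col-major panels, walked 4 rows at a time via offset_row
  //   8..15 left : one 8x8 col-major panel for the next 8 rows
  //   < 8 left   : remainder packed row-major and handled with mask_rows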

  // Loop for LHS standard block (8x16)
  int standard_block_size = 16;
  const int standard_blocks_quantity = rows/standard_block_size; // number of standard blocks
  int bigSuffix = (2*8) * (strideA-offsetA-depth);
  const bfloat16* indexA = blockA;
  int block_index;
  for(block_index = 0; block_index < standard_blocks_quantity; block_index++){
    indexA += 2*8*offsetA;
    for(int offset_row = 0; offset_row < standard_block_size; offset_row += 4){ // this block size has 16 rows maximum
      col = 0;
      colLoopBody<5, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
      colLoopBody<4, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
      colLoopBody<3, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
      colLoopBody<2, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
      colLoopBody<1, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
      if(cols > col){
        int extra_cols = cols-col;
        int shift = (4-extra_cols >= 0) ? 4-extra_cols : 0;
        int mask_cols = 0xF >> shift;
        // Remember: it doesn't make sense to use multiple accumulators for extra_cols, since we are unrolling the column loop.
        colLoopBody<1, 16, 2, true>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result, extra_cols, 4, mask_cols, 0xF);
      }
    }
    row += 16;
    indexA += bigSuffix + 2*8*depth;
  }
  // LHS (8x8) block
  if(rows - standard_blocks_quantity*16 >= 8){
    indexA += 1*8*offsetA + 2*8*offsetA;
    for(int offset_row = 0; offset_row < 8; offset_row += 4){
      col = 0;
      colLoopBody<5, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
      colLoopBody<4, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
      colLoopBody<3, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
      colLoopBody<2, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
      colLoopBody<1, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
    }
    if(cols > col){
      int extra_cols = cols-col;
      int shift = (4-extra_cols >= 0) ? 4-extra_cols : 0;
      int mask_cols = 0xF >> shift;

      for(int offset_row = 0; offset_row < 8; offset_row += 4){
        colLoopBody<1, 8, 1, true>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result, extra_cols, 4, mask_cols, 0xF);
      }
    } // end extra cols
    row += 8;
  }
  // extra rows
  while(row < rows){
    int extra_rows = rows-row;
    int shift = (4-extra_rows >= 0) ? 4-extra_rows : 0;
    int mask_rows = 0xF >> shift;
    int extra_rows_or_four = (extra_rows <= 4) ? extra_rows : 4;

    // This index is the beginning of the remaining block.
    // This last block for LHS is organized as RowMajor.
    col = 0;
    colLoopBody<5, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows);
    colLoopBody<4, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows);
    colLoopBody<3, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows);
    colLoopBody<2, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows);
    colLoopBody<1, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows);
    if(cols > col){
      int extra_cols = cols-col;
      int shift = (4-extra_cols >= 0) ? 4-extra_cols : 0;
      int mask_cols = 0xF >> shift;
      int extra_cols_or_four = (extra_cols <= 4) ? extra_cols : 4;

      colLoopBody<1, 8, 1, true, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, extra_cols_or_four, extra_rows_or_four, mask_cols, mask_rows);
    }
    row += extra_rows_or_four;
  }

  // Convert back to bfloat16
  for(col = 0; col/4 < cols/4; col += 4){
    int row;
    for(row = 0; row/8 < rows/8; row += 8){
      // get and save block
      PacketBlock<Packet8bf,4> block;
      for(int j = 0; j < 4; j++){
        Packet4f temp_even, temp_odd;
        EIGEN_ALIGN32 float even[4], odd[4];
        for(int i = 0; i < 4; i++){
          even[i] = result[(col + j)*rows + row + i*2];
          odd[i] = result[(col + j)*rows + row + i*2+1];
        }
        temp_even = pload<Packet4f>(even);
        temp_odd = pload<Packet4f>(odd);
        block.packet[j] = F32ToBf16(temp_even, temp_odd);
      }

      res.template storePacketBlock<Packet8bf,4>(row, col, block);
    }
    // extra rows
    while(row < rows){
      for(int col_off = 0; col_off < 4; col_off++){
        res(row, col+col_off) = Eigen::bfloat16(result[(col+col_off)*rows+row]);
      }
      row++;
    }

  }
  // extra cols
  while(col < cols){
    for(int r = 0; r < rows; r++){
      res(r, col) = Eigen::bfloat16(result[col*rows + r]);
    }
    col++;
  }
}


} // end namespace internal
} // end namespace Eigen
#endif //EIGEN_MATRIX_PRODUCT_MMA_BFLOAT16_ALTIVEC_H
Some files were not shown because too many files have changed in this diff