aboutsummaryrefslogtreecommitdiff
path: root/Eigen/Core
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/Core')
-rw-r--r--Eigen/Core290
1 files changed, 215 insertions, 75 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 509c529e1..0f7fa630d 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -14,6 +14,58 @@
// first thing Eigen does: stop the compiler from committing suicide
#include "src/Core/util/DisableStupidWarnings.h"
+// Handle NVCC/CUDA/SYCL
+#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
+ // Do not try asserts on CUDA and SYCL!
+ #ifndef EIGEN_NO_DEBUG
+ #define EIGEN_NO_DEBUG
+ #endif
+
+ #ifdef EIGEN_INTERNAL_DEBUGGING
+ #undef EIGEN_INTERNAL_DEBUGGING
+ #endif
+
+ #ifdef EIGEN_EXCEPTIONS
+ #undef EIGEN_EXCEPTIONS
+ #endif
+
+ // All functions callable from CUDA code must be qualified with __device__
+ #ifdef __CUDACC__
+ // Do not try to vectorize on CUDA and SYCL!
+ #ifndef EIGEN_DONT_VECTORIZE
+ #define EIGEN_DONT_VECTORIZE
+ #endif
+
+ #define EIGEN_DEVICE_FUNC __host__ __device__
+ // We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro
+ // works properly on the device side
+ #include <math_functions.hpp>
+ #else
+ #define EIGEN_DEVICE_FUNC
+ #endif
+
+#else
+ #define EIGEN_DEVICE_FUNC
+
+#endif
+
+// When compiling CUDA device code with NVCC, pull in math functions from the
+// global namespace. In host mode, and when device doee with clang, use the
+// std versions.
+#if defined(__CUDA_ARCH__) && defined(__NVCC__)
+ #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
+#else
+ #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
+#endif
+
+#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
+ #define EIGEN_EXCEPTIONS
+#endif
+
+#ifdef EIGEN_EXCEPTIONS
+ #include <new>
+#endif
+
// then include this file where all our macros are defined. It's really important to do it first because
// it's where we do all the alignment settings (platform detection and honoring the user's will if he
// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
@@ -21,7 +73,7 @@
// Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3)
// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details.
-#if defined(__MINGW32__) && EIGEN_GNUC_AT_LEAST(4,6)
+#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6)
#pragma GCC optimize ("-fno-ipa-cp-clone")
#endif
@@ -31,26 +83,26 @@
// and inclusion of their respective header files
#include "src/Core/util/MKL_support.h"
-// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
-// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
-#if !EIGEN_ALIGN
+// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
+// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
+#if EIGEN_MAX_ALIGN_BYTES==0
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE
#endif
#endif
-#ifdef _MSC_VER
+#if EIGEN_COMP_MSVC
#include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
- #if (_MSC_VER >= 1500) // 2008 or later
+ #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
// Remember that usage of defined() in a #define is undefined by the standard.
// a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
- #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64)
+ #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
#define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
#endif
#endif
#else
// Remember that usage of defined() in a #define is undefined by the standard
- #if (defined __SSE2__) && ( (!defined __GNUC__) || (defined __INTEL_COMPILER) || EIGEN_GNUC_AT_LEAST(4,2) )
+ #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
#define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
#endif
#endif
@@ -82,6 +134,28 @@
#ifdef __SSE4_2__
#define EIGEN_VECTORIZE_SSE4_2
#endif
+ #ifdef __AVX__
+ #define EIGEN_VECTORIZE_AVX
+ #define EIGEN_VECTORIZE_SSE3
+ #define EIGEN_VECTORIZE_SSSE3
+ #define EIGEN_VECTORIZE_SSE4_1
+ #define EIGEN_VECTORIZE_SSE4_2
+ #endif
+ #ifdef __AVX2__
+ #define EIGEN_VECTORIZE_AVX2
+ #endif
+ #ifdef __FMA__
+ #define EIGEN_VECTORIZE_FMA
+ #endif
+ #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512)
+ #define EIGEN_VECTORIZE_AVX512
+ #define EIGEN_VECTORIZE_AVX2
+ #define EIGEN_VECTORIZE_AVX
+ #define EIGEN_VECTORIZE_FMA
+ #ifdef __AVX512DQ__
+ #define EIGEN_VECTORIZE_AVX512DQ
+ #endif
+ #endif
// include files
@@ -95,9 +169,10 @@
extern "C" {
// In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
// Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus:
- #if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1110
+ #if EIGEN_COMP_ICC >= 1110
#include <immintrin.h>
#else
+ #include <mmintrin.h>
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef EIGEN_VECTORIZE_SSE3
@@ -112,8 +187,20 @@
#ifdef EIGEN_VECTORIZE_SSE4_2
#include <nmmintrin.h>
#endif
+ #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
+ #include <immintrin.h>
+ #endif
#endif
} // end extern "C"
+ #elif defined __VSX__
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_VSX
+ #include <altivec.h>
+ // We need to #undef all these ugly tokens defined in <altivec.h>
+ // => use __vector instead of vector
+ #undef bool
+ #undef vector
+ #undef pixel
#elif defined __ALTIVEC__
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ALTIVEC
@@ -123,13 +210,35 @@
#undef bool
#undef vector
#undef pixel
- #elif defined __ARM_NEON
+ #elif (defined __ARM_NEON) || (defined __ARM_NEON__)
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_NEON
#include <arm_neon.h>
+ #elif (defined __s390x__ && defined __VEC__)
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_ZVECTOR
+ #include <vecintrin.h>
#endif
#endif
+#if defined(__F16C__) && !defined(EIGEN_COMP_CLANG)
+ // We can use the optimized fp16 to float and float to fp16 conversion routines
+ #define EIGEN_HAS_FP16_C
+#endif
+
+#if defined __CUDACC__
+ #define EIGEN_VECTORIZE_CUDA
+ #include <vector_types.h>
+ #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+ #define EIGEN_HAS_CUDA_FP16
+ #endif
+#endif
+
+#if defined EIGEN_HAS_CUDA_FP16
+ #include <host_defines.h>
+ #include <cuda_fp16.h>
+#endif
+
#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
#define EIGEN_HAS_OPENMP
#endif
@@ -139,7 +248,7 @@
#endif
// MSVC for windows mobile does not have the errno.h file
-#if !(defined(_MSC_VER) && defined(_WIN32_WCE)) && !defined(__ARMCC_VERSION)
+#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM
#define EIGEN_HAS_ERRNO
#endif
@@ -159,29 +268,30 @@
// for min/max:
#include <algorithm>
+// for std::is_nothrow_move_assignable
+#ifdef EIGEN_INCLUDE_TYPE_TRAITS
+#include <type_traits>
+#endif
+
// for outputting debug info
#ifdef EIGEN_DEBUG_ASSIGN
#include <iostream>
#endif
// required for __cpuid, needs to be included after cmath
-#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) && (!defined(_WIN32_WCE))
+#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE
#include <intrin.h>
#endif
-#if defined(_CPPUNWIND) || defined(__EXCEPTIONS)
- #define EIGEN_EXCEPTIONS
-#endif
-
-#ifdef EIGEN_EXCEPTIONS
- #include <new>
-#endif
-
/** \brief Namespace containing all symbols from the %Eigen library. */
namespace Eigen {
inline static const char *SimdInstructionSetsInUse(void) {
-#if defined(EIGEN_VECTORIZE_SSE4_2)
+#if defined(EIGEN_VECTORIZE_AVX512)
+ return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_AVX)
+ return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_SSE4_2)
return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_1)
return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
@@ -193,8 +303,12 @@ inline static const char *SimdInstructionSetsInUse(void) {
return "SSE, SSE2";
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
return "AltiVec";
+#elif defined(EIGEN_VECTORIZE_VSX)
+ return "VSX";
#elif defined(EIGEN_VECTORIZE_NEON)
return "ARM NEON";
+#elif defined(EIGEN_VECTORIZE_ZVECTOR)
+ return "S390X ZVECTOR";
#else
return "None";
#endif
@@ -202,42 +316,21 @@ inline static const char *SimdInstructionSetsInUse(void) {
} // end namespace Eigen
-#define STAGE10_FULL_EIGEN2_API 10
-#define STAGE20_RESOLVE_API_CONFLICTS 20
-#define STAGE30_FULL_EIGEN3_API 30
-#define STAGE40_FULL_EIGEN3_STRICTNESS 40
-#define STAGE99_NO_EIGEN2_SUPPORT 99
-
-#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS
- #define EIGEN2_SUPPORT
- #define EIGEN2_SUPPORT_STAGE STAGE40_FULL_EIGEN3_STRICTNESS
-#elif defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
- #define EIGEN2_SUPPORT
- #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
-#elif defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS
- #define EIGEN2_SUPPORT
- #define EIGEN2_SUPPORT_STAGE STAGE20_RESOLVE_API_CONFLICTS
-#elif defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API
- #define EIGEN2_SUPPORT
- #define EIGEN2_SUPPORT_STAGE STAGE10_FULL_EIGEN2_API
-#elif defined EIGEN2_SUPPORT
- // default to stage 3, that's what it's always meant
- #define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
- #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
-#else
- #define EIGEN2_SUPPORT_STAGE STAGE99_NO_EIGEN2_SUPPORT
+#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT
+// This will generate an error message:
+#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
#endif
-#ifdef EIGEN2_SUPPORT
-#undef minor
-#endif
+namespace Eigen {
// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
// ensure QNX/QCC support
using std::size_t;
-// gcc 4.6.0 wants std:: for ptrdiff_t
+// gcc 4.6.0 wants std:: for ptrdiff_t
using std::ptrdiff_t;
+}
+
/** \defgroup Core_Module Core module
* This is the main module of Eigen providing dense matrix and vector support
* (both fixed and dynamic size) with all the features corresponding to a BLAS library
@@ -249,8 +342,8 @@ using std::ptrdiff_t;
*/
#include "src/Core/util/Constants.h"
-#include "src/Core/util/ForwardDeclarations.h"
#include "src/Core/util/Meta.h"
+#include "src/Core/util/ForwardDeclarations.h"
#include "src/Core/util/StaticAssert.h"
#include "src/Core/util/XprHelper.h"
#include "src/Core/util/Memory.h"
@@ -258,41 +351,92 @@ using std::ptrdiff_t;
#include "src/Core/NumTraits.h"
#include "src/Core/MathFunctions.h"
#include "src/Core/GenericPacketMath.h"
+#include "src/Core/MathFunctionsImpl.h"
-#if defined EIGEN_VECTORIZE_SSE
+#if defined EIGEN_VECTORIZE_AVX512
+ #include "src/Core/arch/SSE/PacketMath.h"
+ #include "src/Core/arch/AVX/PacketMath.h"
+ #include "src/Core/arch/AVX512/PacketMath.h"
+ #include "src/Core/arch/AVX512/MathFunctions.h"
+#elif defined EIGEN_VECTORIZE_AVX
+ // Use AVX for floats and doubles, SSE for integers
+ #include "src/Core/arch/SSE/PacketMath.h"
+ #include "src/Core/arch/SSE/Complex.h"
+ #include "src/Core/arch/SSE/MathFunctions.h"
+ #include "src/Core/arch/AVX/PacketMath.h"
+ #include "src/Core/arch/AVX/MathFunctions.h"
+ #include "src/Core/arch/AVX/Complex.h"
+ #include "src/Core/arch/AVX/TypeCasting.h"
+#elif defined EIGEN_VECTORIZE_SSE
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/MathFunctions.h"
#include "src/Core/arch/SSE/Complex.h"
-#elif defined EIGEN_VECTORIZE_ALTIVEC
+ #include "src/Core/arch/SSE/TypeCasting.h"
+#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
#include "src/Core/arch/AltiVec/PacketMath.h"
+ #include "src/Core/arch/AltiVec/MathFunctions.h"
#include "src/Core/arch/AltiVec/Complex.h"
#elif defined EIGEN_VECTORIZE_NEON
#include "src/Core/arch/NEON/PacketMath.h"
+ #include "src/Core/arch/NEON/MathFunctions.h"
#include "src/Core/arch/NEON/Complex.h"
+#elif defined EIGEN_VECTORIZE_ZVECTOR
+ #include "src/Core/arch/ZVector/PacketMath.h"
+ #include "src/Core/arch/ZVector/MathFunctions.h"
+ #include "src/Core/arch/ZVector/Complex.h"
+#endif
+
+// Half float support
+#include "src/Core/arch/CUDA/Half.h"
+#include "src/Core/arch/CUDA/PacketMathHalf.h"
+#include "src/Core/arch/CUDA/TypeCasting.h"
+
+#if defined EIGEN_VECTORIZE_CUDA
+ #include "src/Core/arch/CUDA/PacketMath.h"
+ #include "src/Core/arch/CUDA/MathFunctions.h"
#endif
#include "src/Core/arch/Default/Settings.h"
-#include "src/Core/Functors.h"
+#include "src/Core/functors/TernaryFunctors.h"
+#include "src/Core/functors/BinaryFunctors.h"
+#include "src/Core/functors/UnaryFunctors.h"
+#include "src/Core/functors/NullaryFunctors.h"
+#include "src/Core/functors/StlFunctors.h"
+#include "src/Core/functors/AssignmentFunctors.h"
+
+// Specialized functors to enable the processing of complex numbers
+// on CUDA devices
+#include "src/Core/arch/CUDA/Complex.h"
+
+#include "src/Core/IO.h"
#include "src/Core/DenseCoeffsBase.h"
#include "src/Core/DenseBase.h"
#include "src/Core/MatrixBase.h"
#include "src/Core/EigenBase.h"
+#include "src/Core/Product.h"
+#include "src/Core/CoreEvaluators.h"
+#include "src/Core/AssignEvaluator.h"
+
#ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
// at least confirmed with Doxygen 1.5.5 and 1.5.6
#include "src/Core/Assign.h"
#endif
+#include "src/Core/ArrayBase.h"
#include "src/Core/util/BlasUtil.h"
#include "src/Core/DenseStorage.h"
#include "src/Core/NestByValue.h"
-#include "src/Core/ForceAlignedAccess.h"
+
+// #include "src/Core/ForceAlignedAccess.h"
+
#include "src/Core/ReturnByValue.h"
#include "src/Core/NoAlias.h"
#include "src/Core/PlainObjectBase.h"
#include "src/Core/Matrix.h"
#include "src/Core/Array.h"
+#include "src/Core/CwiseTernaryOp.h"
#include "src/Core/CwiseBinaryOp.h"
#include "src/Core/CwiseUnaryOp.h"
#include "src/Core/CwiseNullaryOp.h"
@@ -300,32 +444,32 @@ using std::ptrdiff_t;
#include "src/Core/SelfCwiseBinaryOp.h"
#include "src/Core/Dot.h"
#include "src/Core/StableNorm.h"
-#include "src/Core/MapBase.h"
#include "src/Core/Stride.h"
+#include "src/Core/MapBase.h"
#include "src/Core/Map.h"
+#include "src/Core/Ref.h"
#include "src/Core/Block.h"
#include "src/Core/VectorBlock.h"
-#include "src/Core/Ref.h"
#include "src/Core/Transpose.h"
#include "src/Core/DiagonalMatrix.h"
#include "src/Core/Diagonal.h"
#include "src/Core/DiagonalProduct.h"
-#include "src/Core/PermutationMatrix.h"
-#include "src/Core/Transpositions.h"
#include "src/Core/Redux.h"
#include "src/Core/Visitor.h"
#include "src/Core/Fuzzy.h"
-#include "src/Core/IO.h"
#include "src/Core/Swap.h"
#include "src/Core/CommaInitializer.h"
-#include "src/Core/Flagged.h"
-#include "src/Core/ProductBase.h"
#include "src/Core/GeneralProduct.h"
+#include "src/Core/Solve.h"
+#include "src/Core/Inverse.h"
+#include "src/Core/SolverBase.h"
+#include "src/Core/PermutationMatrix.h"
+#include "src/Core/Transpositions.h"
#include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h"
#include "src/Core/products/GeneralBlockPanelKernel.h"
#include "src/Core/products/Parallelizer.h"
-#include "src/Core/products/CoeffBasedProduct.h"
+#include "src/Core/ProductEvaluators.h"
#include "src/Core/products/GeneralMatrixVector.h"
#include "src/Core/products/GeneralMatrixMatrix.h"
#include "src/Core/SolveTriangular.h"
@@ -340,6 +484,7 @@ using std::ptrdiff_t;
#include "src/Core/products/TriangularSolverVector.h"
#include "src/Core/BandMatrix.h"
#include "src/Core/CoreIterators.h"
+#include "src/Core/ConditionEstimator.h"
#include "src/Core/BooleanRedux.h"
#include "src/Core/Select.h"
@@ -347,18 +492,17 @@ using std::ptrdiff_t;
#include "src/Core/Random.h"
#include "src/Core/Replicate.h"
#include "src/Core/Reverse.h"
-#include "src/Core/ArrayBase.h"
#include "src/Core/ArrayWrapper.h"
#ifdef EIGEN_USE_BLAS
-#include "src/Core/products/GeneralMatrixMatrix_MKL.h"
-#include "src/Core/products/GeneralMatrixVector_MKL.h"
-#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h"
-#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h"
-#include "src/Core/products/SelfadjointMatrixVector_MKL.h"
-#include "src/Core/products/TriangularMatrixMatrix_MKL.h"
-#include "src/Core/products/TriangularMatrixVector_MKL.h"
-#include "src/Core/products/TriangularSolverMatrix_MKL.h"
+#include "src/Core/products/GeneralMatrixMatrix_BLAS.h"
+#include "src/Core/products/GeneralMatrixVector_BLAS.h"
+#include "src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h"
+#include "src/Core/products/SelfadjointMatrixMatrix_BLAS.h"
+#include "src/Core/products/SelfadjointMatrixVector_BLAS.h"
+#include "src/Core/products/TriangularMatrixMatrix_BLAS.h"
+#include "src/Core/products/TriangularMatrixVector_BLAS.h"
+#include "src/Core/products/TriangularSolverMatrix_BLAS.h"
#endif // EIGEN_USE_BLAS
#ifdef EIGEN_USE_MKL_VML
@@ -369,8 +513,4 @@ using std::ptrdiff_t;
#include "src/Core/util/ReenableStupidWarnings.h"
-#ifdef EIGEN2_SUPPORT
-#include "Eigen2Support"
-#endif
-
#endif // EIGEN_CORE_H