diff options
author | Yi Kong <yikong@google.com> | 2022-02-25 17:02:53 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2022-02-25 17:02:53 +0000 |
commit | edb0ad5bb04b48aab7dd0978f0475edd3550de7c (patch) | |
tree | fb979fb4cf4f8052c8cc66b1ec9516d91fcd859b /unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h | |
parent | 8fd413e275f78a4c240f1442ce5cf77c73a20a55 (diff) | |
parent | bc0f5df265caa21a2120c22453655a7fcc941991 (diff) | |
download | eigen-7ecd5586f9218fe07c97ede8e405561aca973d78.tar.gz |
Merge changes Iee153445,Iee274471 am: 79df15ea88 am: 10f298fc41 am: 7cb5001398 am: bc0f5df265aml_uwb_331910010aml_uwb_331820070aml_uwb_331613010aml_uwb_331611010aml_uwb_331410010aml_uwb_331310030aml_uwb_331115000aml_uwb_331015040aml_uwb_330810010aml_tz4_332714070aml_tz4_332714050aml_tz4_332714010aml_tz4_331910000aml_tz4_331314030aml_tz4_331314020aml_tz4_331314010aml_tz4_331012050aml_tz4_331012040aml_tz4_331012000aml_ase_331311020aml_ase_331112000aml_ase_331011020android13-mainline-uwb-releaseandroid13-mainline-tzdata4-releaseandroid13-mainline-appsearch-releaseaml_tz4_332714010
Original change: https://android-review.googlesource.com/c/platform/external/eigen/+/1999079
Change-Id: Ife39d10c8b23d3eeb174cd52f462f9d20527ad03
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h | 39 |
1 files changed, 28 insertions, 11 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h index 5cf7b4f71..974feb0ad 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h @@ -21,14 +21,28 @@ enum { // Default Blocking Strategy -template <typename LhsMapper, typename RhsMapper, typename Index, int ShardingType=ShardByCol> +template<typename ResScalar, typename LhsScalar, typename RhsScalar, typename StorageIndex, int ShardingType = ShardByCol> class TensorContractionBlocking { public: - typedef typename LhsMapper::Scalar LhsScalar; - typedef typename RhsMapper::Scalar RhsScalar; + /* + adding EIGEN_DEVICE_FUNC unconditionally to 'TensorContractionBlocking' constructor in `TensorContractionBlocking.h` + requires adding EIGEN_DEVICE_FUNC to `computeProductBlockingSizes` in `GeneralBlockPanelKernel.h` + which in turn, requires adding EIGEN_DEVICE_FUNC to `evaluateProductBlockingSizesHeuristic` in `GeneralBlockPanelKernel.h` + which in turn, requires adding EIGEN_DEVICE_FUNC to `manage_caching_sizes` in `GeneralBlockPanelKernel.h` + (else HIPCC will error out) - EIGEN_DEVICE_FUNC TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) : + However adding EIGEN_DEVICE_FUNC to `manage_caching_sizes` in `GeneralBlockPanelKernel.h` + results in NVCC erroring out with the following error + + ../Eigen/src/Core/products/GeneralBlockPanelKernel.h(57): error #2901: + dynamic initialization is not supported for function-scope static variables within a __device__/__global__ function + */ + + #if !defined(EIGEN_HIPCC) + EIGEN_DEVICE_FUNC + #endif + TensorContractionBlocking(StorageIndex k, StorageIndex m, StorageIndex n, StorageIndex num_threads = 1) : kc_(k), mc_(m), nc_(n) { if (ShardingType == ShardByCol) { @@ -37,19 +51,22 @@ class TensorContractionBlocking { else { computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, nc_, mc_, num_threads); } + + const int rhs_packet_size = internal::packet_traits<RhsScalar>::size; + kc_ = (rhs_packet_size <= 8 || kc_ <= rhs_packet_size) ? + kc_ : (kc_ / rhs_packet_size) * rhs_packet_size; } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index kc() const { return kc_; } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index mc() const { return mc_; } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index nc() const { return nc_; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex kc() const { return kc_; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex mc() const { return mc_; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex nc() const { return nc_; } private: - Index kc_; - Index mc_; - Index nc_; + StorageIndex kc_; + StorageIndex mc_; + StorageIndex nc_; }; - } // end namespace internal } // end namespace Eigen |