Merge changes Iee153445,Iee274471 am: 79df15ea88 am: 10f298fc41 am: 7cb5001398 am: bc0f5df265
aml_uwb_331910010 aml_uwb_331820070 aml_uwb_331613010 aml_uwb_331611010 aml_uwb_331410010 aml_uwb_331310030 aml_uwb_331115000 aml_uwb_331015040 aml_uwb_330810010 aml_tz4_332714070 aml_tz4_332714050 aml_tz4_332714010 aml_tz4_331910000 aml_tz4_331314030 aml_tz4_331314020 aml_tz4_331314010 aml_tz4_331012050 aml_tz4_331012040 aml_tz4_331012000 aml_ase_331311020 aml_ase_331112000 aml_ase_331011020 android13-mainline-uwb-release android13-mainline-tzdata4-release android13-mainline-appsearch-release aml_tz4_332714010

Original change: https://android-review.googlesource.com/c/platform/external/eigen/+/1999079
Change-Id: Ife39d10c8b23d3eeb174cd52f462f9d20527ad03
author: Yi Kong <yikong@google.com> 2022-02-25 17:02:53 +0000
committer: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2022-02-25 17:02:53 +0000
commit: edb0ad5bb04b48aab7dd0978f0475edd3550de7c (patch)
tree: fb979fb4cf4f8052c8cc66b1ec9516d91fcd859b /unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
parent: 8fd413e275f78a4c240f1442ce5cf77c73a20a55 (diff)
parent: bc0f5df265caa21a2120c22453655a7fcc941991 (diff)
download: eigen-7ecd5586f9218fe07c97ede8e405561aca973d78.tar.gz
1 files changed, 6 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
index 83c449cf1..195267ce8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
@@ -174,8 +174,11 @@ class TensorCostModel {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int numThreads(
       double output_size, const TensorOpCost& cost_per_coeff, int max_threads) {
     double cost = totalCost(output_size, cost_per_coeff);
-    int threads = (cost - kStartupCycles) / kPerThreadCycles + 0.9;
-    return numext::mini(max_threads, numext::maxi(1, threads));
+    double threads = (cost - kStartupCycles) / kPerThreadCycles + 0.9;
+    // Make sure we don't invoke undefined behavior when we convert to an int.
+    threads = numext::mini<double>(threads, GenericNumTraits<int>::highest());
+    return numext::mini(max_threads,
+                        numext::maxi<int>(1, static_cast<int>(threads)));
   }
 
   // taskSize assesses parallel task size.
@@ -186,14 +189,13 @@ class TensorCostModel {
     return totalCost(output_size, cost_per_coeff) / kTaskSize;
   }
 
- private:
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double totalCost(
       double output_size, const TensorOpCost& cost_per_coeff) {
     // Cost of memory fetches from L2 cache. 64 is typical cache line size.
     // 11 is L2 cache latency on Haswell.
     // We don't know whether data is in L1, L2 or L3. But we are most interested
     // in single-threaded computational time around 100us-10ms (smaller time
-    // is too small for parallelization, larger time is not intersting
+    // is too small for parallelization, larger time is not interesting
     // either because we are probably using all available threads already).
     // And for the target time range, L2 seems to be what matters. Data set
     // fitting into L1 is too small to take noticeable time. Data set fitting
author	Yi Kong <yikong@google.com>	2022-02-25 17:02:53 +0000
committer	Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>	2022-02-25 17:02:53 +0000
commit	edb0ad5bb04b48aab7dd0978f0475edd3550de7c (patch)
tree	fb979fb4cf4f8052c8cc66b1ec9516d91fcd859b /unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
parent	8fd413e275f78a4c240f1442ce5cf77c73a20a55 (diff)
parent	bc0f5df265caa21a2120c22453655a7fcc941991 (diff)
download	eigen-7ecd5586f9218fe07c97ede8e405561aca973d78.tar.gz