diff options
author | Andy Hung <hunga@google.com> | 2024-01-17 21:01:59 -0800 |
---|---|---|
committer | Andy Hung <hunga@google.com> | 2024-03-11 14:16:19 -0700 |
commit | 43abce3697741abcae12d072e5f8366fe2a5a565 (patch) | |
tree | 3d3ef3c5fa2e5bc0edd703392bd1b8ac0eab29f6 | |
parent | 4f1e68ef9cc743b54e19751d262e1270ab15df2e (diff) | |
download | media-43abce3697741abcae12d072e5f8366fe2a5a565.tar.gz |
audio mutex: Optimize data access
Distinguish between statistics (multiple writer) and
thread_local (single writer) memory access to optimize
data access.
Test: atest audio_mutex_tests
Test: atest audio_mutex_benchmark
Test: atest CtsMediaAudioTestCases
Bug: 321302216
Merged-In: I662e878933031111b68b21917223b64c2eee9d05
Change-Id: I662e878933031111b68b21917223b64c2eee9d05
-rw-r--r-- | audio_utils/benchmarks/audio_mutex_benchmark.cpp | 171 | ||||
-rw-r--r-- | audio_utils/include/audio_utils/mutex.h | 132 |
2 files changed, 221 insertions, 82 deletions
diff --git a/audio_utils/benchmarks/audio_mutex_benchmark.cpp b/audio_utils/benchmarks/audio_mutex_benchmark.cpp index a58aec31..3bf04f82 100644 --- a/audio_utils/benchmarks/audio_mutex_benchmark.cpp +++ b/audio_utils/benchmarks/audio_mutex_benchmark.cpp @@ -34,79 +34,84 @@ $ atest audio_mutex_benchmark Benchmark Time CPU Iteration audio_mutex_benchmark: - #BM_atomic_add_equals<int32_t> 6.508700118072382 ns 6.471633177192451 ns 108110486 - #BM_atomic_add_to_seq_cst<int16_t> 6.557658152513349 ns 6.526665108542128 ns 107252873 - #BM_atomic_add_to_seq_cst<int32_t> 6.61304199453549 ns 6.58175539524565 ns 106351923 - #BM_atomic_add_to_seq_cst<int64_t> 6.557521711571485 ns 6.5265363568644625 ns 107250668 - #BM_atomic_add_to_seq_cst<float> 7.895243222524512 ns 7.858297243207844 ns 89394951 - #BM_atomic_add_to_seq_cst<double> 7.931688495474578 ns 7.893971885098797 ns 88653486 - #BM_atomic_add_to_relaxed<int16_t> 5.140386288993005 ns 5.116383769230237 ns 135131188 - #BM_atomic_add_to_relaxed<int32_t> 5.181670175781189 ns 5.157418005923224 ns 135724804 - #BM_atomic_add_to_relaxed<int64_t> 5.161260548149761 ns 5.136776648952849 ns 135135216 - #BM_atomic_add_to_relaxed<float> 7.786417198158838 ns 7.749791796134465 ns 90646732 - #BM_atomic_add_to_relaxed<double> 7.760358404716961 ns 7.723992286938152 ns 90644677 - #BM_gettid 2.116039491081284 ns 2.106033253650779 ns 332358395 - #BM_systemTime 43.074033150581585 ns 42.8699911242381 ns 16328739 - #BM_thread_8_variables 2.8214796173366734 ns 2.8081271094521703 ns 249273547 - #BM_thread_local_8_variables 2.819987500327649 ns 2.808149311074747 ns 249278495 - #BM_StdMutexLockUnlock 18.155770972784783 ns 18.070903999828232 ns 38747264 - #BM_RWMutexReadLockUnlock 16.12456214871892 ns 16.04901684644192 ns 43612414 - #BM_RWMutexWriteLockUnlock 19.14824893658628 ns 19.05893391346091 ns 36725255 - #BM_SharedMutexReadLockUnlock 39.54155074347332 ns 39.35497456828369 ns 17788418 - #BM_SharedMutexWriteLockUnlock 41.58785205766037 ns 
41.39323040198865 ns 16911078 - #BM_AudioUtilsMutexLockUnlock 66.56918230215399 ns 66.25544975244046 ns 10562911 - #BM_AudioUtilsPIMutexLockUnlock 67.02589961630612 ns 66.70819768056897 ns 10493090 - #BM_StdMutexInitializationLockUnlock 29.544903877103074 ns 29.406544528057406 ns 23801319 - #BM_RWMutexInitializationReadLockUnlock 26.91749522594829 ns 26.802654591541785 ns 26123567 - #BM_RWMutexInitializationWriteLockUnlock 30.20599678894913 ns 30.06422812747118 ns 23284596 - #BM_SharedMutexInitializationReadLockUnlock 58.070478136125395 ns 57.79511704041489 ns 12111671 - #BM_SharedMutexInitializationWriteLockUnlock 59.36722820827075 ns 59.08875400469678 ns 11843905 - #BM_AudioUtilsMutexInitializationLockUnlock 85.04952357479699 ns 84.65093492146583 ns 8269839 - #BM_AudioUtilsPIMutexInitializationLockUnlock 83.32953114993384 ns 82.9411400506946 ns 8440765 - #BM_StdMutexBlockingConditionVariable/threads:2 20067.186478012434 ns 25402.779402102544 ns 54792 - #BM_AudioUtilsMutexBlockingConditionVariable/threads:2 48417.40553370931 ns 58220.13591731267 ns 23220 - #BM_AudioUtilsPIMutexBlockingConditionVariable/threads:2 48724.90563264992 ns 59858.82489342454 ns 15482 - #BM_StdMutexScopedLockUnlock/threads:1 33.58821991644139 ns 33.41913176098606 ns 16058919 - #BM_StdMutexScopedLockUnlock/threads:2 356.67886764843007 ns 707.8318856903202 ns 4625680 - #BM_StdMutexScopedLockUnlock/threads:4 130.45108549886208 ns 447.1268742499998 ns 4000000 - #BM_StdMutexScopedLockUnlock/threads:8 139.0823761208755 ns 541.9088026721488 ns 1362200 - #BM_RWMutexScopedReadLockUnlock/threads:1 32.33613871803748 ns 32.194204614295046 ns 21710272 - #BM_RWMutexScopedReadLockUnlock/threads:2 160.47792160732033 ns 319.3012639397403 ns 2095986 - #BM_RWMutexScopedReadLockUnlock/threads:4 217.21087383931467 ns 861.2673855686197 ns 839892 - #BM_RWMutexScopedReadLockUnlock/threads:8 232.19586516883186 ns 1831.4409709220026 ns 491368 - #BM_RWMutexScopedWriteLockUnlock/threads:1 33.49908180449042 ns 
33.34195684310611 ns 21010780 - #BM_RWMutexScopedWriteLockUnlock/threads:2 286.096410842338 ns 564.599202114389 ns 2485068 - #BM_RWMutexScopedWriteLockUnlock/threads:4 451.7913123512162 ns 1601.6332793492106 ns 1931432 - #BM_RWMutexScopedWriteLockUnlock/threads:8 417.50240217790537 ns 1678.8585405353656 ns 794072 - #BM_SharedMutexScopedReadLockUnlock/threads:1 67.65354544884363 ns 67.37498338520537 ns 9133426 - #BM_SharedMutexScopedReadLockUnlock/threads:2 370.22816132765433 ns 735.4710534035784 ns 1322608 - #BM_SharedMutexScopedReadLockUnlock/threads:4 298.7991937078523 ns 1015.8674764877635 ns 991824 - #BM_SharedMutexScopedReadLockUnlock/threads:8 359.17200914091643 ns 1500.1318202480697 ns 615960 - #BM_SharedMutexScopedWriteLockUnlock/threads:1 73.40224842642553 ns 73.06218848168656 ns 8616869 - #BM_SharedMutexScopedWriteLockUnlock/threads:2 502.8427941278981 ns 909.1756670594543 ns 599122 - #BM_SharedMutexScopedWriteLockUnlock/threads:4 2322.7325028106275 ns 6083.585590040707 ns 313436 - #BM_SharedMutexScopedWriteLockUnlock/threads:8 4948.555700826256 ns 15412.772486815033 ns 373152 - #BM_AudioUtilsMutexScopedLockUnlock/threads:1 147.60580533538862 ns 146.97151308638587 ns 4062848 - #BM_AudioUtilsMutexScopedLockUnlock/threads:2 5409.319112352385 ns 10729.084861761592 ns 728090 - #BM_AudioUtilsMutexScopedLockUnlock/threads:4 630.9403610213494 ns 1866.9171243841429 ns 579688 - #BM_AudioUtilsMutexScopedLockUnlock/threads:8 612.9153996947896 ns 2167.0654441098654 ns 417104 - #BM_AudioUtilsPIMutexScopedLockUnlock/threads:1 148.94249680999073 ns 148.3061023465011 ns 4387722 - #BM_AudioUtilsPIMutexScopedLockUnlock/threads:2 3537.898640072271 ns 4287.604650248743 ns 356196 - #BM_AudioUtilsPIMutexScopedLockUnlock/threads:4 13969.834843789307 ns 19572.29615170118 ns 28688 - #BM_AudioUtilsPIMutexScopedLockUnlock/threads:8 30652.264078729862 ns 40000.50360617244 ns 23848 - #BM_StdMutexReverseScopedLockUnlock/threads:1 31.34740304135938 ns 31.200396418488175 ns 21854682 - 
#BM_StdMutexReverseScopedLockUnlock/threads:2 54.06016658620641 ns 103.2554157873692 ns 5317694 - #BM_StdMutexReverseScopedLockUnlock/threads:4 169.8661622311813 ns 592.4042833246494 ns 3209096 - #BM_StdMutexReverseScopedLockUnlock/threads:8 156.65913206788008 ns 604.623918327717 ns 1742672 - #BM_AudioUtilsMutexReverseScopedLockUnlock/threads:1 147.51456839840807 ns 146.73295356311675 ns 4395816 - #BM_AudioUtilsMutexReverseScopedLockUnlock/threads:2 2425.8992549948744 ns 4812.346055000001 ns 200000 - #BM_AudioUtilsMutexReverseScopedLockUnlock/threads:4 453.8639331349259 ns 1256.0567649999934 ns 400000 - #BM_AudioUtilsMutexReverseScopedLockUnlock/threads:8 635.5625220561735 ns 2294.725433768965 ns 356872 - #BM_AudioUtilsPIMutexReverseScopedLockUnlock/threads:1 148.7079480412097 ns 148.0359150267745 ns 4188943 - #BM_AudioUtilsPIMutexReverseScopedLockUnlock/threads:2 14037.435207752424 ns 17829.977469499998 ns 2000000 - #BM_AudioUtilsPIMutexReverseScopedLockUnlock/threads:4 20098.127750043204 ns 26126.68207500001 ns 40000 - #BM_AudioUtilsPIMutexReverseScopedLockUnlock/threads:8 28805.264783022852 ns 38780.66452074406 ns 16776 - #BM_empty_while 0.352701456999057 ns 0.35104016500000057 ns 1000000000 + #BM_atomic_add_equals<int32_t> 6.5014863185278395 ns 6.471886635411762 ns 108155820 + #BM_atomic_add_to_seq_cst<int16_t> 6.61234085779295 ns 6.581580170972295 ns 106366123 + #BM_atomic_add_to_seq_cst<int32_t> 6.557071440111132 ns 6.526548754217108 ns 107245051 + #BM_atomic_add_to_seq_cst<int64_t> 6.61054872108173 ns 6.581425697716481 ns 106357707 + #BM_atomic_add_to_seq_cst<float> 7.93492707040209 ns 7.897547368104975 ns 88252950 + #BM_atomic_add_to_seq_cst<double> 7.886038567240897 ns 7.8497197451292 ns 89287119 + #BM_atomic_add_to_relaxed<int16_t> 5.182024355920982 ns 5.157555443739661 ns 135737011 + #BM_atomic_add_to_relaxed<int32_t> 5.16494770312128 ns 5.1405126580046945 ns 138577686 + #BM_atomic_add_to_relaxed<int64_t> 5.1814947733364125 ns 5.157353700881224 ns 
135464777 + #BM_atomic_add_to_relaxed<float> 7.783111893574588 ns 7.746887673515652 ns 90644491 + #BM_atomic_add_to_relaxed<double> 7.7814935288184 ns 7.745133319828123 ns 90241753 + #BM_atomic_add_to_unordered<int16_t> 0.3536150309955701 ns 0.3519483499999989 ns 1000000000 + #BM_atomic_add_to_unordered<int32_t> 0.35361311800079415 ns 0.3519554869999997 ns 1000000000 + #BM_atomic_add_to_unordered<int64_t> 0.3536399739969056 ns 0.35196829499999893 ns 1000000000 + #BM_atomic_add_to_unordered<float> 0.7053300267486111 ns 0.7020178623833573 ns 997066161 + #BM_atomic_add_to_unordered<double> 0.7053142521506013 ns 0.7020198464745387 ns 997044788 + #BM_gettid 2.115943991086376 ns 2.106083654346265 ns 332367453 + #BM_systemTime 43.07716484148213 ns 42.87261607042414 ns 16329289 + #BM_thread_8_variables 2.821637561733614 ns 2.808209234245132 ns 249258815 + #BM_thread_local_8_variables 2.820507201699363 ns 2.8082240903316533 ns 249273405 + #BM_StdMutexLockUnlock 18.406383115576315 ns 18.319232771558767 ns 38195925 + #BM_RWMutexReadLockUnlock 17.04181301677912 ns 16.962061518265294 ns 41265858 + #BM_RWMutexWriteLockUnlock 19.11833893994331 ns 19.02850556999233 ns 36797896 + #BM_SharedMutexReadLockUnlock 35.60498964393801 ns 35.43688128260861 ns 19751748 + #BM_SharedMutexWriteLockUnlock 38.00521852390459 ns 37.82714893741646 ns 18505041 + #BM_AudioUtilsMutexLockUnlock 33.20438439586303 ns 33.04808193472067 ns 21175687 + #BM_AudioUtilsPIMutexLockUnlock 34.823252358618 ns 34.659274618519255 ns 20200626 + #BM_StdMutexInitializationLockUnlock 30.918411041550183 ns 30.77489556434292 ns 22749414 + #BM_RWMutexInitializationReadLockUnlock 28.41113243181421 ns 28.276704201193102 ns 24754897 + #BM_RWMutexInitializationWriteLockUnlock 30.991909358233716 ns 30.84726331659203 ns 22690659 + #BM_SharedMutexInitializationReadLockUnlock 58.6180771593902 ns 58.344618743289224 ns 11994661 + #BM_SharedMutexInitializationWriteLockUnlock 60.57289575308067 ns 60.291177006858376 ns 11610697 + 
#BM_AudioUtilsMutexInitializationLockUnlock 45.1727720110517 ns 44.96317124907669 ns 15567973 + #BM_AudioUtilsPIMutexInitializationLockUnlock 56.51245654873645 ns 56.24826020323107 ns 12446425 + #BM_StdMutexBlockingConditionVariable/threads:2 13005.859125798019 ns 15785.260059044813 ns 42002 + #BM_AudioUtilsMutexBlockingConditionVariable/threads:2 51395.10524573919 ns 62111.48912393159 ns 93600 + #BM_AudioUtilsPIMutexBlockingConditionVariable/threads:2 38003.326141779035 ns 48747.110842948 ns 16988 + #BM_StdMutexScopedLockUnlock/threads:1 31.047443261227432 ns 30.90162837297469 ns 21748273 + #BM_StdMutexScopedLockUnlock/threads:2 187.5369570007024 ns 371.9489944999989 ns 2000000 + #BM_StdMutexScopedLockUnlock/threads:4 183.24430610750633 ns 652.1608249451782 ns 2367624 + #BM_StdMutexScopedLockUnlock/threads:8 114.40673350319138 ns 457.55848734565944 ns 1731896 + #BM_RWMutexScopedReadLockUnlock/threads:1 32.36649526110872 ns 32.21528625736229 ns 21668875 + #BM_RWMutexScopedReadLockUnlock/threads:2 233.82989524907316 ns 465.2684319999982 ns 2000000 + #BM_RWMutexScopedReadLockUnlock/threads:4 234.2368397183923 ns 876.9921508228601 ns 701220 + #BM_RWMutexScopedReadLockUnlock/threads:8 261.81297505285545 ns 2047.9601000535506 ns 403384 + #BM_RWMutexScopedWriteLockUnlock/threads:1 34.75660442655403 ns 34.59235428851013 ns 20236050 + #BM_RWMutexScopedWriteLockUnlock/threads:2 233.78794333484538 ns 459.1015740364359 ns 2612646 + #BM_RWMutexScopedWriteLockUnlock/threads:4 329.8850715993315 ns 1057.9245678854486 ns 782200 + #BM_RWMutexScopedWriteLockUnlock/threads:8 547.4260907751869 ns 2289.554716970124 ns 685016 + #BM_SharedMutexScopedReadLockUnlock/threads:1 67.81834135936606 ns 67.5253075922258 ns 9641336 + #BM_SharedMutexScopedReadLockUnlock/threads:2 359.628342996484 ns 714.5713215382715 ns 1716424 + #BM_SharedMutexScopedReadLockUnlock/threads:4 354.53138343836275 ns 1245.6886813947233 ns 948032 + #BM_SharedMutexScopedReadLockUnlock/threads:8 358.283363638594 ns 
1612.215822485538 ns 431424 + #BM_SharedMutexScopedWriteLockUnlock/threads:1 74.97589649902241 ns 74.61641205682437 ns 8257929 + #BM_SharedMutexScopedWriteLockUnlock/threads:2 386.34333957740654 ns 686.9830245124164 ns 1034904 + #BM_SharedMutexScopedWriteLockUnlock/threads:4 6140.788010450615 ns 15583.110439217011 ns 403172 + #BM_SharedMutexScopedWriteLockUnlock/threads:8 3039.076686160784 ns 11077.392742135295 ns 129184 + #BM_AudioUtilsMutexScopedLockUnlock/threads:1 66.64561062274281 ns 66.349105514304 ns 10548911 + #BM_AudioUtilsMutexScopedLockUnlock/threads:2 481.48238149951794 ns 951.586758000001 ns 2000000 + #BM_AudioUtilsMutexScopedLockUnlock/threads:4 285.27629596770197 ns 908.6821992538335 ns 737632 + #BM_AudioUtilsMutexScopedLockUnlock/threads:8 336.3453911753518 ns 1293.2229317430258 ns 578168 + #BM_AudioUtilsPIMutexScopedLockUnlock/threads:1 67.85915888024319 ns 67.56177071445192 ns 8313616 + #BM_AudioUtilsPIMutexScopedLockUnlock/threads:2 10762.71635475132 ns 13490.832976499998 ns 2000000 + #BM_AudioUtilsPIMutexScopedLockUnlock/threads:4 17841.919923356494 ns 26260.20764057142 ns 47876 + #BM_AudioUtilsPIMutexScopedLockUnlock/threads:8 27627.09897164099 ns 36121.4640768056 ns 15728 + #BM_StdMutexReverseScopedLockUnlock/threads:1 31.179721602212002 ns 31.038040257397274 ns 21308084 + #BM_StdMutexReverseScopedLockUnlock/threads:2 61.21498053399278 ns 116.6737022856601 ns 6378638 + #BM_StdMutexReverseScopedLockUnlock/threads:4 112.91128021867448 ns 370.7553689205596 ns 1887344 + #BM_StdMutexReverseScopedLockUnlock/threads:8 141.66823079666526 ns 551.0014334496227 ns 1566152 + #BM_AudioUtilsMutexReverseScopedLockUnlock/threads:1 67.0489114081147 ns 66.73026969103846 ns 8505214 + #BM_AudioUtilsMutexReverseScopedLockUnlock/threads:2 220.90466438402458 ns 416.39108014586975 ns 1243832 + #BM_AudioUtilsMutexReverseScopedLockUnlock/threads:4 291.7432157916735 ns 969.6308767058123 ns 1076028 + #BM_AudioUtilsMutexReverseScopedLockUnlock/threads:8 316.76478317856913 
ns 1169.0109563042486 ns 586968 + #BM_AudioUtilsPIMutexReverseScopedLockUnlock/threads:1 68.07235467128096 ns 67.79246629476496 ns 8952912 + #BM_AudioUtilsPIMutexReverseScopedLockUnlock/threads:2 4730.325827513298 ns 5911.874809999971 ns 200000 + #BM_AudioUtilsPIMutexReverseScopedLockUnlock/threads:4 28942.076574315968 ns 38915.04260355007 ns 23660 + #BM_AudioUtilsPIMutexReverseScopedLockUnlock/threads:8 28147.14339638534 ns 36412.06354560159 ns 44472 + #BM_empty_while 0.3526294760085875 ns 0.35102246399999615 ns 1000000000 */ @@ -165,6 +170,26 @@ BENCHMARK(BM_atomic_add_to_relaxed<float>); BENCHMARK(BM_atomic_add_to_relaxed<double>); +template <typename T> +static void BM_atomic_add_to_unordered(benchmark::State &state) { + int64_t i64 = 10; + android::audio_utils::unordered_atomic<T> dst; + while (state.KeepRunning()) { + android::audio_utils::atomic_add_to(dst, i64, std::memory_order_relaxed); + } + LOG(DEBUG) << __func__ << " " << dst.load(); +} + +BENCHMARK(BM_atomic_add_to_unordered<int16_t>); + +BENCHMARK(BM_atomic_add_to_unordered<int32_t>); + +BENCHMARK(BM_atomic_add_to_unordered<int64_t>); + +BENCHMARK(BM_atomic_add_to_unordered<float>); + +BENCHMARK(BM_atomic_add_to_unordered<double>); + // Benchmark gettid(). The mutex class uses this to get the linux thread id. static void BM_gettid(benchmark::State &state) { int32_t value = 0; diff --git a/audio_utils/include/audio_utils/mutex.h b/audio_utils/include/audio_utils/mutex.h index a473b611..75d397bd 100644 --- a/audio_utils/include/audio_utils/mutex.h +++ b/audio_utils/include/audio_utils/mutex.h @@ -389,6 +389,96 @@ public: static constexpr bool abort_on_invalid_unlock_ = true; }; +// relaxed_atomic implements the same features as std::atomic<T> but using +// std::memory_order_relaxed as default. +// +// This is the minimum consistency for the multiple writer multiple reader case. 
+ +template <typename T> +class relaxed_atomic : private std::atomic<T> { +public: + constexpr relaxed_atomic(T desired) : std::atomic<T>(desired) {} + operator T() const { return std::atomic<T>::load(std::memory_order_relaxed); } + T operator=(T desired) { + std::atomic<T>::store(desired, std::memory_order_relaxed); return desired; + } + + T operator--() { return std::atomic<T>::fetch_sub(1, std::memory_order_relaxed) - 1; } + T operator++() { return std::atomic<T>::fetch_add(1, std::memory_order_relaxed) + 1; } + T operator+=(const T value) { + return std::atomic<T>::fetch_add(value, std::memory_order_relaxed) + value; + } + + T load(std::memory_order order = std::memory_order_relaxed) const { + return std::atomic<T>::load(order); + } + T fetch_add(T arg, std::memory_order order =std::memory_order_relaxed) { + return std::atomic<T>::fetch_add(arg, order); + } + bool compare_exchange_weak( + T& expected, T desired, std::memory_order order = std::memory_order_relaxed) { + return std::atomic<T>::compare_exchange_weak(expected, desired, order); + } +}; + +// unordered_atomic implements data storage such that memory reads have a value +// consistent with a memory write in some order, i.e. not having values +// "out of thin air". +// +// Unordered memory reads and writes may not actually take place but be implicitly cached. +// Nevertheless, a memory read should return at least as contemporaneous a value +// as the last memory write before the write thread memory barrier that +// preceded the most recent read thread memory barrier. +// +// This is weaker than relaxed_atomic and has no equivalent C++ terminology. +// unordered_atomic would be used for a single writer, multiple reader case, +// where data access of type T would be implemented by the compiler and +// hw architecture with a single "uninterruptible" memory operation. +// Note that multiple writers would cause read-modify-write unordered_atomic +// operations to have inconsistent results. 
+ +template <typename T> +class unordered_atomic { + static_assert(std::atomic<T>::is_always_lock_free); +public: + unordered_atomic() = default; + constexpr unordered_atomic(T desired) : t_(desired) {} + operator T() const { return t_; } + T& operator=(T desired) { return t_ = desired; } + + T& operator--() { const T temp = t_ - 1; return t_ = temp; } + T& operator++() { const T temp = t_ + 1; return t_ = temp; } + T& operator+=(const T value) { const T temp = t_ + value; return t_ = temp; } + + T load(std::memory_order order = std::memory_order_relaxed) const { (void)order; return t_; } + +private: + T t_; +}; + +// While std::atomic with the default std::memory_order_seq_cst +// access could be used, it results in performance loss over less +// restrictive memory access. + +// stats_atomic is a multiple writer multiple reader object. +// +// This is normally used to increment statistics counters on +// mutex priority categories. +// +// We used relaxed_atomic instead of std::atomic/memory_order_seq_cst here. +template <typename T> +using stats_atomic = relaxed_atomic<T>; + +// thread_atomic is a single writer multiple reader object. +// +// This is normally accessed as a thread local (hence single writer) +// but may be accessed (rarely) by multiple readers on deadlock +// detection which does not modify the data. +// +// We use unordered_atomic instead of std::atomic/memory_order_seq_cst here. +template <typename T> +using thread_atomic = unordered_atomic<T>; + /** * Helper method to accumulate floating point values to an atomic * prior to C++23 support of atomic<float> atomic<double> accumulation. 
@@ -411,6 +501,30 @@ void atomic_add_to(std::atomic<AccumulateType> &dst, ValueType src, dst.fetch_add(src, order); } +template <typename AccumulateType, typename ValueType> +requires std::is_floating_point<AccumulateType>::value +void atomic_add_to(relaxed_atomic<AccumulateType> &dst, ValueType src, + std::memory_order order = std::memory_order_relaxed) { + AccumulateType expected; + do { + expected = dst; + } while (!dst.compare_exchange_weak(expected, expected + src, order)); +} + +template <typename AccumulateType, typename ValueType> +requires std::is_integral<AccumulateType>::value +void atomic_add_to(relaxed_atomic<AccumulateType> &dst, ValueType src, + std::memory_order order = std::memory_order_relaxed) { + dst.fetch_add(src, order); +} + +template <typename AccumulateType, typename ValueType> +void atomic_add_to(unordered_atomic<AccumulateType> &dst, ValueType src, + std::memory_order order = std::memory_order_relaxed) { + (void)order; // unused + dst = dst + src; +} + /** * mutex_stat is a struct composed of atomic members associated * with usage of a particular mutex order. @@ -427,11 +541,11 @@ struct mutex_stat { static_assert(std::is_integral_v<CounterType>); static_assert(std::atomic<CounterType>::is_always_lock_free); static_assert(std::atomic<AccumulatorType>::is_always_lock_free); - std::atomic<CounterType> locks = 0; // number of times locked - std::atomic<CounterType> unlocks = 0; // number of times unlocked - std::atomic<CounterType> waits = 0; // number of locks that waitedwa - std::atomic<AccumulatorType> wait_sum_ns = 0.; // sum of time waited. - std::atomic<AccumulatorType> wait_sumsq_ns = 0.; // sumsq of time waited. + stats_atomic<CounterType> locks = 0; // number of times locked + stats_atomic<CounterType> unlocks = 0; // number of times unlocked + stats_atomic<CounterType> waits = 0; // number of locks that waited + stats_atomic<AccumulatorType> wait_sum_ns = 0.; // sum of time waited. 
+ stats_atomic<AccumulatorType> wait_sumsq_ns = 0.; // sumsq of time waited. template <typename WaitTimeType> void add_wait_time(WaitTimeType wait_ns) { @@ -524,7 +638,7 @@ struct mutex_stat { template <typename Item, typename Payload, size_t N> class atomic_stack { public: - using item_payload_pair_t = std::pair<std::atomic<Item>, std::atomic<Payload>>; + using item_payload_pair_t = std::pair<thread_atomic<Item>, thread_atomic<Payload>>; /** * Puts the item at the top of the stack. @@ -648,8 +762,8 @@ public: const auto& invalid() const { return invalid_; } private: - std::atomic<size_t> top_ = 0; // ranges from 0 to N - 1 - std::atomic<size_t> true_top_ = 0; // always >= top_. + thread_atomic<size_t> top_ = 0; // ranges from 0 to N - 1 + thread_atomic<size_t> true_top_ = 0; // always >= top_. // if true_top_ == top_ the subset stack is complete. /* @@ -761,7 +875,7 @@ public: } const pid_t tid_; // me - std::atomic<MutexHandle> mutex_wait_{}; // mutex waiting for + thread_atomic<MutexHandle> mutex_wait_{}; // mutex waiting for atomic_stack_t mutexes_held_; // mutexes held }; |