Diffstat (limited to 'third_party/abseil-cpp/absl/synchronization/mutex_benchmark.cc')
-rw-r--r--  third_party/abseil-cpp/absl/synchronization/mutex_benchmark.cc  223
1 file changed, 223 insertions(+), 0 deletions(-)
diff --git a/third_party/abseil-cpp/absl/synchronization/mutex_benchmark.cc b/third_party/abseil-cpp/absl/synchronization/mutex_benchmark.cc
new file mode 100644
index 0000000000..ab1880012a
--- /dev/null
+++ b/third_party/abseil-cpp/absl/synchronization/mutex_benchmark.cc
@@ -0,0 +1,223 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <mutex> // NOLINT(build/c++11)
+#include <vector>
+
+#include "absl/base/internal/cycleclock.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/synchronization/blocking_counter.h"
+#include "absl/synchronization/internal/thread_pool.h"
+#include "absl/synchronization/mutex.h"
+#include "benchmark/benchmark.h"
+
+namespace {
+
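+// Baseline: lock/unlock of a single shared Mutex with an empty critical
+// section, run both uncontended (Threads(1)) and with one thread per CPU.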
+void BM_Mutex(benchmark::State& state) {
+ static absl::Mutex* mu = new absl::Mutex;
+ for (auto _ : state) {
+ absl::MutexLock lock(mu);
+ }
+}
+BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();
+
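+// Spins until approximately `ns` nanoseconds have elapsed, converting the
+// delay to a cycle count via CycleClock::Frequency(). Incrementing *data and
+// passing it through benchmark::DoNotOptimize keeps the spin loop from being
+// optimized away.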
+static void DelayNs(int64_t ns, int* data) {
+ int64_t end = absl::base_internal::CycleClock::Now() +
+ ns * absl::base_internal::CycleClock::Frequency() / 1e9;
+ while (absl::base_internal::CycleClock::Now() < end) {
+ ++(*data);
+ benchmark::DoNotOptimize(*data);
+ }
+}
+
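+// Minimal scoped-lock wrapper so that BM_Contended below can exercise
+// absl::Mutex, absl::base_internal::SpinLock, and std::mutex through a single
+// interface; std::mutex needs a specialization because it spells its methods
+// lock()/unlock() rather than Lock()/Unlock().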
+template <typename MutexType>
+class RaiiLocker {
+ public:
+ explicit RaiiLocker(MutexType* mu) : mu_(mu) { mu_->Lock(); }
+ ~RaiiLocker() { mu_->Unlock(); }
+ private:
+ MutexType* mu_;
+};
+
+template <>
+class RaiiLocker<std::mutex> {
+ public:
+ explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); }
+ ~RaiiLocker() { mu_->unlock(); }
+ private:
+ std::mutex* mu_;
+};
+
+template <typename MutexType>
+void BM_Contended(benchmark::State& state) {
+ struct Shared {
+ MutexType mu;
+ int data = 0;
+ };
+ static auto* shared = new Shared;
+ int local = 0;
+ for (auto _ : state) {
+ // Here we model both local work outside of the critical section as well as
+ // some work inside of the critical section. The idea is to capture more or
+ // less realistic contention levels.
+ // If contention is too low, the benchmark won't measure anything useful.
+ // If contention is unrealistically high, the benchmark will favor bad mutex
+ // implementations that block and otherwise distract threads from the mutex
+ // and shared state as much as possible.
+ // To achieve this, the amount of local work is multiplied by the number of
+ // threads so that the ratio between local work and the critical section
+ // stays approximately constant regardless of the number of threads.
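+ // For example, with Threads(8) and Arg(20), each iteration spins for
+ // roughly 8 * 100ns = 800ns outside the lock and about 20ns while
+ // holding it.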
+ DelayNs(100 * state.threads, &local);
+ RaiiLocker<MutexType> locker(&shared->mu);
+ DelayNs(state.range(0), &shared->data);
+ }
+}
+
+BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex)
+ ->UseRealTime()
+ // ThreadPerCpu poorly handles non-power-of-two CPU counts.
+ ->Threads(1)
+ ->Threads(2)
+ ->Threads(4)
+ ->Threads(6)
+ ->Threads(8)
+ ->Threads(12)
+ ->Threads(16)
+ ->Threads(24)
+ ->Threads(32)
+ ->Threads(48)
+ ->Threads(64)
+ ->Threads(96)
+ ->Threads(128)
+ ->Threads(192)
+ ->Threads(256)
+ // Some empirically chosen amounts of work in the critical section.
+ // 1 is low contention, 200 is high contention, with a few values in between.
+ ->Arg(1)
+ ->Arg(20)
+ ->Arg(50)
+ ->Arg(200);
+
+BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock)
+ ->UseRealTime()
+ // ThreadPerCpu poorly handles non-power-of-two CPU counts.
+ ->Threads(1)
+ ->Threads(2)
+ ->Threads(4)
+ ->Threads(6)
+ ->Threads(8)
+ ->Threads(12)
+ ->Threads(16)
+ ->Threads(24)
+ ->Threads(32)
+ ->Threads(48)
+ ->Threads(64)
+ ->Threads(96)
+ ->Threads(128)
+ ->Threads(192)
+ ->Threads(256)
+ // Some empirically chosen amounts of work in the critical section.
+ // 1 is low contention, 200 is high contention, with a few values in between.
+ ->Arg(1)
+ ->Arg(20)
+ ->Arg(50)
+ ->Arg(200);
+
+BENCHMARK_TEMPLATE(BM_Contended, std::mutex)
+ ->UseRealTime()
+ // ThreadPerCpu poorly handles non-power-of-two CPU counts.
+ ->Threads(1)
+ ->Threads(2)
+ ->Threads(4)
+ ->Threads(6)
+ ->Threads(8)
+ ->Threads(12)
+ ->Threads(16)
+ ->Threads(24)
+ ->Threads(32)
+ ->Threads(48)
+ ->Threads(64)
+ ->Threads(96)
+ ->Threads(128)
+ ->Threads(192)
+ ->Threads(256)
+ // Some empirically chosen amounts of work in the critical section.
+ // 1 is low contention, 200 is high contention, with a few values in between.
+ ->Arg(1)
+ ->Arg(20)
+ ->Arg(50)
+ ->Arg(200);
+
+// Measure the overhead of conditions on mutex release (when they must be
+// evaluated). Mutex has (some) support for equivalence classes, allowing
+// Conditions with the same function and argument to avoid being evaluated
+// multiple times.
+//
+// num_classes==0 is used for the special case of every waiter being distinct.
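+// For example, with num_classes == 2 and num_waiters == 8, the eight waiters
+// share just two distinct (function, argument) pairs, so an Unlock() can
+// potentially evaluate only two Conditions rather than eight; num_classes == 0
+// makes every waiter's Condition distinct.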
+void BM_ConditionWaiters(benchmark::State& state) {
+ int num_classes = state.range(0);
+ int num_waiters = state.range(1);
+
+ struct Helper {
+ static void Waiter(absl::BlockingCounter* init, absl::Mutex* m, int* p) {
+ init->DecrementCount();
+ m->LockWhen(absl::Condition(
+ static_cast<bool (*)(int*)>([](int* v) { return *v == 0; }), p));
+ m->Unlock();
+ }
+ };
+
+ if (num_classes == 0) {
+ // No equivalence classes.
+ num_classes = num_waiters;
+ }
+
+ absl::BlockingCounter init(num_waiters);
+ absl::Mutex mu;
+ std::vector<int> equivalence_classes(num_classes, 1);
+
+ // Must be declared last to be destroyed first.
+ absl::synchronization_internal::ThreadPool pool(num_waiters);
+
+ for (int i = 0; i < num_waiters; i++) {
+ // Mutex considers Conditions with the same function and argument
+ // to be equivalent.
+ pool.Schedule([&, i] {
+ Helper::Waiter(&init, &mu, &equivalence_classes[i % num_classes]);
+ });
+ }
+ init.Wait();
+
+ for (auto _ : state) {
+ mu.Lock();
+ mu.Unlock(); // Each unlock requires Condition evaluation for our waiters.
+ }
+
+ mu.Lock();
+ for (int i = 0; i < num_classes; i++) {
+ equivalence_classes[i] = 0;
+ }
+ mu.Unlock();
+}
+
+// Some configurations have higher thread limits than others.
+#if defined(__linux__) && !defined(THREAD_SANITIZER)
+constexpr int kMaxConditionWaiters = 8192;
+#else
+constexpr int kMaxConditionWaiters = 1024;
+#endif
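+// RangePair sweeps both arguments: num_classes over [0, 2] and num_waiters
+// over [1, kMaxConditionWaiters], with intermediate values expanded by the
+// benchmark library's range multiplier (powers of 8 by default).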
+BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters);
+
+} // namespace