aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorColin Cross <ccross@android.com>2022-05-11 15:17:55 -0700
committerColin Cross <ccross@android.com>2022-05-12 14:28:45 -0700
commit0b1e8d31d21c56a87f9a53f088ea841aeaf87981 (patch)
treef05cdf1cf8f8360d5f130aeb4e7dfbad826786ea
parenta4eb82f67b8aa30eb705e1ff5aef5f72a0564266 (diff)
downloadlibmemunreachable-0b1e8d31d21c56a87f9a53f088ea841aeaf87981.tar.gz
memunreachable: prevent forking when main thread times outandroid13-dev
The main thread waits up to 30 seconds for the collection thread to pause all the other threads. If the collection thread has not finished pausing after 30 seconds the main thread will release the malloc locks. That could allow an unpaused thread to take a malloc lock and then get paused while holding it. When the collection thread tries to fork it will take all the malloc locks again, deadlocking with the paused thread that is holding a malloc lock. Switch from a semaphore to an atomic state machine to coordinate between the main thread adn the collection thread. If the main thread stops waiting it will atomically transition the state to ABORT, which will prevent the collection thread from forking. Bug: 231273975 Test: memunreachable_test, memunreachable_unit_test Change-Id: Ia37b5612dd0434f0231e2aeb159a8e9e5e6afd5d Merged-In: Ia37b5612dd0434f0231e2aeb159a8e9e5e6afd5d
-rw-r--r--Android.bp4
-rw-r--r--AtomicState.h98
-rw-r--r--MemUnreachable.cpp47
-rw-r--r--Semaphore.h60
-rw-r--r--tests/AtomicState_test.cpp135
5 files changed, 276 insertions, 68 deletions
diff --git a/Android.bp b/Android.bp
index 2a3fa1f..68457e5 100644
--- a/Android.bp
+++ b/Android.bp
@@ -48,6 +48,9 @@ cc_library {
"libc_malloc_debug_backtrace",
"libprocinfo",
],
+ header_libs: [
+ "libgtest_prod_headers",
+ ],
export_include_dirs: ["include"],
local_include_dirs: ["include"],
version_script: "libmemunreachable.map",
@@ -73,6 +76,7 @@ cc_test {
host_supported: true,
srcs: [
"tests/Allocator_test.cpp",
+ "tests/AtomicState_test.cpp",
"tests/HeapWalker_test.cpp",
"tests/LeakFolding_test.cpp",
],
diff --git a/AtomicState.h b/AtomicState.h
new file mode 100644
index 0000000..365eab9
--- /dev/null
+++ b/AtomicState.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <chrono>
+#include <functional>
+#include <mutex>
+#include <unordered_set>
+
+#include <gtest/gtest_prod.h>
+
+#include "android-base/macros.h"
+
+namespace android {
+
+/*
+ * AtomicState manages updating or waiting on a state enum between multiple threads.
+ */
+template <typename T>
+class AtomicState {
+ public:
+ explicit AtomicState(T state) : state_(state) {}
+ ~AtomicState() = default;
+
+ /*
+ * Set the state to `to`. Wakes up any waiters that are waiting on the new state.
+ */
+ void set(T to) {
+ std::lock_guard<std::mutex> lock(m_);
+ state_ = to;
+ cv_.notify_all();
+ }
+
+ /*
+ * If the state is `from`, change it to `to` and return true. Otherwise don't change
+ * it and return false. If the state is changed, wakes up any waiters that are waiting
+ * on the new state.
+ */
+ bool transition(T from, T to) {
+ return transition_or(from, to, [&] { return state_; });
+ }
+
+ /*
+ * If the state is `from`, change it to `to` and return true. Otherwise, call `or_func`,
+ * set the state to the value it returns and return false. Wakes up any waiters that are
+ * waiting on the new state.
+ */
+ bool transition_or(T from, T to, const std::function<T()>& orFunc) {
+ std::lock_guard<std::mutex> lock(m_);
+
+ bool failed = false;
+ if (state_ == from) {
+ state_ = to;
+ } else {
+ failed = true;
+ state_ = orFunc();
+ }
+ cv_.notify_all();
+
+ return !failed;
+ }
+
+ /*
+ * Block until the state is either `state1` or `state2`, or the time limit is reached.
+ * Returns true if the time limit was not reached, false if it was reached.
+ */
+ bool wait_for_either_of(T state1, T state2, std::chrono::milliseconds ms) {
+ std::unique_lock<std::mutex> lock(m_);
+ bool success = cv_.wait_for(lock, ms, [&] { return state_ == state1 || state_ == state2; });
+ return success;
+ }
+
+ private:
+ T state_;
+ std::mutex m_;
+ std::condition_variable cv_;
+
+ FRIEND_TEST(AtomicStateTest, transition);
+ FRIEND_TEST(AtomicStateTest, wait);
+
+ DISALLOW_COPY_AND_ASSIGN(AtomicState);
+};
+
+} // namespace android
diff --git a/MemUnreachable.cpp b/MemUnreachable.cpp
index b02a77e..d37819a 100644
--- a/MemUnreachable.cpp
+++ b/MemUnreachable.cpp
@@ -29,6 +29,7 @@
#include <backtrace.h>
#include "Allocator.h"
+#include "AtomicState.h"
#include "Binder.h"
#include "HeapWalker.h"
#include "Leak.h"
@@ -37,7 +38,6 @@
#include "ProcessMappings.h"
#include "PtracerThread.h"
#include "ScopedDisableMalloc.h"
-#include "Semaphore.h"
#include "ThreadCapture.h"
#include "bionic.h"
@@ -282,6 +282,13 @@ static inline const char* plural(T val) {
return (val == 1) ? "" : "s";
}
+enum State {
+ STARTING = 0,
+ PAUSING,
+ COLLECTING,
+ ABORT,
+};
+
bool GetUnreachableMemory(UnreachableMemoryInfo& info, size_t limit) {
if (info.version > 0) {
MEM_ALOGE("unsupported UnreachableMemoryInfo.version %zu in GetUnreachableMemory",
@@ -294,7 +301,7 @@ bool GetUnreachableMemory(UnreachableMemoryInfo& info, size_t limit) {
Heap heap;
- Semaphore continue_parent_sem;
+ AtomicState<State> state(STARTING);
LeakPipe pipe;
PtracerThread thread{[&]() -> int {
@@ -303,6 +310,13 @@ bool GetUnreachableMemory(UnreachableMemoryInfo& info, size_t limit) {
/////////////////////////////////////////////
MEM_ALOGI("collecting thread info for process %d...", parent_pid);
+ if (!state.transition_or(STARTING, PAUSING, [&] {
+ MEM_ALOGI("collecting thread expected state STARTING, aborting");
+ return ABORT;
+ })) {
+ return 1;
+ }
+
ThreadCapture thread_capture(parent_pid, heap);
allocator::vector<ThreadInfo> thread_info(heap);
allocator::vector<Mapping> mappings(heap);
@@ -310,24 +324,34 @@ bool GetUnreachableMemory(UnreachableMemoryInfo& info, size_t limit) {
// ptrace all the threads
if (!thread_capture.CaptureThreads()) {
- continue_parent_sem.Post();
+ state.set(ABORT);
return 1;
}
// collect register contents and stacks
if (!thread_capture.CapturedThreadInfo(thread_info)) {
- continue_parent_sem.Post();
+ state.set(ABORT);
return 1;
}
// snapshot /proc/pid/maps
if (!ProcessMappings(parent_pid, mappings)) {
- continue_parent_sem.Post();
+ state.set(ABORT);
return 1;
}
if (!BinderReferences(refs)) {
- continue_parent_sem.Post();
+ state.set(ABORT);
+ return 1;
+ }
+
+ // Atomically update the state from PAUSING to COLLECTING.
+ // The main thread may have given up waiting for this thread to finish
+ // pausing, in which case it will have changed the state to ABORT.
+ if (!state.transition_or(PAUSING, COLLECTING, [&] {
+ MEM_ALOGI("collecting thread aborting");
+ return ABORT;
+ })) {
return 1;
}
@@ -337,7 +361,6 @@ bool GetUnreachableMemory(UnreachableMemoryInfo& info, size_t limit) {
// can drop the malloc locks, it will block until the collection thread
// exits.
thread_capture.ReleaseThread(parent_tid);
- continue_parent_sem.Post();
// fork a process to do the heap walking
int ret = fork();
@@ -400,7 +423,15 @@ bool GetUnreachableMemory(UnreachableMemoryInfo& info, size_t limit) {
// Wait for the collection thread to signal that it is ready to fork the
// heap walker process.
- continue_parent_sem.Wait(30s);
+ if (!state.wait_for_either_of(COLLECTING, ABORT, 30s)) {
+ // The pausing didn't finish within 30 seconds, attempt to atomically
+ // update the state from PAUSING to ABORT. The collecting thread
+ // may have raced with the timeout and already updated the state to
+ // COLLECTING, in which case aborting is not necessary.
+ if (state.transition(PAUSING, ABORT)) {
+ MEM_ALOGI("main thread timed out waiting for collecting thread");
+ }
+ }
// Re-enable malloc so the collection thread can fork.
}
diff --git a/Semaphore.h b/Semaphore.h
deleted file mode 100644
index cd73972..0000000
--- a/Semaphore.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBMEMUNREACHABLE_SEMAPHORE_H_
-#define LIBMEMUNREACHABLE_SEMAPHORE_H_
-
-#include <chrono>
-#include <mutex>
-
-#include "android-base/macros.h"
-
-namespace android {
-
-class Semaphore {
- public:
- explicit Semaphore(int count = 0) : count_(count) {}
- ~Semaphore() = default;
-
- void Wait(std::chrono::milliseconds ms) {
- std::unique_lock<std::mutex> lk(m_);
- cv_.wait_for(lk, ms, [&] {
- if (count_ > 0) {
- count_--;
- return true;
- }
- return false;
- });
- }
- void Post() {
- {
- std::lock_guard<std::mutex> lk(m_);
- count_++;
- }
- cv_.notify_one();
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(Semaphore);
-
- int count_;
- std::mutex m_;
- std::condition_variable cv_;
-};
-
-} // namespace android
-
-#endif // LIBMEMUNREACHABLE_SEMAPHORE_H_
diff --git a/tests/AtomicState_test.cpp b/tests/AtomicState_test.cpp
new file mode 100644
index 0000000..7fb2806
--- /dev/null
+++ b/tests/AtomicState_test.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <AtomicState.h>
+
+#include <chrono>
+#include <thread>
+
+#include <gtest/gtest.h>
+
+using namespace std::chrono_literals;
+
+namespace android {
+
+enum AtomicStateTestEnum {
+ A,
+ B,
+ C,
+ D,
+ E,
+};
+
+class AtomicStateTest : public testing::Test {
+ protected:
+ AtomicStateTest() : state_(A) {}
+ virtual void SetUp() {}
+ virtual void TearDown() {}
+
+ AtomicState<AtomicStateTestEnum> state_;
+};
+
+TEST_F(AtomicStateTest, transition) {
+ ASSERT_EQ(A, state_.state_);
+
+ // Starts as A, transition from B fails
+ ASSERT_FALSE(state_.transition(B, C));
+ ASSERT_EQ(A, state_.state_);
+
+ // transition from A to B
+ ASSERT_TRUE(state_.transition(A, B));
+ ASSERT_EQ(B, state_.state_);
+
+ // State is B, transition from A fails
+ ASSERT_FALSE(state_.transition(A, B));
+ ASSERT_EQ(B, state_.state_);
+
+ // State is B, transition_or from A calls the lambda
+ bool lambda = false;
+ bool already_locked = false;
+ state_.transition_or(A, B, [&] {
+ // The lock should be held in the lambda
+ if (state_.m_.try_lock()) {
+ state_.m_.unlock();
+ } else {
+ already_locked = true;
+ }
+ lambda = true;
+ return B;
+ });
+ ASSERT_TRUE(lambda);
+ ASSERT_TRUE(already_locked);
+ ASSERT_EQ(B, state_.state_);
+
+ // State is C, transition_or from B to C does not call the lambda
+ lambda = false;
+ state_.transition_or(B, C, [&] {
+ lambda = true;
+ return C;
+ });
+ ASSERT_FALSE(lambda);
+ ASSERT_EQ(C, state_.state_);
+}
+
+TEST_F(AtomicStateTest, wait) {
+ ASSERT_EQ(A, state_.state_);
+
+ // Starts as A, wait_for_either_of B, C returns false
+ ASSERT_FALSE(state_.wait_for_either_of(B, C, 10ms));
+
+ // Starts as A, wait_for_either_of A, B returns true
+ ASSERT_TRUE(state_.wait_for_either_of(A, B, 1s));
+
+ {
+ std::thread t([&] {
+ usleep(10000);
+ state_.set(B);
+ });
+
+ // Wait ing for B or C returns true after state is set to B
+ ASSERT_TRUE(state_.wait_for_either_of(B, C, 1s));
+
+ t.join();
+ }
+
+ ASSERT_EQ(B, state_.state_);
+ {
+ std::thread t([&] {
+ usleep(10000);
+ state_.transition(B, C);
+ });
+
+ // Waiting for A or C returns true after state is transitioned to C
+ ASSERT_TRUE(state_.wait_for_either_of(A, C, 1s));
+
+ t.join();
+ }
+
+ ASSERT_EQ(C, state_.state_);
+ {
+ std::thread t([&] {
+ usleep(10000);
+ state_.transition(C, D);
+ });
+
+ // Waiting for A or B returns false after state is transitioned to D
+ ASSERT_FALSE(state_.wait_for_either_of(A, B, 100ms));
+
+ t.join();
+ }
+}
+
+} // namespace android