summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShrinidhi Hegde <shrinidhihegde@google.com>2024-04-09 15:33:21 +0000
committerShrinidhi Hegde <shrinidhihegde@google.com>2024-04-11 15:28:07 +0000
commitab9b68384052d8b9b35e1e96274fb66827081d7e (patch)
treefb83d36f27a815ee376e000a0aca259d5f74c1ea
parentef2e67eec1b703cef0d881c8dd1ae76bb60d6171 (diff)
downloadcore-ab9b68384052d8b9b35e1e96274fb66827081d7e.tar.gz
Throttle reboot from native watchdog
We found that increasing the threshold to 20 crashes pushes the file-system-based rollbacks as well, so this change introduces throttling behaviour instead. The native watchdog now performs a reboot with ramdump at 5 restarts. After that, PackageWatchdog/RescueParty takes over to perform other mitigations. The ramdump + reboot will not be performed more than once in 24 hours. Test: manual Bug: 291137901 Change-Id: Ia192411dad94e8e25c26f700d2fe7f94d41439b8
-rw-r--r--init/service.cpp29
1 files changed, 22 insertions, 7 deletions
diff --git a/init/service.cpp b/init/service.cpp
index eb24dd593..31308a036 100644
--- a/init/service.cpp
+++ b/init/service.cpp
@@ -355,20 +355,35 @@ void Service::Reap(const siginfo_t& siginfo) {
// If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
// reboot into bootloader or set crashing property
boot_clock::time_point now = boot_clock::now();
+ constexpr const char native_watchdog_reboot_time[] = "persist.init.svc.last_fatal_reboot_epoch";
+ uint64_t throttle_window =
+ std::chrono::duration_cast<std::chrono::seconds>(std::chrono::hours(24)).count();
if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) &&
!was_last_exit_ok_) {
bool boot_completed = GetBoolProperty("sys.boot_completed", false);
if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
if (++crash_count_ > 4) {
- auto exit_reason = boot_completed ?
- "in " + std::to_string(fatal_crash_window_.count()) + " minutes" :
- "before boot completed";
+ auto exit_reason =
+ boot_completed
+ ? "in " + std::to_string(fatal_crash_window_.count()) + " minutes"
+ : "before boot completed";
if (flags_ & SVC_CRITICAL) {
if (!GetBoolProperty("init.svc_debug.no_fatal." + name_, false)) {
- // Aborts into `fatal_reboot_target_'.
- SetFatalRebootTarget(fatal_reboot_target_);
- LOG(FATAL) << "critical process '" << name_ << "' exited 4 times "
- << exit_reason;
+ uint64_t epoch_time =
+ std::chrono::duration_cast<std::chrono::seconds>(
+ std::chrono::system_clock::now().time_since_epoch())
+ .count();
+ // Do not reboot again If it was already initiated in the last 24hrs
+ if (epoch_time - GetIntProperty(native_watchdog_reboot_time, 0) >
+ throttle_window) {
+ SetProperty(native_watchdog_reboot_time, std::to_string(epoch_time));
+ // Aborts into `fatal_reboot_target_'.
+ SetFatalRebootTarget(fatal_reboot_target_);
+ LOG(FATAL) << "critical process '" << name_ << "' exited 4 times "
+ << exit_reason;
+ } else {
+ LOG(INFO) << "Reboot already performed in last 24hrs because of crash.";
+ }
}
} else {
LOG(ERROR) << "process with updatable components '" << name_