diff options
author | Shrinidhi Hegde <shrinidhihegde@google.com> | 2024-04-09 15:33:21 +0000 |
---|---|---|
committer | Shrinidhi Hegde <shrinidhihegde@google.com> | 2024-04-11 15:28:07 +0000 |
commit | ab9b68384052d8b9b35e1e96274fb66827081d7e (patch) | |
tree | fb83d36f27a815ee376e000a0aca259d5f74c1ea | |
parent | ef2e67eec1b703cef0d881c8dd1ae76bb60d6171 (diff) | |
download | core-ab9b68384052d8b9b35e1e96274fb66827081d7e.tar.gz |
Throttle reboot from native watchdog
We found that increasing the threshold to 20 crashes also pushes back the
file-system-based rollbacks, so introduce a throttling behaviour instead.
The native watchdog now performs a reboot with ramdump at 5 restarts.
After that, PackageWatchdog/RescueParty takes over to perform other
mitigations. The ramdump + reboot will not be performed more than once
in 24 hours.
Test: manual
Bug: 291137901
Change-Id: Ia192411dad94e8e25c26f700d2fe7f94d41439b8
-rw-r--r-- | init/service.cpp | 29 |
1 files changed, 22 insertions, 7 deletions
diff --git a/init/service.cpp b/init/service.cpp index eb24dd593..31308a036 100644 --- a/init/service.cpp +++ b/init/service.cpp @@ -355,20 +355,35 @@ void Service::Reap(const siginfo_t& siginfo) { // If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed, // reboot into bootloader or set crashing property boot_clock::time_point now = boot_clock::now(); + constexpr const char native_watchdog_reboot_time[] = "persist.init.svc.last_fatal_reboot_epoch"; + uint64_t throttle_window = + std::chrono::duration_cast<std::chrono::seconds>(std::chrono::hours(24)).count(); if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) && !was_last_exit_ok_) { bool boot_completed = GetBoolProperty("sys.boot_completed", false); if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) { if (++crash_count_ > 4) { - auto exit_reason = boot_completed ? - "in " + std::to_string(fatal_crash_window_.count()) + " minutes" : - "before boot completed"; + auto exit_reason = + boot_completed + ? "in " + std::to_string(fatal_crash_window_.count()) + " minutes" + : "before boot completed"; if (flags_ & SVC_CRITICAL) { if (!GetBoolProperty("init.svc_debug.no_fatal." + name_, false)) { - // Aborts into `fatal_reboot_target_'. - SetFatalRebootTarget(fatal_reboot_target_); - LOG(FATAL) << "critical process '" << name_ << "' exited 4 times " - << exit_reason; + uint64_t epoch_time = + std::chrono::duration_cast<std::chrono::seconds>( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + // Do not reboot again If it was already initiated in the last 24hrs + if (epoch_time - GetIntProperty(native_watchdog_reboot_time, 0) > + throttle_window) { + SetProperty(native_watchdog_reboot_time, std::to_string(epoch_time)); + // Aborts into `fatal_reboot_target_'. 
+ SetFatalRebootTarget(fatal_reboot_target_); + LOG(FATAL) << "critical process '" << name_ << "' exited 4 times " + << exit_reason; + } else { + LOG(INFO) << "Reboot already performed in last 24hrs because of crash."; + } } } else { LOG(ERROR) << "process with updatable components '" << name_ |