diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 05:23:38 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 05:23:38 +0000 |
commit | d6749fdbcad76ad5ee97c5c739eaf98a03607b43 (patch) | |
tree | 945ab05e3e6b3bd3dc01eb60eb8ba0828c32ddbe | |
parent | c329fe6a7613e99264d5ced7b97675d5d4edabcd (diff) | |
parent | fcc9c1a5928b2659378370c9ad38ed45888368f7 (diff) | |
download | lmkd-android14-mainline-uwb-release.tar.gz |
Snap for 10453563 from fcc9c1a5928b2659378370c9ad38ed45888368f7 to mainline-uwb-release (refs: aml_uwb_341513070, aml_uwb_341511050, aml_uwb_341310300, aml_uwb_341310030, aml_uwb_341111010, aml_uwb_341011000, android14-mainline-uwb-release)
Change-Id: Id8e005357dab905b209ee9f2b31e1db218b544ea
-rw-r--r-- | Android.bp | 28 | ||||
-rw-r--r-- | PREUPLOAD.cfg | 5 | ||||
-rw-r--r-- | README.md | 104 | ||||
-rw-r--r-- | include/lmkd_hooks.h | 67 | ||||
-rw-r--r-- | lmkd.cpp | 119 | ||||
-rw-r--r-- | reaper.cpp | 95 | ||||
-rw-r--r-- | tests/TEST_MAPPING | 2 | ||||
-rw-r--r-- | tests/lmkd_tests.cpp | 27 |
8 files changed, 318 insertions, 129 deletions
@@ -2,6 +2,32 @@ package { default_applicable_licenses: ["Android-Apache-2.0"], } +soong_config_module_type { + name: "lmkd_hooks_cc_defaults", + module_type: "cc_defaults", + config_namespace: "lmkd", + bool_variables: ["use_hooks"], + properties: [ + "cflags", + "static_libs", + ], +} + +lmkd_hooks_cc_defaults { + name: "lmkd_hooks_defaults", + + soong_config_variables: { + use_hooks: { + cflags: [ + "-DLMKD_USE_HOOKS" + ], + static_libs: [ + "liblmkdhooks" + ] + } + } +} + cc_defaults { name: "stats_defaults", cflags: [ @@ -38,7 +64,7 @@ cc_binary { "-DLMKD_TRACE_KILLS" ], init_rc: ["lmkd.rc"], - defaults: ["stats_defaults"], + defaults: ["stats_defaults", "lmkd_hooks_defaults"], logtags: ["event.logtags"], afdo: true, } diff --git a/PREUPLOAD.cfg b/PREUPLOAD.cfg new file mode 100644 index 0000000..c8dbf77 --- /dev/null +++ b/PREUPLOAD.cfg @@ -0,0 +1,5 @@ +[Builtin Hooks] +clang_format = true + +[Builtin Hooks Options] +clang_format = --commit ${PREUPLOAD_COMMIT} --style file --extensions c,h,cc,cpp @@ -26,75 +26,75 @@ Android Properties lmkd can be configured on a particular system using the following Android properties: - ro.config.low_ram: choose between low-memory vs high-performance - device. Default = false. + - `ro.config.low_ram`: choose between low-memory vs high-performance + device. Default = false. - ro.lmk.use_minfree_levels: use free memory and file cache thresholds for - making decisions when to kill. This mode works - the same way kernel lowmemorykiller driver used - to work. Default = false + - `ro.lmk.use_minfree_levels`: use free memory and file cache thresholds for + making decisions when to kill. This mode works + the same way kernel lowmemorykiller driver used + to work. Default = false - ro.lmk.low: min oom_adj score for processes eligible to be - killed at low vmpressure level. Default = 1001 - (disabled) + - `ro.lmk.low`: min oom_adj score for processes eligible to be + killed at low vmpressure level. 
Default = 1001 + (disabled) - ro.lmk.medium: min oom_adj score for processes eligible to be - killed at medium vmpressure level. Default = 800 - (non-essential processes) + - `ro.lmk.medium`: min oom_adj score for processes eligible to be + killed at medium vmpressure level. Default = 800 + (non-essential processes) - ro.lmk.critical: min oom_adj score for processes eligible to be - killed at critical vmpressure level. Default = 0 - (all processes) + - `ro.lmk.critical`: min oom_adj score for processes eligible to be + killed at critical vmpressure level. Default = 0 + (all processes) - ro.lmk.critical_upgrade: enables upgrade to critical level. Default = false + - `ro.lmk.critical_upgrade`: enables upgrade to critical level. Default = false - ro.lmk.upgrade_pressure: max mem_pressure at which level will be upgraded - because system is swapping too much. Default = 100 - (disabled) + - `ro.lmk.upgrade_pressure`: max mem_pressure at which level will be upgraded + because system is swapping too much. Default = 100 + (disabled) - ro.lmk.downgrade_pressure: min mem_pressure at which vmpressure event will - be ignored because enough free memory is still - available. Default = 100 (disabled) + - `ro.lmk.downgrade_pressure`: min mem_pressure at which vmpressure event will + be ignored because enough free memory is still + available. Default = 100 (disabled) - ro.lmk.kill_heaviest_task: kill heaviest eligible task (best decision) vs. - any eligible task (fast decision). Default = false + - `ro.lmk.kill_heaviest_task`: kill heaviest eligible task (best decision) vs. + any eligible task (fast decision). Default = false - ro.lmk.kill_timeout_ms: duration in ms after a kill when no additional - kill will be done. Default = 0 (disabled) + - `ro.lmk.kill_timeout_ms`: duration in ms after a kill when no additional + kill will be done. 
Default = 0 (disabled) - ro.lmk.debug: enable lmkd debug logs, Default = false + - `ro.lmk.debug`: enable lmkd debug logs, Default = false - ro.lmk.swap_free_low_percentage: level of free swap as a percentage of the - total swap space used as a threshold to consider - the system as swap space starved. Default for - low-RAM devices = 10, for high-end devices = 20 + - `ro.lmk.swap_free_low_percentage`: level of free swap as a percentage of the + total swap space used as a threshold to consider + the system as swap space starved. Default for + low-RAM devices = 10, for high-end devices = 20 - ro.lmk.thrashing_limit: number of workingset refaults as a percentage of - the file-backed pagecache size used as a threshold - to consider system thrashing its pagecache. - Default for low-RAM devices = 30, for high-end - devices = 100 + - `ro.lmk.thrashing_limit`: number of workingset refaults as a percentage of + the file-backed pagecache size used as a threshold + to consider system thrashing its pagecache. + Default for low-RAM devices = 30, for high-end + devices = 100 - ro.lmk.thrashing_limit_decay: thrashing threshold decay expressed as a - percentage of the original threshold used to lower - the threshold when system does not recover even - after a kill. Default for low-RAM devices = 50, - for high-end devices = 10 + - `ro.lmk.thrashing_limit_decay`: thrashing threshold decay expressed as a + percentage of the original threshold used to lower + the threshold when system does not recover even + after a kill. Default for low-RAM devices = 50, + for high-end devices = 10 - ro.lmk.psi_partial_stall_ms: partial PSI stall threshold in milliseconds for - triggering low memory notification. Default for - low-RAM devices = 200, for high-end devices = 70 + - `ro.lmk.psi_partial_stall_ms`: partial PSI stall threshold in milliseconds for + triggering low memory notification. 
Default for + low-RAM devices = 200, for high-end devices = 70 - ro.lmk.psi_complete_stall_ms: complete PSI stall threshold in milliseconds for - triggering critical memory notification. Default = - 700 + - `ro.lmk.psi_complete_stall_ms`: complete PSI stall threshold in milliseconds for + triggering critical memory notification. Default = + 700 lmkd will set the following Android properties according to current system configurations: - sys.lmk.minfree_levels: minfree:oom_adj_score pairs, delimited by comma + - `sys.lmk.minfree_levels`: minfree:oom_adj_score pairs, delimited by comma - sys.lmk.reportkills: whether or not it supports reporting process kills - to clients. Test app should check this property - before testing low memory kill notification. - Default will be unset. + - `sys.lmk.reportkills`: whether or not it supports reporting process kills + to clients. Test app should check this property + before testing low memory kill notification. + Default will be unset. diff --git a/include/lmkd_hooks.h b/include/lmkd_hooks.h new file mode 100644 index 0000000..259a3fd --- /dev/null +++ b/include/lmkd_hooks.h @@ -0,0 +1,67 @@ +/* + * Copyright 2022 Google, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file defines no-op hook functions for LMKD. 
To override these + * definitions, enable the use_lmkd_hooks product variable and create a library + * "liblmkdhooks" that supplies definitions for the hook functions in your + * vendor folder. + */ + +#ifndef _LMKD_HOOKS_H_ +#define _LMKD_HOOKS_H_ + +#include <sys/types.h> + +__BEGIN_DECLS + +#ifdef LMKD_USE_HOOKS + +/* + * Initialize all necessary Android props and perform any necessary validation + * on the values. Called before lmkd_init_hook() and will be called again + * whenever LMKD receives the LMK_UPDATE_PROPS command. Returns true on success, + * false otherwise. + */ +bool lmkd_update_props_hook(); +/* + * Perform any necessary initialization for the hooks. Called only once at the + * end of LMKD's init(). Returns true on success, false otherwise. + */ +bool lmkd_init_hook(); +/* + * Allows for interception of a kill by LMKD. This hook may attempt to free + * memory elsewhere to avoid the specified process being killed. Returns 0 to + * proceed with the kill, or the number of memory pages freed elsewhere to skip + * the kill. 
+ */ +int lmkd_free_memory_before_kill_hook(struct proc* procp, int proc_size_pages, + int proc_oom_score, int kill_reason); + +#else /* LMKD_USE_HOOKS */ + +static inline bool lmkd_update_props_hook() { return true; } +static inline bool lmkd_init_hook() { return true; } +static inline int lmkd_free_memory_before_kill_hook(struct proc*, int, int, + int) { + return 0; +} + +#endif /* LMKD_USE_HOOKS */ + +__END_DECLS + +#endif @@ -42,6 +42,7 @@ #include <cutils/sockets.h> #include <liblmkd_utils.h> #include <lmkd.h> +#include <lmkd_hooks.h> #include <log/log.h> #include <log/log_event_list.h> #include <log/log_time.h> @@ -65,19 +66,17 @@ #define ATRACE_TAG ATRACE_TAG_ALWAYS #include <cutils/trace.h> -static inline void trace_kill_start(int pid, const char *desc) { - ATRACE_INT("kill_one_process", pid); +static inline void trace_kill_start(const char *desc) { ATRACE_BEGIN(desc); } static inline void trace_kill_end() { ATRACE_END(); - ATRACE_INT("kill_one_process", 0); } #else /* LMKD_TRACE_KILLS */ -static inline void trace_kill_start(int, const char *) {} +static inline void trace_kill_start(const char *) {} static inline void trace_kill_end() {} #endif /* LMKD_TRACE_KILLS */ @@ -451,6 +450,7 @@ union meminfo { /* fields below are calculated rather than read from the file */ int64_t nr_file_pages; int64_t total_gpu_kb; + int64_t easy_available; } field; int64_t arr[MI_FIELD_COUNT]; }; @@ -468,7 +468,7 @@ enum vmstat_field { VS_FIELD_COUNT }; -static const char* const vmstat_field_names[MI_FIELD_COUNT] = { +static const char* const vmstat_field_names[VS_FIELD_COUNT] = { "nr_free_pages", "nr_inactive_file", "nr_active_file", @@ -511,6 +511,7 @@ struct proc { uid_t uid; int oomadj; pid_t reg_pid; /* PID of the process that registered this record */ + bool valid; struct proc *pidhash_next; }; @@ -547,7 +548,7 @@ static uint32_t killcnt_total = 0; /* PAGE_SIZE / 1024 */ static long page_k; -static void update_props(); +static bool update_props(); static bool 
init_monitors(); static void destroy_monitors(); @@ -942,6 +943,7 @@ static void proc_insert(struct proc *procp) { proc_slot(procp); } +// Can be called only from the main thread. static int pid_remove(int pid) { int hval = pid_hashfn(pid); struct proc *procp; @@ -971,6 +973,15 @@ static int pid_remove(int pid) { return 0; } +static void pid_invalidate(int pid) { + std::shared_lock lock(adjslot_list_lock); + struct proc *procp = pid_lookup(pid); + + if (procp) { + procp->valid = false; + } +} + /* * Write a string to a file. * Returns false if the file does not exist. @@ -1221,6 +1232,7 @@ static void cmd_procprio(LMKD_CTRL_PACKET packet, int field_count, struct ucred procp->uid = params.uid; procp->reg_pid = cred->pid; procp->oomadj = params.oomadj; + procp->valid = true; proc_insert(procp); } else { if (!claim_record(procp, cred->pid)) { @@ -1512,14 +1524,19 @@ static void ctrl_command_handler(int dsock_idx) { case LMK_UPDATE_PROPS: if (nargs != 0) goto wronglen; - update_props(); - if (!use_inkernel_interface) { - /* Reinitialize monitors to apply new settings */ - destroy_monitors(); - result = init_monitors() ? 
0 : -1; - } else { - result = 0; + result = -1; + if (update_props()) { + if (!use_inkernel_interface) { + /* Reinitialize monitors to apply new settings */ + destroy_monitors(); + if (init_monitors()) { + result = 0; + } + } else { + result = 0; + } } + len = lmkd_pack_set_update_props_repl(packet, result); if (ctrl_data_write(dsock_idx, (char *)packet, len) != len) { ALOGE("Failed to report operation results"); @@ -1822,7 +1839,7 @@ static bool meminfo_parse_line(char *line, union meminfo *mi) { static int64_t read_gpu_total_kb() { static int fd = android::bpf::bpfFdGet( - "/sys/fs/bpf/map_gpu_mem_gpu_mem_total_map", BPF_F_RDONLY); + "/sys/fs/bpf/map_gpuMem_gpu_mem_total_map", BPF_F_RDONLY); static constexpr uint64_t kBpfKeyGpuTotalUsage = 0; uint64_t value; @@ -1860,10 +1877,19 @@ static int meminfo_parse(union meminfo *mi) { mi->field.nr_file_pages = mi->field.cached + mi->field.swap_cached + mi->field.buffers; mi->field.total_gpu_kb = read_gpu_total_kb(); + mi->field.easy_available = mi->field.nr_free_pages + mi->field.inactive_file; return 0; } +// In the case of ZRAM, mi->field.free_swap can't be used directly because swap space is taken +// from the free memory or reclaimed. Use the lowest of free_swap and easily available memory to +// measure free swap because they represent how much swap space the system will consider to use +// and how much it can actually use. +static inline int64_t get_free_swap(union meminfo *mi) { + return std::min(mi->field.free_swap, mi->field.easy_available); +} + /* /proc/vmstat parsing routines */ static bool vmstat_parse_line(char *line, union vmstat *vs) { char *cp; @@ -2088,7 +2114,7 @@ static struct proc *proc_adj_prev(int oomadj, int pid) { return NULL; } -// When called from a non-main thread, adjslot_list_lock read lock should be taken. +// Can be called only from the main thread. 
static struct proc *proc_get_heaviest(int oomadj) { struct adjslot_list *head = &procadjslot_list[ADJTOSLOT(oomadj)]; struct adjslot_list *curr = head->next; @@ -2150,9 +2176,11 @@ static void watchdog_callback() { continue; } - if (reaper.kill({ target.pidfd, target.pid, target.uid }, true) == 0) { + if (target.valid && reaper.kill({ target.pidfd, target.pid, target.uid }, true) == 0) { ALOGW("lmkd watchdog killed process %d, oom_score_adj %d", target.pid, oom_score); killinfo_log(&target, 0, 0, 0, NULL, NULL, NULL, NULL, NULL); + // Can't call pid_remove() from non-main thread, therefore just invalidate the record + pid_invalidate(target.pid); break; } prev_pid = target.pid; @@ -2290,7 +2318,7 @@ static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_i char buf[PAGE_SIZE]; char desc[LINE_MAX]; - if (!read_proc_status(pid, buf, sizeof(buf))) { + if (!procp->valid || !read_proc_status(pid, buf, sizeof(buf))) { goto out; } if (!parse_status_tag(buf, PROC_STATUS_TGID_FIELD, &tgid)) { @@ -2320,7 +2348,18 @@ static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_i snprintf(desc, sizeof(desc), "lmk,%d,%d,%d,%d,%d", pid, ki ? (int)ki->kill_reason : -1, procp->oomadj, min_oom_score, ki ? ki->max_thrashing : -1); - trace_kill_start(pid, desc); + result = lmkd_free_memory_before_kill_hook(procp, rss_kb / page_k, procp->oomadj, + ki ? (int)ki->kill_reason : -1); + if (result > 0) { + /* + * Memory was freed elsewhere; no need to kill. Note: intentionally do not + * pid_remove(pid) since it was not killed. + */ + ALOGI("Skipping kill; %ld kB freed elsewhere.", result * page_k); + return result; + } + + trace_kill_start(desc); start_wait_for_proc_kill(pidfd < 0 ? 
pid : pidfd); kill_result = reaper.kill({ pidfd, pid, uid }, false); @@ -2359,7 +2398,7 @@ static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_i kill_st.oom_score = procp->oomadj; kill_st.min_oom_score = min_oom_score; kill_st.free_mem_kb = mi->field.nr_free_pages * page_k; - kill_st.free_swap_kb = mi->field.free_swap * page_k; + kill_st.free_swap_kb = get_free_swap(mi) * page_k; stats_write_lmk_kill_occurred(&kill_st, mem_st); ctrl_data_write_lmk_kill_occurred((pid_t)pid, uid); @@ -2535,7 +2574,7 @@ void calc_zone_watermarks(struct zoneinfo *zi, struct zone_watermarks *watermark } static int calc_swap_utilization(union meminfo *mi) { - int64_t swap_used = mi->field.total_swap - mi->field.free_swap; + int64_t swap_used = mi->field.total_swap - get_free_swap(mi); int64_t total_swappable = mi->field.active_anon + mi->field.inactive_anon + mi->field.shmem + swap_used; return total_swappable > 0 ? (swap_used * 100) / total_swappable : 0; @@ -2628,17 +2667,17 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ /* Check free swap levels */ if (swap_free_low_percentage) { swap_low_threshold = mi.field.total_swap * swap_free_low_percentage / 100; - swap_is_low = mi.field.free_swap < swap_low_threshold; + swap_is_low = get_free_swap(&mi) < swap_low_threshold; } else { swap_low_threshold = 0; } /* Identify reclaim state */ - if (vs.field.pgscan_direct > init_pgscan_direct) { + if (vs.field.pgscan_direct != init_pgscan_direct) { init_pgscan_direct = vs.field.pgscan_direct; init_pgscan_kswapd = vs.field.pgscan_kswapd; reclaim = DIRECT_RECLAIM; - } else if (vs.field.pgscan_kswapd > init_pgscan_kswapd) { + } else if (vs.field.pgscan_kswapd != init_pgscan_kswapd) { init_pgscan_kswapd = vs.field.pgscan_kswapd; reclaim = KSWAPD_RECLAIM; } else if (workingset_refault_file == prev_workingset_refault) { @@ -2741,7 +2780,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ kill_reason = 
LOW_SWAP_AND_THRASHING; snprintf(kill_desc, sizeof(kill_desc), "device is low on swap (%" PRId64 "kB < %" PRId64 "kB) and thrashing (%" PRId64 "%%)", - mi.field.free_swap * page_k, swap_low_threshold * page_k, thrashing); + get_free_swap(&mi) * page_k, swap_low_threshold * page_k, thrashing); /* Do not kill perceptible apps unless below min watermark or heavily thrashing */ if (wmark > WMARK_MIN && thrashing < thrashing_critical_pct) { min_score_adj = PERCEPTIBLE_APP_ADJ + 1; @@ -2752,7 +2791,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ kill_reason = LOW_MEM_AND_SWAP; snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and swap is low (%" PRId64 "kB < %" PRId64 "kB)", wmark < WMARK_LOW ? "min" : "low", - mi.field.free_swap * page_k, swap_low_threshold * page_k); + get_free_swap(&mi) * page_k, swap_low_threshold * page_k); /* Do not kill perceptible apps unless below min watermark or heavily thrashing */ if (wmark > WMARK_MIN && thrashing < thrashing_critical_pct) { min_score_adj = PERCEPTIBLE_APP_ADJ + 1; @@ -3031,7 +3070,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po // If we still have enough swap space available, check if we want to // ignore/downgrade pressure events. - if (mi.field.free_swap >= + if (get_free_swap(&mi) >= mi.field.total_swap * swap_free_low_percentage / 100) { // If the pressure is larger than downgrade_pressure lmk will not // kill any process, since enough memory is available. @@ -3052,7 +3091,8 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po do_kill: if (low_ram_device) { /* For Go devices kill only one task */ - if (find_and_kill_process(level_oomadj[level], NULL, &mi, &wi, &curr_tm, NULL) == 0) { + if (find_and_kill_process(use_minfree_levels ? 
min_score_adj : level_oomadj[level], + NULL, &mi, &wi, &curr_tm, NULL) == 0) { if (debug_process_killing) { ALOGI("Nothing to kill"); } @@ -3239,7 +3279,7 @@ static bool init_mp_common(enum vmpressure_level level) { goto err_open_mpfd; } - evctlfd = open(GetCgroupAttributePath("CgroupEventControl").c_str(), O_WRONLY | O_CLOEXEC); + evctlfd = open(GetCgroupAttributePath("MemCgroupEventControl").c_str(), O_WRONLY | O_CLOEXEC); if (evctlfd < 0) { ALOGI("No kernel memory cgroup event control (errno=%d)", errno); goto err_open_evctlfd; @@ -3499,6 +3539,11 @@ static int init(void) { } ALOGI("Process polling is %s", pidfd_supported ? "supported" : "not supported" ); + if (!lmkd_init_hook()) { + ALOGE("Failed to initialize LMKD hooks."); + return -1; + } + return 0; } @@ -3543,7 +3588,8 @@ static void call_handler(struct event_handler_info* handler_info, resume_polling(poll_params, curr_tm); break; case POLLING_DO_NOT_CHANGE: - if (get_time_diff_ms(&poll_params->poll_start_tm, &curr_tm) > PSI_WINDOW_SIZE_MS) { + if (poll_params->poll_handler && + get_time_diff_ms(&poll_params->poll_start_tm, &curr_tm) > PSI_WINDOW_SIZE_MS) { /* Polled for the duration of PSI window, time to stop */ poll_params->poll_handler = NULL; } @@ -3686,7 +3732,7 @@ int issue_reinit() { return res == UPDATE_PROPS_SUCCESS ? 
0 : -1; } -static void update_props() { +static bool update_props() { /* By default disable low level vmpressure events */ level_oomadj[VMPRESS_LEVEL_LOW] = GET_LMK_PROPERTY(int32, "low", OOM_SCORE_ADJ_MAX + 1); @@ -3730,6 +3776,14 @@ static void update_props() { stall_limit_critical = GET_LMK_PROPERTY(int64, "stall_limit_critical", 100); reaper.enable_debug(debug_process_killing); + + /* Call the update props hook */ + if (!lmkd_update_props_hook()) { + ALOGE("Failed to update LMKD hook props."); + return false; + } + + return true; } int main(int argc, char **argv) { @@ -3740,7 +3794,10 @@ int main(int argc, char **argv) { return issue_reinit(); } - update_props(); + if (!update_props()) { + ALOGE("Failed to initialize props, exiting."); + return -1; + } ctx = create_android_logger(KILLINFO_LOG_TAG); @@ -53,6 +53,41 @@ static inline long get_time_diff_ms(struct timespec *from, (to->tv_nsec - from->tv_nsec) / (long)NS_PER_MS; } +static void set_process_group_and_prio(uid_t uid, int pid, const std::vector<std::string>& profiles, + int prio) { + DIR* d; + char proc_path[PATH_MAX]; + struct dirent* de; + + if (!SetProcessProfilesCached(uid, pid, profiles)) { + ALOGW("Failed to set task profiles for the process (%d) being killed", pid); + } + + snprintf(proc_path, sizeof(proc_path), "/proc/%d/task", pid); + if (!(d = opendir(proc_path))) { + ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", proc_path, errno, + pid); + return; + } + + while ((de = readdir(d))) { + int t_pid; + + if (de->d_name[0] == '.') continue; + t_pid = atoi(de->d_name); + + if (!t_pid) { + ALOGW("Failed to get t_pid for '%s' of pid(%d)", de->d_name, pid); + continue; + } + + if (setpriority(PRIO_PROCESS, t_pid, prio) && errno != ESRCH) { + ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", t_pid, errno); + } + } + closedir(d); +} + static void* reaper_main(void* param) { Reaper *reaper = static_cast<Reaper*>(param); struct timespec start_tm, end_tm; @@ -64,6 
+99,10 @@ static void* reaper_main(void* param) { ALOGE("Failed to assign cpuset to the reaper thread"); } + if (setpriority(PRIO_PROCESS, tid, ANDROID_PRIORITY_HIGHEST)) { + ALOGW("Unable to raise priority of the reaper thread (%d): errno=%d", tid, errno); + } + for (;;) { target = reaper->dequeue_request(); @@ -76,8 +115,13 @@ static void* reaper_main(void* param) { reaper->notify_kill_failure(target.pid); goto done; } + + set_process_group_and_prio(target.uid, target.pid, + {"CPUSET_SP_FOREGROUND", "SCHED_SP_FOREGROUND"}, + ANDROID_PRIORITY_NORMAL); + if (process_mrelease(target.pidfd, 0)) { - ALOGE("process_mrelease %d failed: %s", target.pidfd, strerror(errno)); + ALOGE("process_mrelease %d failed: %s", target.pid, strerror(errno)); goto done; } if (reaper->debug_enabled()) { @@ -85,6 +129,7 @@ static void* reaper_main(void* param) { ALOGI("Process %d was reaped in %ldms", target.pid, get_time_diff_ms(&start_tm, &end_tm)); } + done: close(target.pidfd); reaper->request_complete(); @@ -112,6 +157,9 @@ bool Reaper::is_reaping_supported() { bool Reaper::init(int comm_fd) { char name[16]; + struct sched_param param = { + .sched_priority = 0, + }; if (thread_cnt_ > 0) { // init should not be called multiple times @@ -124,6 +172,10 @@ bool Reaper::init(int comm_fd) { ALOGE("pthread_create failed: %s", strerror(errno)); continue; } + // set normal scheduling policy for the reaper thread + if (pthread_setschedparam(thread_pool_[thread_cnt_], SCHED_OTHER, &param)) { + ALOGW("set SCHED_FIFO failed %s", strerror(errno)); + } snprintf(name, sizeof(name), "lmkd_reaper%d", thread_cnt_); if (pthread_setname_np(thread_pool_[thread_cnt_], name)) { ALOGW("pthread_setname_np failed: %s", strerror(errno)); @@ -141,41 +193,6 @@ bool Reaper::init(int comm_fd) { return true; } -static void set_process_group_and_prio(uid_t uid, int pid, const std::vector<std::string>& profiles, - int prio) { - DIR* d; - char proc_path[PATH_MAX]; - struct dirent* de; - - if (!SetProcessProfilesCached(uid, 
pid, profiles)) { - ALOGW("Failed to set task profiles for the process (%d) being killed", pid); - } - - snprintf(proc_path, sizeof(proc_path), "/proc/%d/task", pid); - if (!(d = opendir(proc_path))) { - ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", proc_path, errno, - pid); - return; - } - - while ((de = readdir(d))) { - int t_pid; - - if (de->d_name[0] == '.') continue; - t_pid = atoi(de->d_name); - - if (!t_pid) { - ALOGW("Failed to get t_pid for '%s' of pid(%d)", de->d_name, pid); - continue; - } - - if (setpriority(PRIO_PROCESS, t_pid, prio) && errno != ESRCH) { - ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", t_pid, errno); - } - } - closedir(d); -} - bool Reaper::async_kill(const struct target_proc& target) { if (target.pidfd == -1) { return false; @@ -199,10 +216,6 @@ bool Reaper::async_kill(const struct target_proc& target) { cond_.notify_one(); mutex_.unlock(); - set_process_group_and_prio(target.uid, target.pid, - {"CPUSET_SP_FOREGROUND", "SCHED_SP_FOREGROUND"}, - ANDROID_PRIORITY_HIGHEST); - return true; } @@ -222,7 +235,7 @@ int Reaper::kill(const struct target_proc& target, bool synchronous) { return result; } - return is_reaping_supported() ? 
process_mrelease(target.pidfd, 0) : 0; + return 0; } Reaper::target_proc Reaper::dequeue_request() { diff --git a/tests/TEST_MAPPING b/tests/TEST_MAPPING index 7c2533b..bcfc9b8 100644 --- a/tests/TEST_MAPPING +++ b/tests/TEST_MAPPING @@ -1,5 +1,5 @@ { - "presubmit-large": [ + "presubmit": [ { "name": "lmkd_tests" } diff --git a/tests/lmkd_tests.cpp b/tests/lmkd_tests.cpp index 0676d85..5cf7d0b 100644 --- a/tests/lmkd_tests.cpp +++ b/tests/lmkd_tests.cpp @@ -38,10 +38,13 @@ using namespace android::base; #define LMKD_LOGCAT_MARKER "lowmemorykiller" #define LMKD_KILL_TEMPLATE "Kill \'[^']*\' \\\(%d\\)" #define LMKD_REAP_TEMPLATE "Process %d was reaped" +#define LMKD_REAP_FAIL_TEMPLATE "process_mrelease %d failed" #define LMKD_KILL_LINE_START LMKD_LOGCAT_MARKER ": Kill" #define LMKD_REAP_LINE_START LMKD_LOGCAT_MARKER ": Process" #define LMKD_REAP_TIME_TEMPLATE LMKD_LOGCAT_MARKER ": Process %d was reaped in %ldms" +#define LMKD_REAP_MRELESE_ERR_MARKER ": process_mrelease" +#define LMKD_REAP_NO_PROCESS_TEMPLATE ": process_mrelease %d failed: No such process" #define ONE_MB (1 << 20) @@ -161,6 +164,12 @@ class LmkdTest : public ::testing::Test { reap_pid == pid; } + static bool ParseReapNoProcess(const std::string& line, pid_t pid) { + int reap_pid; + return sscanf(line.c_str(), LMKD_REAP_NO_PROCESS_TEMPLATE, &reap_pid) == 1 && + reap_pid == pid; + } + private: int sock; uid_t uid; @@ -187,8 +196,9 @@ TEST_F(LmkdTest, TargetReaping) { FAIL() << "Target process " << pid << " was not killed"; } - std::string regex = - StringPrintf("((" LMKD_KILL_TEMPLATE ")|(" LMKD_REAP_TEMPLATE "))", pid, pid); + std::string regex = StringPrintf("((" LMKD_KILL_TEMPLATE ")|(" LMKD_REAP_TEMPLATE + ")|(" LMKD_REAP_FAIL_TEMPLATE "))", + pid, pid, pid); std::string logcat_out = ReadLogcat(LMKD_LOGCAT_MARKER ":I", regex); // find kill report @@ -202,7 +212,18 @@ TEST_F(LmkdTest, TargetReaping) { // find reap duration report line_start = logcat_out.find(LMKD_REAP_LINE_START, line_end); - 
ASSERT_TRUE(line_start != std::string::npos) << "Reaping time report is not found"; + if (line_start == std::string::npos) { + // Target might have exited before reaping started + line_start = logcat_out.find(LMKD_REAP_MRELESE_ERR_MARKER, line_end); + + ASSERT_TRUE(line_start != std::string::npos) << "Reaping time report is not found"; + + line_end = logcat_out.find('\n', line_start); + line = logcat_out.substr(line_start, line_end == std::string::npos ? std::string::npos + : line_end - line_start); + ASSERT_TRUE(ParseReapNoProcess(line, pid)) << "Failed to reap the target " << pid; + return; + } line_end = logcat_out.find('\n', line_start); line = logcat_out.substr( line_start, line_end == std::string::npos ? std::string::npos : line_end - line_start); |