From d034701c6f09fb30bcaefdd750b60eed70752d7e Mon Sep 17 00:00:00 2001 From: Nrithya Kanakasabapathy Date: Thu, 18 Mar 2021 22:58:30 +0000 Subject: Merge branch 'whitechapel' into android-gs-pixel-5.10 * whitechapel: edgetpu: shorten KCI timeout for testing edgetpu: replace WDT modify with mode settings edgetpu: add active and dormant mode to SW watchdog edgetpu: clear mappings after sending leave group edgetpu: make wakelock event warning more descriptive Signed-off-by: Nrithya Kanakasabapathy Change-Id: Ia3a8aea5b578a528164d7a9d6175a9f43e6fd7d9 --- drivers/edgetpu/edgetpu-device-group.c | 40 +++++-------- drivers/edgetpu/edgetpu-firmware.c | 3 +- drivers/edgetpu/edgetpu-kci.c | 5 +- drivers/edgetpu/edgetpu-sw-watchdog.c | 102 ++++++++++++++++++++++----------- drivers/edgetpu/edgetpu-sw-watchdog.h | 41 ++++++++++--- drivers/edgetpu/edgetpu-wakelock.c | 47 ++++++++------- drivers/edgetpu/edgetpu-wakelock.h | 18 ++++-- 7 files changed, 161 insertions(+), 95 deletions(-) (limited to 'drivers') diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c index 7510839..273feba 100644 --- a/drivers/edgetpu/edgetpu-device-group.c +++ b/drivers/edgetpu/edgetpu-device-group.c @@ -411,10 +411,21 @@ void edgetpu_group_notify(struct edgetpu_device_group *group, uint event_id) */ static void edgetpu_device_group_release(struct edgetpu_device_group *group) { - edgetpu_mappings_clear_group(group); + int i; + struct edgetpu_dev *etdev; + edgetpu_group_clear_events(group); if (edgetpu_device_group_is_finalized(group)) { + for (i = 0; i < group->n_clients; i++) { + etdev = edgetpu_device_group_nth_etdev(group, i); + edgetpu_sw_wdt_dec_active_ref(etdev); + } edgetpu_device_group_kci_leave(group); + /* + * Mappings clear should be performed after had a handshake with + * the firmware. + */ + edgetpu_mappings_clear_group(group); edgetpu_usr_release_group(group); edgetpu_group_remove_remote_dram(group); #ifdef EDGETPU_HAS_P2P_MAILBOX @@ -560,22 +571,6 @@ void edgetpu_device_group_leave_locked(struct edgetpu_client *client) kfree(cur); edgetpu_client_put(client); group->n_clients--; - } else { - /* - * Don't modify wdt heartbeat if state is not GOOD. - * Safe to access etdev->state without state_lock as - * racing state change may again restart wdt. - */ - if (will_disband && - cur->client->etdev->state == ETDEV_STATE_GOOD) { - /* - * set time interval for sw wdt to DORMANT - * state of all other clients in group. - */ - edgetpu_sw_wdt_modify_heartbeat( - cur->client->etdev, - EDGETPU_DORMANT_DEV_BEAT_MS); - } } } edgetpu_device_group_put(client->group); @@ -593,14 +588,6 @@ void edgetpu_device_group_leave_locked(struct edgetpu_client *client) break; } } - /* - * if etdev is not in any group and state is still good, set time - * interval of wdt to DORMANT state. - */ - if (!edgetpu_in_any_group_locked(client->etdev) && - client->etdev->state == ETDEV_STATE_GOOD) - edgetpu_sw_wdt_modify_heartbeat(client->etdev, - EDGETPU_DORMANT_DEV_BEAT_MS); mutex_unlock(&client->etdev->groups_lock); } @@ -819,8 +806,7 @@ int edgetpu_device_group_finalize(struct edgetpu_device_group *group) for (i = 0; i < group->n_clients; i++) { etdev = edgetpu_device_group_nth_etdev(group, i); - edgetpu_sw_wdt_modify_heartbeat(etdev, - EDGETPU_ACTIVE_DEV_BEAT_MS); + edgetpu_sw_wdt_inc_active_ref(etdev); } mutex_unlock(&group->lock); return 0; diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c index ef2e80f..a4ac9a5 100644 --- a/drivers/edgetpu/edgetpu-firmware.c +++ b/drivers/edgetpu/edgetpu-firmware.c @@ -737,7 +737,8 @@ int edgetpu_firmware_create(struct edgetpu_dev *etdev, } etdev->firmware = et_fw; - ret = edgetpu_sw_wdt_create(etdev, EDGETPU_DORMANT_DEV_BEAT_MS); + ret = edgetpu_sw_wdt_create(etdev, EDGETPU_ACTIVE_DEV_BEAT_MS, + EDGETPU_DORMANT_DEV_BEAT_MS); if (ret) etdev_err(etdev, "Failed to create sw wdt instance\n"); else diff --git a/drivers/edgetpu/edgetpu-kci.c b/drivers/edgetpu/edgetpu-kci.c index 0f9ca8d..e56eb9a 100644 --- a/drivers/edgetpu/edgetpu-kci.c +++ b/drivers/edgetpu/edgetpu-kci.c @@ -29,9 +29,12 @@ #define QUEUE_SIZE MAX_QUEUE_SIZE /* Timeout for KCI responses from the firmware (milliseconds) */ -#ifdef CONFIG_EDGETPU_FPGA +#if IS_ENABLED(CONFIG_EDGETPU_FPGA) /* Set extra ludicrously high to 60 seconds for (slow) Palladium emulation. */ #define KCI_TIMEOUT (60000) +#elif IS_ENABLED(CONFIG_EDGETPU_TEST) +/* fake-firmware could respond in a short time */ +#define KCI_TIMEOUT (200) #else /* 5 secs. */ #define KCI_TIMEOUT (5000) diff --git a/drivers/edgetpu/edgetpu-sw-watchdog.c b/drivers/edgetpu/edgetpu-sw-watchdog.c index 703a5e3..5d96e4d 100644 --- a/drivers/edgetpu/edgetpu-sw-watchdog.c +++ b/drivers/edgetpu/edgetpu-sw-watchdog.c @@ -4,6 +4,8 @@ * * Copyright (C) 2020 Google, Inc. */ + +#include #include #include #include @@ -25,6 +27,36 @@ static void sw_wdt_handler_work(struct work_struct *work) et_action_work->edgetpu_sw_wdt_handler(et_action_work->data); } +static void sw_wdt_start(struct edgetpu_sw_wdt *wdt) +{ + if (wdt->is_wdt_disabled) { + etdev_dbg(wdt->etdev, "sw wdt disabled by module param"); + return; + } + etdev_dbg(wdt->etdev, "sw wdt: started\n"); + schedule_delayed_work(&wdt->dwork, wdt->hrtbeat_jiffs); +} + +static void sw_wdt_stop(struct edgetpu_sw_wdt *wdt) +{ + etdev_dbg(wdt->etdev, "sw wdt: stopped\n"); + cancel_delayed_work_sync(&wdt->dwork); +} + +static void sw_wdt_modify_rate(struct edgetpu_sw_wdt *wdt, unsigned long rate) +{ + if (rate == wdt->hrtbeat_jiffs) + return; + wdt->hrtbeat_jiffs = rate; + /* + * Don't restart the work if we already encountered a firmware timeout. + */ + if (work_pending(&wdt->et_action_work.work)) + return; + sw_wdt_stop(wdt); + sw_wdt_start(wdt); +} + void edgetpu_watchdog_bite(struct edgetpu_dev *etdev, bool reset) { if (!etdev->etdev_sw_wdt) @@ -43,9 +75,8 @@ void edgetpu_watchdog_bite(struct edgetpu_dev *etdev, bool reset) } /* - * Ping the f/w for a response. Reschedule the work for next beat - * in case of response or schedule a worker for action callback in case of - * TIMEOUT. + * Ping the f/w for a response. Reschedule the work for next beat in case of f/w + * is responded, or schedule a worker for action callback in case of TIMEOUT. */ static void sw_wdt_work(struct work_struct *work) { @@ -69,7 +100,8 @@ static void sw_wdt_work(struct work_struct *work) } } -int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long hrtbeat_ms) +int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long active_ms, + unsigned long dormant_ms) { struct edgetpu_sw_wdt *etdev_sw_wdt; @@ -78,7 +110,11 @@ int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long hrtbeat_ms) return -ENOMEM; etdev_sw_wdt->etdev = etdev; - etdev_sw_wdt->hrtbeat_jiffs = msecs_to_jiffies(hrtbeat_ms); + etdev_sw_wdt->hrtbeat_active = msecs_to_jiffies(active_ms); + etdev_sw_wdt->hrtbeat_dormant = msecs_to_jiffies(dormant_ms); + atomic_set(&etdev_sw_wdt->active_counter, 0); + /* init to dormant rate */ + etdev_sw_wdt->hrtbeat_jiffs = etdev_sw_wdt->hrtbeat_dormant; INIT_DELAYED_WORK(&etdev_sw_wdt->dwork, sw_wdt_work); INIT_WORK(&etdev_sw_wdt->et_action_work.work, sw_wdt_handler_work); etdev_sw_wdt->is_wdt_disabled = wdt_disable; @@ -94,13 +130,7 @@ int edgetpu_sw_wdt_start(struct edgetpu_dev *etdev) return -EINVAL; if (!etdev_sw_wdt->et_action_work.edgetpu_sw_wdt_handler) etdev_err(etdev, "sw wdt handler not set\n"); - if (etdev_sw_wdt->is_wdt_disabled) { - etdev_dbg(etdev, "sw wdt disabled by module param"); - return 0; - } - etdev_dbg(etdev, "sw wdt: started\n"); - schedule_delayed_work(&etdev_sw_wdt->dwork, - etdev_sw_wdt->hrtbeat_jiffs); + sw_wdt_start(etdev_sw_wdt); return 0; } @@ -108,17 +138,24 @@ void edgetpu_sw_wdt_stop(struct edgetpu_dev *etdev) { if (!etdev->etdev_sw_wdt) return; - etdev_dbg(etdev, "sw wdt: stopped\n"); - cancel_delayed_work_sync(&etdev->etdev_sw_wdt->dwork); + sw_wdt_stop(etdev->etdev_sw_wdt); } void edgetpu_sw_wdt_destroy(struct edgetpu_dev *etdev) { - /* cancel and sync work due to watchdog bite to prevent UAF */ - cancel_work_sync(&etdev->etdev_sw_wdt->et_action_work.work); - edgetpu_sw_wdt_stop(etdev); - kfree(etdev->etdev_sw_wdt); + struct edgetpu_sw_wdt *wdt = etdev->etdev_sw_wdt; + int counter; + + if (!wdt) + return; etdev->etdev_sw_wdt = NULL; + /* cancel and sync work due to watchdog bite to prevent UAF */ + cancel_work_sync(&wdt->et_action_work.work); + sw_wdt_stop(wdt); + counter = atomic_read(&wdt->active_counter); + if (counter) + etdev_warn(etdev, "Unbalanced WDT active counter: %d", counter); + kfree(wdt); } void edgetpu_sw_wdt_set_handler(struct edgetpu_dev *etdev, @@ -132,23 +169,22 @@ void edgetpu_sw_wdt_set_handler(struct edgetpu_dev *etdev, et_sw_wdt->et_action_work.data = data; } -void edgetpu_sw_wdt_modify_heartbeat(struct edgetpu_dev *etdev, - unsigned long hrtbeat_ms) +void edgetpu_sw_wdt_inc_active_ref(struct edgetpu_dev *etdev) { - struct edgetpu_sw_wdt *etdev_sw_wdt = etdev->etdev_sw_wdt; - unsigned long hrtbeat_jiffs = msecs_to_jiffies(hrtbeat_ms); + struct edgetpu_sw_wdt *wdt = etdev->etdev_sw_wdt; - if (!etdev_sw_wdt) + if (!wdt) return; - /* - * check if (et_action_work) is pending, since after watchdog bite - * there is no need to restart another work. - */ - if (work_pending(&etdev_sw_wdt->et_action_work.work)) + if (!atomic_fetch_inc(&wdt->active_counter)) + sw_wdt_modify_rate(wdt, wdt->hrtbeat_active); +} + +void edgetpu_sw_wdt_dec_active_ref(struct edgetpu_dev *etdev) +{ + struct edgetpu_sw_wdt *wdt = etdev->etdev_sw_wdt; + + if (!wdt) return; - if (hrtbeat_jiffs != etdev_sw_wdt->hrtbeat_jiffs) { - edgetpu_sw_wdt_stop(etdev); - etdev_sw_wdt->hrtbeat_jiffs = hrtbeat_jiffs; - edgetpu_sw_wdt_start(etdev); - } + if (atomic_fetch_dec(&wdt->active_counter) == 1) + sw_wdt_modify_rate(wdt, wdt->hrtbeat_dormant); } diff --git a/drivers/edgetpu/edgetpu-sw-watchdog.h b/drivers/edgetpu/edgetpu-sw-watchdog.h index 7b214b2..bbc1757 100644 --- a/drivers/edgetpu/edgetpu-sw-watchdog.h +++ b/drivers/edgetpu/edgetpu-sw-watchdog.h @@ -7,6 +7,7 @@ #ifndef __EDGETPU_SW_WDT_H__ #define __EDGETPU_SW_WDT_H__ +#include #include #include "edgetpu-internal.h" @@ -26,15 +27,30 @@ struct edgetpu_sw_wdt { struct delayed_work dwork; /* edgetpu device this watchdog is monitoring. */ struct edgetpu_dev *etdev; - /* time period in jiffies in pinging device firmware. */ + /* Time period in jiffies in pinging device firmware. */ unsigned long hrtbeat_jiffs; - /* work information for watchdog bite. */ + /* Heartbeat rates passed in edgetpu_sw_wdt_create(), in jiffies. */ + unsigned long hrtbeat_active; + unsigned long hrtbeat_dormant; + /* Work information for watchdog bite. */ struct edgetpu_sw_wdt_action_work et_action_work; - /* flag to mark that watchdog is disabled. */ + /* Flag to mark that watchdog is disabled. */ bool is_wdt_disabled; + /* + * When this counter is greater than zero, use @hrtbeat_active heartbeat + * rate, otherwise @hrtbeat_dormant. Initial value is zero. + */ + atomic_t active_counter; }; -int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long hrtbeat_ms); +/* + * Creates and assigns a watchdog object to @etdev->etdev_sw_wdt. + * + * @active_ms and @dormant_ms are the time periods in pinging device firmware + * for active mode and dormant mode, respectively. + */ +int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long active_ms, + unsigned long dormant_ms); int edgetpu_sw_wdt_start(struct edgetpu_dev *etdev); void edgetpu_sw_wdt_stop(struct edgetpu_dev *etdev); void edgetpu_sw_wdt_destroy(struct edgetpu_dev *etdev); @@ -44,9 +60,20 @@ void edgetpu_sw_wdt_destroy(struct edgetpu_dev *etdev); */ void edgetpu_sw_wdt_set_handler(struct edgetpu_dev *etdev, void (*handler_cb)(void *), void *data); -/* Modify the time interval of heartbeat. It will also start the watchdog. */ -void edgetpu_sw_wdt_modify_heartbeat(struct edgetpu_dev *etdev, - unsigned long hrtbeat_ms); +/* + * Increases the @active_counter. + * + * If @active_counter was zero, watchdog will be restarted with the active + * heartbeat rate. + */ +void edgetpu_sw_wdt_inc_active_ref(struct edgetpu_dev *etdev); +/* + * Decreases the @active_counter. + * + * If @active_counter was one, watchdog will be restarted with the dormant + * heartbeat rate. + */ +void edgetpu_sw_wdt_dec_active_ref(struct edgetpu_dev *etdev); /* * Schedule sw watchdog action immediately. Called on fatal errors. diff --git a/drivers/edgetpu/edgetpu-wakelock.c b/drivers/edgetpu/edgetpu-wakelock.c index 61af4fa..f308d3a 100644 --- a/drivers/edgetpu/edgetpu-wakelock.c +++ b/drivers/edgetpu/edgetpu-wakelock.c @@ -15,21 +15,32 @@ #include "edgetpu-internal.h" #include "edgetpu-wakelock.h" +static const char *const event_name[] = { +#define X(name, _) #name + EDGETPU_WAKELOCK_EVENTS +#undef X +}; + /* - * Returns the first event with a non-zero counter. - * Returns EDGETPU_WAKELOCK_EVENT_END if all event counters are zero. + * Loops through the events and warns if any event has a non-zero counter. + * Returns true if at least one non-zero counter is found. * * Caller holds @wakelock->lock. */ -static enum edgetpu_wakelock_event -wakelock_non_zero_event(struct edgetpu_wakelock *wakelock) +static bool wakelock_warn_non_zero_event(struct edgetpu_wakelock *wakelock) { int i; - - for (i = 0; i < EDGETPU_WAKELOCK_EVENT_END; i++) - if (wakelock->event_count[i]) - return i; - return EDGETPU_WAKELOCK_EVENT_END; + bool ret = false; + + for (i = 0; i < EDGETPU_WAKELOCK_EVENT_END; i++) { + if (wakelock->event_count[i]) { + ret = true; + etdev_warn(wakelock->etdev, + "%s has non-zero counter=%d", event_name[i], + wakelock->event_count[i]); + } + } + return ret; } struct edgetpu_wakelock *edgetpu_wakelock_alloc(struct edgetpu_dev *etdev) @@ -143,17 +154,13 @@ int edgetpu_wakelock_release(struct edgetpu_wakelock *wakelock) return -EINVAL; } /* only need to check events when this is the last reference */ - if (wakelock->req_count == 1) { - enum edgetpu_wakelock_event evt = - wakelock_non_zero_event(wakelock); - - if (evt != EDGETPU_WAKELOCK_EVENT_END) { - etdev_warn( - wakelock->etdev, - "event %d is happening, refusing wakelock release", - evt); - return -EAGAIN; - } + if (wakelock->req_count == 1 && + wakelock_warn_non_zero_event(wakelock)) { + etdev_warn( + wakelock->etdev, + "detected non-zero events, refusing wakelock release"); + return -EAGAIN; } + return --wakelock->req_count; } diff --git a/drivers/edgetpu/edgetpu-wakelock.h b/drivers/edgetpu/edgetpu-wakelock.h index 713f58b..69e0fa7 100644 --- a/drivers/edgetpu/edgetpu-wakelock.h +++ b/drivers/edgetpu/edgetpu-wakelock.h @@ -25,14 +25,20 @@ * Events that could block the wakelock from being released. * Use inc_event() and dec_event() to increase and decrease the counters when * the event happens. + * + * Defined with X macros to support fetching event names from values. */ -enum edgetpu_wakelock_event { - EDGETPU_WAKELOCK_EVENT_FULL_CSR = 0, - EDGETPU_WAKELOCK_EVENT_MBOX_CSR = 1, - EDGETPU_WAKELOCK_EVENT_CMD_QUEUE = 2, - EDGETPU_WAKELOCK_EVENT_RESP_QUEUE = 3, +#define EDGETPU_WAKELOCK_EVENTS \ + X(EDGETPU_WAKELOCK_EVENT_FULL_CSR, 0), \ + X(EDGETPU_WAKELOCK_EVENT_MBOX_CSR, 1), \ + X(EDGETPU_WAKELOCK_EVENT_CMD_QUEUE, 2), \ + X(EDGETPU_WAKELOCK_EVENT_RESP_QUEUE, 3), \ + X(EDGETPU_WAKELOCK_EVENT_END, 4) - EDGETPU_WAKELOCK_EVENT_END = 4 +enum edgetpu_wakelock_event { +#define X(name, val) name = val + EDGETPU_WAKELOCK_EVENTS +#undef X }; struct edgetpu_wakelock { -- cgit v1.2.3