summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNrithya Kanakasabapathy <nrithya@google.com>2021-03-18 22:58:30 +0000
committerNrithya Kanakasabapathy <nrithya@google.com>2021-03-18 22:58:30 +0000
commitd034701c6f09fb30bcaefdd750b60eed70752d7e (patch)
tree607f78dbf8dbc3714dac9328b60c990e444a4cec
parent0749d4c2a3bc59e2f698e97528ad9447204e03de (diff)
downloadedgetpu-d034701c6f09fb30bcaefdd750b60eed70752d7e.tar.gz
Merge branch 'whitechapel' into android-gs-pixel-5.10
* whitechapel: edgetpu: shorten KCI timeout for testing edgetpu: replace WDT modify with mode settings edgetpu: add active and dormant mode to SW watchdog edgetpu: clear mappings after sending leave group edgetpu: make wakelock event warning more descriptive Signed-off-by: Nrithya Kanakasabapathy <nrithya@google.com> Change-Id: Ia3a8aea5b578a528164d7a9d6175a9f43e6fd7d9
-rw-r--r--drivers/edgetpu/edgetpu-device-group.c40
-rw-r--r--drivers/edgetpu/edgetpu-firmware.c3
-rw-r--r--drivers/edgetpu/edgetpu-kci.c5
-rw-r--r--drivers/edgetpu/edgetpu-sw-watchdog.c102
-rw-r--r--drivers/edgetpu/edgetpu-sw-watchdog.h41
-rw-r--r--drivers/edgetpu/edgetpu-wakelock.c47
-rw-r--r--drivers/edgetpu/edgetpu-wakelock.h18
7 files changed, 161 insertions, 95 deletions
diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c
index 7510839..273feba 100644
--- a/drivers/edgetpu/edgetpu-device-group.c
+++ b/drivers/edgetpu/edgetpu-device-group.c
@@ -411,10 +411,21 @@ void edgetpu_group_notify(struct edgetpu_device_group *group, uint event_id)
*/
static void edgetpu_device_group_release(struct edgetpu_device_group *group)
{
- edgetpu_mappings_clear_group(group);
+ int i;
+ struct edgetpu_dev *etdev;
+
edgetpu_group_clear_events(group);
if (edgetpu_device_group_is_finalized(group)) {
+ for (i = 0; i < group->n_clients; i++) {
+ etdev = edgetpu_device_group_nth_etdev(group, i);
+ edgetpu_sw_wdt_dec_active_ref(etdev);
+ }
edgetpu_device_group_kci_leave(group);
+ /*
+ * Mappings clear should be performed after had a handshake with
+ * the firmware.
+ */
+ edgetpu_mappings_clear_group(group);
edgetpu_usr_release_group(group);
edgetpu_group_remove_remote_dram(group);
#ifdef EDGETPU_HAS_P2P_MAILBOX
@@ -560,22 +571,6 @@ void edgetpu_device_group_leave_locked(struct edgetpu_client *client)
kfree(cur);
edgetpu_client_put(client);
group->n_clients--;
- } else {
- /*
- * Don't modify wdt heartbeat if state is not GOOD.
- * Safe to access etdev->state without state_lock as
- * racing state change may again restart wdt.
- */
- if (will_disband &&
- cur->client->etdev->state == ETDEV_STATE_GOOD) {
- /*
- * set time interval for sw wdt to DORMANT
- * state of all other clients in group.
- */
- edgetpu_sw_wdt_modify_heartbeat(
- cur->client->etdev,
- EDGETPU_DORMANT_DEV_BEAT_MS);
- }
}
}
edgetpu_device_group_put(client->group);
@@ -593,14 +588,6 @@ void edgetpu_device_group_leave_locked(struct edgetpu_client *client)
break;
}
}
- /*
- * if etdev is not in any group and state is still good, set time
- * interval of wdt to DORMANT state.
- */
- if (!edgetpu_in_any_group_locked(client->etdev) &&
- client->etdev->state == ETDEV_STATE_GOOD)
- edgetpu_sw_wdt_modify_heartbeat(client->etdev,
- EDGETPU_DORMANT_DEV_BEAT_MS);
mutex_unlock(&client->etdev->groups_lock);
}
@@ -819,8 +806,7 @@ int edgetpu_device_group_finalize(struct edgetpu_device_group *group)
for (i = 0; i < group->n_clients; i++) {
etdev = edgetpu_device_group_nth_etdev(group, i);
- edgetpu_sw_wdt_modify_heartbeat(etdev,
- EDGETPU_ACTIVE_DEV_BEAT_MS);
+ edgetpu_sw_wdt_inc_active_ref(etdev);
}
mutex_unlock(&group->lock);
return 0;
diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c
index ef2e80f..a4ac9a5 100644
--- a/drivers/edgetpu/edgetpu-firmware.c
+++ b/drivers/edgetpu/edgetpu-firmware.c
@@ -737,7 +737,8 @@ int edgetpu_firmware_create(struct edgetpu_dev *etdev,
}
etdev->firmware = et_fw;
- ret = edgetpu_sw_wdt_create(etdev, EDGETPU_DORMANT_DEV_BEAT_MS);
+ ret = edgetpu_sw_wdt_create(etdev, EDGETPU_ACTIVE_DEV_BEAT_MS,
+ EDGETPU_DORMANT_DEV_BEAT_MS);
if (ret)
etdev_err(etdev, "Failed to create sw wdt instance\n");
else
diff --git a/drivers/edgetpu/edgetpu-kci.c b/drivers/edgetpu/edgetpu-kci.c
index 0f9ca8d..e56eb9a 100644
--- a/drivers/edgetpu/edgetpu-kci.c
+++ b/drivers/edgetpu/edgetpu-kci.c
@@ -29,9 +29,12 @@
#define QUEUE_SIZE MAX_QUEUE_SIZE
/* Timeout for KCI responses from the firmware (milliseconds) */
-#ifdef CONFIG_EDGETPU_FPGA
+#if IS_ENABLED(CONFIG_EDGETPU_FPGA)
/* Set extra ludicrously high to 60 seconds for (slow) Palladium emulation. */
#define KCI_TIMEOUT (60000)
+#elif IS_ENABLED(CONFIG_EDGETPU_TEST)
+/* fake-firmware could respond in a short time */
+#define KCI_TIMEOUT (200)
#else
/* 5 secs. */
#define KCI_TIMEOUT (5000)
diff --git a/drivers/edgetpu/edgetpu-sw-watchdog.c b/drivers/edgetpu/edgetpu-sw-watchdog.c
index 703a5e3..5d96e4d 100644
--- a/drivers/edgetpu/edgetpu-sw-watchdog.c
+++ b/drivers/edgetpu/edgetpu-sw-watchdog.c
@@ -4,6 +4,8 @@
*
* Copyright (C) 2020 Google, Inc.
*/
+
+#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
@@ -25,6 +27,36 @@ static void sw_wdt_handler_work(struct work_struct *work)
et_action_work->edgetpu_sw_wdt_handler(et_action_work->data);
}
+static void sw_wdt_start(struct edgetpu_sw_wdt *wdt)
+{
+ if (wdt->is_wdt_disabled) {
+ etdev_dbg(wdt->etdev, "sw wdt disabled by module param");
+ return;
+ }
+ etdev_dbg(wdt->etdev, "sw wdt: started\n");
+ schedule_delayed_work(&wdt->dwork, wdt->hrtbeat_jiffs);
+}
+
+static void sw_wdt_stop(struct edgetpu_sw_wdt *wdt)
+{
+ etdev_dbg(wdt->etdev, "sw wdt: stopped\n");
+ cancel_delayed_work_sync(&wdt->dwork);
+}
+
+static void sw_wdt_modify_rate(struct edgetpu_sw_wdt *wdt, unsigned long rate)
+{
+ if (rate == wdt->hrtbeat_jiffs)
+ return;
+ wdt->hrtbeat_jiffs = rate;
+ /*
+ * Don't restart the work if we already encountered a firmware timeout.
+ */
+ if (work_pending(&wdt->et_action_work.work))
+ return;
+ sw_wdt_stop(wdt);
+ sw_wdt_start(wdt);
+}
+
void edgetpu_watchdog_bite(struct edgetpu_dev *etdev, bool reset)
{
if (!etdev->etdev_sw_wdt)
@@ -43,9 +75,8 @@ void edgetpu_watchdog_bite(struct edgetpu_dev *etdev, bool reset)
}
/*
- * Ping the f/w for a response. Reschedule the work for next beat
- * in case of response or schedule a worker for action callback in case of
- * TIMEOUT.
+ * Ping the f/w for a response. Reschedule the work for next beat in case of f/w
+ * is responded, or schedule a worker for action callback in case of TIMEOUT.
*/
static void sw_wdt_work(struct work_struct *work)
{
@@ -69,7 +100,8 @@ static void sw_wdt_work(struct work_struct *work)
}
}
-int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long hrtbeat_ms)
+int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long active_ms,
+ unsigned long dormant_ms)
{
struct edgetpu_sw_wdt *etdev_sw_wdt;
@@ -78,7 +110,11 @@ int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long hrtbeat_ms)
return -ENOMEM;
etdev_sw_wdt->etdev = etdev;
- etdev_sw_wdt->hrtbeat_jiffs = msecs_to_jiffies(hrtbeat_ms);
+ etdev_sw_wdt->hrtbeat_active = msecs_to_jiffies(active_ms);
+ etdev_sw_wdt->hrtbeat_dormant = msecs_to_jiffies(dormant_ms);
+ atomic_set(&etdev_sw_wdt->active_counter, 0);
+ /* init to dormant rate */
+ etdev_sw_wdt->hrtbeat_jiffs = etdev_sw_wdt->hrtbeat_dormant;
INIT_DELAYED_WORK(&etdev_sw_wdt->dwork, sw_wdt_work);
INIT_WORK(&etdev_sw_wdt->et_action_work.work, sw_wdt_handler_work);
etdev_sw_wdt->is_wdt_disabled = wdt_disable;
@@ -94,13 +130,7 @@ int edgetpu_sw_wdt_start(struct edgetpu_dev *etdev)
return -EINVAL;
if (!etdev_sw_wdt->et_action_work.edgetpu_sw_wdt_handler)
etdev_err(etdev, "sw wdt handler not set\n");
- if (etdev_sw_wdt->is_wdt_disabled) {
- etdev_dbg(etdev, "sw wdt disabled by module param");
- return 0;
- }
- etdev_dbg(etdev, "sw wdt: started\n");
- schedule_delayed_work(&etdev_sw_wdt->dwork,
- etdev_sw_wdt->hrtbeat_jiffs);
+ sw_wdt_start(etdev_sw_wdt);
return 0;
}
@@ -108,17 +138,24 @@ void edgetpu_sw_wdt_stop(struct edgetpu_dev *etdev)
{
if (!etdev->etdev_sw_wdt)
return;
- etdev_dbg(etdev, "sw wdt: stopped\n");
- cancel_delayed_work_sync(&etdev->etdev_sw_wdt->dwork);
+ sw_wdt_stop(etdev->etdev_sw_wdt);
}
void edgetpu_sw_wdt_destroy(struct edgetpu_dev *etdev)
{
- /* cancel and sync work due to watchdog bite to prevent UAF */
- cancel_work_sync(&etdev->etdev_sw_wdt->et_action_work.work);
- edgetpu_sw_wdt_stop(etdev);
- kfree(etdev->etdev_sw_wdt);
+ struct edgetpu_sw_wdt *wdt = etdev->etdev_sw_wdt;
+ int counter;
+
+ if (!wdt)
+ return;
etdev->etdev_sw_wdt = NULL;
+ /* cancel and sync work due to watchdog bite to prevent UAF */
+ cancel_work_sync(&wdt->et_action_work.work);
+ sw_wdt_stop(wdt);
+ counter = atomic_read(&wdt->active_counter);
+ if (counter)
+ etdev_warn(etdev, "Unbalanced WDT active counter: %d", counter);
+ kfree(wdt);
}
void edgetpu_sw_wdt_set_handler(struct edgetpu_dev *etdev,
@@ -132,23 +169,22 @@ void edgetpu_sw_wdt_set_handler(struct edgetpu_dev *etdev,
et_sw_wdt->et_action_work.data = data;
}
-void edgetpu_sw_wdt_modify_heartbeat(struct edgetpu_dev *etdev,
- unsigned long hrtbeat_ms)
+void edgetpu_sw_wdt_inc_active_ref(struct edgetpu_dev *etdev)
{
- struct edgetpu_sw_wdt *etdev_sw_wdt = etdev->etdev_sw_wdt;
- unsigned long hrtbeat_jiffs = msecs_to_jiffies(hrtbeat_ms);
+ struct edgetpu_sw_wdt *wdt = etdev->etdev_sw_wdt;
- if (!etdev_sw_wdt)
+ if (!wdt)
return;
- /*
- * check if (et_action_work) is pending, since after watchdog bite
- * there is no need to restart another work.
- */
- if (work_pending(&etdev_sw_wdt->et_action_work.work))
+ if (!atomic_fetch_inc(&wdt->active_counter))
+ sw_wdt_modify_rate(wdt, wdt->hrtbeat_active);
+}
+
+void edgetpu_sw_wdt_dec_active_ref(struct edgetpu_dev *etdev)
+{
+ struct edgetpu_sw_wdt *wdt = etdev->etdev_sw_wdt;
+
+ if (!wdt)
return;
- if (hrtbeat_jiffs != etdev_sw_wdt->hrtbeat_jiffs) {
- edgetpu_sw_wdt_stop(etdev);
- etdev_sw_wdt->hrtbeat_jiffs = hrtbeat_jiffs;
- edgetpu_sw_wdt_start(etdev);
- }
+ if (atomic_fetch_dec(&wdt->active_counter) == 1)
+ sw_wdt_modify_rate(wdt, wdt->hrtbeat_dormant);
}
diff --git a/drivers/edgetpu/edgetpu-sw-watchdog.h b/drivers/edgetpu/edgetpu-sw-watchdog.h
index 7b214b2..bbc1757 100644
--- a/drivers/edgetpu/edgetpu-sw-watchdog.h
+++ b/drivers/edgetpu/edgetpu-sw-watchdog.h
@@ -7,6 +7,7 @@
#ifndef __EDGETPU_SW_WDT_H__
#define __EDGETPU_SW_WDT_H__
+#include <linux/atomic.h>
#include <linux/workqueue.h>
#include "edgetpu-internal.h"
@@ -26,15 +27,30 @@ struct edgetpu_sw_wdt {
struct delayed_work dwork;
/* edgetpu device this watchdog is monitoring. */
struct edgetpu_dev *etdev;
- /* time period in jiffies in pinging device firmware. */
+ /* Time period in jiffies in pinging device firmware. */
unsigned long hrtbeat_jiffs;
- /* work information for watchdog bite. */
+ /* Heartbeat rates passed in edgetpu_sw_wdt_create(), in jiffies. */
+ unsigned long hrtbeat_active;
+ unsigned long hrtbeat_dormant;
+ /* Work information for watchdog bite. */
struct edgetpu_sw_wdt_action_work et_action_work;
- /* flag to mark that watchdog is disabled. */
+ /* Flag to mark that watchdog is disabled. */
bool is_wdt_disabled;
+ /*
+ * When this counter is greater than zero, use @hrtbeat_active heartbeat
+ * rate, otherwise @hrtbeat_dormant. Initial value is zero.
+ */
+ atomic_t active_counter;
};
-int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long hrtbeat_ms);
+/*
+ * Creates and assigns a watchdog object to @etdev->etdev_sw_wdt.
+ *
+ * @active_ms and @dormant_ms are the time periods in pinging device firmware
+ * for active mode and dormant mode, respectively.
+ */
+int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long active_ms,
+ unsigned long dormant_ms);
int edgetpu_sw_wdt_start(struct edgetpu_dev *etdev);
void edgetpu_sw_wdt_stop(struct edgetpu_dev *etdev);
void edgetpu_sw_wdt_destroy(struct edgetpu_dev *etdev);
@@ -44,9 +60,20 @@ void edgetpu_sw_wdt_destroy(struct edgetpu_dev *etdev);
*/
void edgetpu_sw_wdt_set_handler(struct edgetpu_dev *etdev,
void (*handler_cb)(void *), void *data);
-/* Modify the time interval of heartbeat. It will also start the watchdog. */
-void edgetpu_sw_wdt_modify_heartbeat(struct edgetpu_dev *etdev,
- unsigned long hrtbeat_ms);
+/*
+ * Increases the @active_counter.
+ *
+ * If @active_counter was zero, watchdog will be restarted with the active
+ * heartbeat rate.
+ */
+void edgetpu_sw_wdt_inc_active_ref(struct edgetpu_dev *etdev);
+/*
+ * Decreases the @active_counter.
+ *
+ * If @active_counter was one, watchdog will be restarted with the dormant
+ * heartbeat rate.
+ */
+void edgetpu_sw_wdt_dec_active_ref(struct edgetpu_dev *etdev);
/*
* Schedule sw watchdog action immediately. Called on fatal errors.
diff --git a/drivers/edgetpu/edgetpu-wakelock.c b/drivers/edgetpu/edgetpu-wakelock.c
index 61af4fa..f308d3a 100644
--- a/drivers/edgetpu/edgetpu-wakelock.c
+++ b/drivers/edgetpu/edgetpu-wakelock.c
@@ -15,21 +15,32 @@
#include "edgetpu-internal.h"
#include "edgetpu-wakelock.h"
+static const char *const event_name[] = {
+#define X(name, _) #name
+ EDGETPU_WAKELOCK_EVENTS
+#undef X
+};
+
/*
- * Returns the first event with a non-zero counter.
- * Returns EDGETPU_WAKELOCK_EVENT_END if all event counters are zero.
+ * Loops through the events and warns if any event has a non-zero counter.
+ * Returns true if at least one non-zero counter is found.
*
* Caller holds @wakelock->lock.
*/
-static enum edgetpu_wakelock_event
-wakelock_non_zero_event(struct edgetpu_wakelock *wakelock)
+static bool wakelock_warn_non_zero_event(struct edgetpu_wakelock *wakelock)
{
int i;
-
- for (i = 0; i < EDGETPU_WAKELOCK_EVENT_END; i++)
- if (wakelock->event_count[i])
- return i;
- return EDGETPU_WAKELOCK_EVENT_END;
+ bool ret = false;
+
+ for (i = 0; i < EDGETPU_WAKELOCK_EVENT_END; i++) {
+ if (wakelock->event_count[i]) {
+ ret = true;
+ etdev_warn(wakelock->etdev,
+ "%s has non-zero counter=%d", event_name[i],
+ wakelock->event_count[i]);
+ }
+ }
+ return ret;
}
struct edgetpu_wakelock *edgetpu_wakelock_alloc(struct edgetpu_dev *etdev)
@@ -143,17 +154,13 @@ int edgetpu_wakelock_release(struct edgetpu_wakelock *wakelock)
return -EINVAL;
}
/* only need to check events when this is the last reference */
- if (wakelock->req_count == 1) {
- enum edgetpu_wakelock_event evt =
- wakelock_non_zero_event(wakelock);
-
- if (evt != EDGETPU_WAKELOCK_EVENT_END) {
- etdev_warn(
- wakelock->etdev,
- "event %d is happening, refusing wakelock release",
- evt);
- return -EAGAIN;
- }
+ if (wakelock->req_count == 1 &&
+ wakelock_warn_non_zero_event(wakelock)) {
+ etdev_warn(
+ wakelock->etdev,
+ "detected non-zero events, refusing wakelock release");
+ return -EAGAIN;
}
+
return --wakelock->req_count;
}
diff --git a/drivers/edgetpu/edgetpu-wakelock.h b/drivers/edgetpu/edgetpu-wakelock.h
index 713f58b..69e0fa7 100644
--- a/drivers/edgetpu/edgetpu-wakelock.h
+++ b/drivers/edgetpu/edgetpu-wakelock.h
@@ -25,14 +25,20 @@
* Events that could block the wakelock from being released.
* Use inc_event() and dec_event() to increase and decrease the counters when
* the event happens.
+ *
+ * Defined with X macros to support fetching event names from values.
*/
-enum edgetpu_wakelock_event {
- EDGETPU_WAKELOCK_EVENT_FULL_CSR = 0,
- EDGETPU_WAKELOCK_EVENT_MBOX_CSR = 1,
- EDGETPU_WAKELOCK_EVENT_CMD_QUEUE = 2,
- EDGETPU_WAKELOCK_EVENT_RESP_QUEUE = 3,
+#define EDGETPU_WAKELOCK_EVENTS \
+ X(EDGETPU_WAKELOCK_EVENT_FULL_CSR, 0), \
+ X(EDGETPU_WAKELOCK_EVENT_MBOX_CSR, 1), \
+ X(EDGETPU_WAKELOCK_EVENT_CMD_QUEUE, 2), \
+ X(EDGETPU_WAKELOCK_EVENT_RESP_QUEUE, 3), \
+ X(EDGETPU_WAKELOCK_EVENT_END, 4)
- EDGETPU_WAKELOCK_EVENT_END = 4
+enum edgetpu_wakelock_event {
+#define X(name, val) name = val
+ EDGETPU_WAKELOCK_EVENTS
+#undef X
};
struct edgetpu_wakelock {