diff options
author | Todd Poynor <toddpoynor@google.com> | 2021-07-14 18:33:15 +0800 |
---|---|---|
committer | Todd Poynor <toddpoynor@google.com> | 2021-07-20 02:54:51 +0000 |
commit | 8fd890bd673196a1c7e14bf9d046711e353b9467 (patch) | |
tree | 9aa4bbfa9f2cec80cb18f0daa1e61b3fc05c7896 | |
parent | f20eac91b6e63ee33793ee4aae418cf9fc84b1cf (diff) | |
download | abrolhos-8fd890bd673196a1c7e14bf9d046711e353b9467.tar.gz |
edgetpu: sync fixes from darwinn-internal
Roll up the following commits suggested for inclusion in pixel 5.10:
90c2dbb04 edgetpu: add sysfs attr clients to dump client and wakelock state
465a02519 edgetpu: PM log clients holding TPU wakelocks at suspend reject time
6614f7487 edgetpu: add list of clients per device
bcbadfe1c edgetpu: fix kernel paging error in edgetpu_mmu_attach_domain
Bug: 193484549
Bug: 193591701
Signed-off-by: Claire Chang <tientzu@google.com>
Signed-off-by: Todd Poynor <toddpoynor@google.com>
Change-Id: I19eb15187af66d35360349929738c9de6f5cc05b
-rw-r--r-- | drivers/edgetpu/edgetpu-core.c | 29 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-device-group.c | 21 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-fs.c | 23 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-internal.h | 12 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-pm.c | 26 |
5 files changed, 93 insertions, 18 deletions
diff --git a/drivers/edgetpu/edgetpu-core.c b/drivers/edgetpu/edgetpu-core.c index 7701a71..bf2ad2d 100644 --- a/drivers/edgetpu/edgetpu-core.c +++ b/drivers/edgetpu/edgetpu-core.c @@ -371,6 +371,8 @@ int edgetpu_device_add(struct edgetpu_dev *etdev, INIT_LIST_HEAD(&etdev->groups); etdev->n_groups = 0; etdev->group_join_lockout = false; + mutex_init(&etdev->clients_lock); + INIT_LIST_HEAD(&etdev->clients); etdev->vcid_pool = (1u << EDGETPU_NUM_VCIDS) - 1; mutex_init(&etdev->state_lock); etdev->state = ETDEV_STATE_NOFW; @@ -457,13 +459,19 @@ void edgetpu_device_remove(struct edgetpu_dev *etdev) struct edgetpu_client *edgetpu_client_add(struct edgetpu_dev *etdev) { struct edgetpu_client *client; + struct edgetpu_list_device_client *l = kmalloc(sizeof(*l), GFP_KERNEL); + if (!l) + return ERR_PTR(-ENOMEM); client = kzalloc(sizeof(*client), GFP_KERNEL); - if (!client) + if (!client) { + kfree(l); return ERR_PTR(-ENOMEM); + } client->wakelock = edgetpu_wakelock_alloc(etdev); if (!client->wakelock) { kfree(client); + kfree(l); return ERR_PTR(-ENOMEM); } @@ -474,6 +482,10 @@ struct edgetpu_client *edgetpu_client_add(struct edgetpu_dev *etdev) /* equivalent to edgetpu_client_get() */ refcount_set(&client->count, 1); client->perdie_events = 0; + mutex_lock(&etdev->clients_lock); + l->client = client; + list_add_tail(&l->list, &etdev->clients); + mutex_unlock(&etdev->clients_lock); return client; } @@ -494,14 +506,27 @@ void edgetpu_client_put(struct edgetpu_client *client) void edgetpu_client_remove(struct edgetpu_client *client) { struct edgetpu_dev *etdev; + struct edgetpu_list_device_client *lc; if (IS_ERR_OR_NULL(client)) return; etdev = client->etdev; + mutex_lock(&etdev->clients_lock); + /* remove the client from the device list */ + for_each_list_device_client(etdev, lc) { + if (lc->client == client) { + list_del(&lc->list); + kfree(lc); + break; + } + } + mutex_unlock(&etdev->clients_lock); /* * A quick check without holding client->group_lock. 
* - * If client doesn't belong to a group then we are fine to not proceed. + * If client doesn't belong to a group then we are fine to not remove + * from groups. + * * If there is a race that the client belongs to a group but is removing * by another process - this will be detected by the check with holding * client->group_lock later. diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c index 6172b2c..9e07a03 100644 --- a/drivers/edgetpu/edgetpu-device-group.c +++ b/drivers/edgetpu/edgetpu-device-group.c @@ -704,18 +704,11 @@ edgetpu_device_group_alloc(struct edgetpu_client *client, group->mbox_attr = *attr; if (attr->priority & EDGETPU_PRIORITY_DETACHABLE) group->mailbox_detachable = true; - /* adds @client as the first entry */ - ret = edgetpu_device_group_add(group, client); - if (ret) { - etdev_dbg(group->etdev, "%s: group %u add failed ret=%d", - __func__, group->workload_id, ret); - goto error_put_group; - } etdomain = edgetpu_mmu_alloc_domain(group->etdev); if (!etdomain) { ret = -ENOMEM; - goto error_leave_group; + goto error_put_group; } group->etdomain = etdomain; if (etdomain->token != EDGETPU_DOMAIN_TOKEN_END) @@ -723,10 +716,18 @@ edgetpu_device_group_alloc(struct edgetpu_client *client, EDGETPU_CONTEXT_DOMAIN_TOKEN | etdomain->token; else group->context_id = EDGETPU_CONTEXT_INVALID; + + /* adds @client as the first entry */ + ret = edgetpu_device_group_add(group, client); + if (ret) { + etdev_dbg(group->etdev, "%s: group %u add failed ret=%d", + __func__, group->workload_id, ret); + goto error_free_mmu_domain; + } return group; -error_leave_group: - edgetpu_device_group_leave(client); +error_free_mmu_domain: + edgetpu_mmu_free_domain(group->etdev, group->etdomain); error_put_group: edgetpu_device_group_put(group); error: diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c index 1b6e039..c16c964 100644 --- a/drivers/edgetpu/edgetpu-fs.c +++ b/drivers/edgetpu/edgetpu-fs.c @@ -1003,9 +1003,32 
@@ static ssize_t watchdog_timeout_count_show( } static DEVICE_ATTR_RO(watchdog_timeout_count); +static ssize_t clients_show( + struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + struct edgetpu_list_device_client *lc; + ssize_t ret = 0; + + mutex_lock(&etdev->clients_lock); + for_each_list_device_client(etdev, lc) { + ret += scnprintf(buf, PAGE_SIZE - ret, + "pid %d tgid %d wakelock %d\n", + lc->client->pid, lc->client->tgid, + NO_WAKELOCK(lc->client->wakelock) ? + 0 : lc->client->wakelock->req_count); + buf += ret; + } + mutex_unlock(&etdev->clients_lock); + return ret; +} +static DEVICE_ATTR_RO(clients); + static struct attribute *edgetpu_dev_attrs[] = { &dev_attr_firmware_crash_count.attr, &dev_attr_watchdog_timeout_count.attr, + &dev_attr_clients.attr, NULL, }; diff --git a/drivers/edgetpu/edgetpu-internal.h b/drivers/edgetpu/edgetpu-internal.h index 23e0c12..4c1d0a4 100644 --- a/drivers/edgetpu/edgetpu-internal.h +++ b/drivers/edgetpu/edgetpu-internal.h @@ -131,6 +131,16 @@ struct edgetpu_client { u64 perdie_events; }; +/* edgetpu_dev#clients list entry. */ +struct edgetpu_list_device_client { + struct list_head list; + struct edgetpu_client *client; +}; + +/* Macro to loop through etdev->clients (hold clients_lock prior). 
*/ +#define for_each_list_device_client(etdev, c) \ + list_for_each_entry(c, &etdev->clients, list) + struct edgetpu_mapping; struct edgetpu_mailbox_manager; struct edgetpu_kci; @@ -179,6 +189,8 @@ struct edgetpu_dev { /* end of fields protected by @groups_lock */ + struct mutex clients_lock; /* protects clients */ + struct list_head clients; void *mmu_cookie; /* mmu driver private data */ void *dram_cookie; /* on-device DRAM private data */ struct edgetpu_mailbox_manager *mailbox_manager; diff --git a/drivers/edgetpu/edgetpu-pm.c b/drivers/edgetpu/edgetpu-pm.c index df1c179..872149f 100644 --- a/drivers/edgetpu/edgetpu-pm.c +++ b/drivers/edgetpu/edgetpu-pm.c @@ -15,6 +15,7 @@ #include "edgetpu-mailbox.h" #include "edgetpu-pm.h" #include "edgetpu-sw-watchdog.h" +#include "edgetpu-wakelock.h" #if IS_ENABLED(CONFIG_EDGETPU_TEST) #include "unittests/factory/fake-edgetpu-firmware.h" @@ -310,15 +311,28 @@ void edgetpu_pchannel_power_up(struct edgetpu_dev *etdev) int edgetpu_pm_suspend(struct edgetpu_dev *etdev) { struct edgetpu_pm *etpm = etdev->pm; + struct edgetpu_list_device_client *lc; - if (etpm && etpm->p->power_up_count) { - etdev_warn_ratelimited( - etdev, "cannot suspend with power up count = %d\n", - etpm->p->power_up_count); + if (!etpm || !etpm->p->power_up_count) + return 0; + + etdev_warn_ratelimited( + etdev, "cannot suspend with power up count = %d\n", + etpm->p->power_up_count); + + if (!mutex_trylock(&etdev->clients_lock)) return -EAGAIN; + for_each_list_device_client(etdev, lc) { + if (NO_WAKELOCK(lc->client->wakelock) || + !lc->client->wakelock->req_count) + continue; + etdev_warn_ratelimited(etdev, "pid %d tgid %d count %d\n", + lc->client->pid, + lc->client->tgid, + lc->client->wakelock->req_count); } - - return 0; + mutex_unlock(&etdev->clients_lock); + return -EAGAIN; } int edgetpu_pm_resume(struct edgetpu_dev *etdev) |