summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTodd Poynor <toddpoynor@google.com>2021-07-31 07:40:33 +0000
committerTodd Poynor <toddpoynor@google.com>2021-08-05 17:25:22 +0000
commita63b00f9e5a757acf4716f01b3eac49660995bdd (patch)
tree85a7d9e7e60f84cfb159a722f16d3b61872d6e78
parentaff3f643c2514c78feea823a247fdf95ec37cbad (diff)
downloadedgetpu-a63b00f9e5a757acf4716f01b3eac49660995bdd.tar.gz
edgetpu: sync critical fixes: page pinning in low memory
Sync the following CLs from darwinn-2.0: 5d2f9ddb7 edgetpu: use pin_user_pages, alloc own vma struct pointers array db00a2f91 edgetpu: abrolhos disable ext mbox on client remove 4e838d0f7 edgetpu: don't fail mbox deactivation on KCI fails 22e155fda edgetpu: abrolhos enable mbox before secure client 12ddd402e edgetpu: add group id to sysfs clients attr, fix output Bug: 194989197 Bug: 195349073 Bug: 195126121 Signed-off-by: Todd Poynor <toddpoynor@google.com> Change-Id: Iedd6cf0314ca76bf74ea91d85c853ed71a163fd3
-rw-r--r--drivers/edgetpu/abrolhos-device.c7
-rw-r--r--drivers/edgetpu/abrolhos-pm.c6
-rw-r--r--drivers/edgetpu/edgetpu-core.c3
-rw-r--r--drivers/edgetpu/edgetpu-device-group.c51
-rw-r--r--drivers/edgetpu/edgetpu-fs.c20
-rw-r--r--drivers/edgetpu/edgetpu-mailbox.c33
-rw-r--r--drivers/edgetpu/edgetpu-mailbox.h2
7 files changed, 82 insertions, 40 deletions
diff --git a/drivers/edgetpu/abrolhos-device.c b/drivers/edgetpu/abrolhos-device.c
index 25f942f..8af3613 100644
--- a/drivers/edgetpu/abrolhos-device.c
+++ b/drivers/edgetpu/abrolhos-device.c
@@ -172,8 +172,9 @@ int edgetpu_chip_acquire_ext_mailbox(struct edgetpu_client *client,
mutex_unlock(&apdev->tz_mailbox_lock);
return -EBUSY;
}
- apdev->secure_client = client;
ret = edgetpu_mailbox_enable_ext(client, ABROLHOS_TZ_MAILBOX_ID, NULL);
+ if (!ret)
+ apdev->secure_client = client;
mutex_unlock(&apdev->tz_mailbox_lock);
return ret;
}
@@ -212,7 +213,9 @@ void edgetpu_chip_client_remove(struct edgetpu_client *client)
struct abrolhos_platform_dev *apdev = to_abrolhos_dev(client->etdev);
mutex_lock(&apdev->tz_mailbox_lock);
- if (apdev->secure_client == client)
+ if (apdev->secure_client == client) {
apdev->secure_client = NULL;
+ edgetpu_mailbox_disable_ext(client, ABROLHOS_TZ_MAILBOX_ID);
+ }
mutex_unlock(&apdev->tz_mailbox_lock);
}
diff --git a/drivers/edgetpu/abrolhos-pm.c b/drivers/edgetpu/abrolhos-pm.c
index 87a82d8..efbd15c 100644
--- a/drivers/edgetpu/abrolhos-pm.c
+++ b/drivers/edgetpu/abrolhos-pm.c
@@ -597,12 +597,12 @@ static void abrolhos_power_down(struct edgetpu_pm *etpm)
abrolhos_pm_cleanup_bts_scenario(etdev);
/*
- * A client closing the edgetpu device or crashing can leave the
- * TZ mailbox in acquired state, but firmware loses state on power down.
+ * It should be impossible that power_down() is called when abpdev->secure_client is set.
+ * Non-null secure_client implies ext mailbox is acquired, which implies wakelock is
+ * acquired.
* Clear the state here just in case.
*/
abpdev->secure_client = NULL;
-
}
static int abrolhos_pm_after_create(struct edgetpu_pm *etpm)
diff --git a/drivers/edgetpu/edgetpu-core.c b/drivers/edgetpu/edgetpu-core.c
index cab7a6a..ff51da1 100644
--- a/drivers/edgetpu/edgetpu-core.c
+++ b/drivers/edgetpu/edgetpu-core.c
@@ -545,6 +545,8 @@ void edgetpu_client_remove(struct edgetpu_client *client)
*/
if (client->group)
edgetpu_device_group_leave(client);
+ /* invoke chip-dependent removal handler before releasing resources */
+ edgetpu_chip_client_remove(client);
edgetpu_wakelock_free(client->wakelock);
/*
* It should be impossible to access client->wakelock after this cleanup
@@ -561,7 +563,6 @@ void edgetpu_client_remove(struct edgetpu_client *client)
1 << perdie_event_id_to_num(EDGETPU_PERDIE_EVENT_TRACES_AVAILABLE))
edgetpu_telemetry_unset_event(etdev, EDGETPU_TELEMETRY_TRACE);
- edgetpu_chip_client_remove(client);
edgetpu_client_put(client);
}
diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c
index fd0c410..1b668f5 100644
--- a/drivers/edgetpu/edgetpu-device-group.c
+++ b/drivers/edgetpu/edgetpu-device-group.c
@@ -142,16 +142,11 @@ static int edgetpu_group_activate(struct edgetpu_device_group *group)
static void edgetpu_group_deactivate(struct edgetpu_device_group *group)
{
u8 mailbox_id;
- int ret;
if (edgetpu_group_mailbox_detached_locked(group))
return;
mailbox_id = edgetpu_group_context_id_locked(group);
- ret = edgetpu_mailbox_deactivate(group->etdev, mailbox_id);
- if (ret)
- etdev_err(group->etdev, "deactivate mailbox for VCID %d failed with %d",
- group->vcid, ret);
- return;
+ edgetpu_mailbox_deactivate(group->etdev, mailbox_id);
}
/*
@@ -1141,6 +1136,7 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group,
int i;
int ret;
struct vm_area_struct *vma;
+ struct vm_area_struct **vmas;
unsigned int foll_flags = FOLL_LONGTERM | FOLL_WRITE;
if (size == 0)
@@ -1161,11 +1157,10 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group,
*/
pages = kvmalloc((num_pages * sizeof(*pages)), GFP_KERNEL | __GFP_NOWARN);
if (!pages) {
- etdev_dbg(etdev, "%s: kvmalloc failed (%lu bytes)\n", __func__,
- (num_pages * sizeof(*pages)));
+ etdev_dbg(etdev, "%s: kvmalloc pages failed (%lu bytes)\n",
+ __func__, (num_pages * sizeof(*pages)));
return ERR_PTR(-ENOMEM);
}
-
/*
* The host pages might be read-only and could fail if we attempt to pin
* it with FOLL_WRITE.
@@ -1179,10 +1174,43 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group,
*preadonly = false;
}
+ /* Try fast call first, in case it's actually faster. */
ret = pin_user_pages_fast(host_addr & PAGE_MASK, num_pages, foll_flags,
pages);
+ if (ret == num_pages) {
+ *pnum_pages = num_pages;
+ return pages;
+ }
if (ret < 0) {
- etdev_dbg(etdev, "get user pages failed %u:%pK-%u: %d",
+ etdev_dbg(etdev, "pin_user_pages failed %u:%pK-%u: %d",
+ group->workload_id, (void *)host_addr, num_pages,
+ ret);
+ if (ret != -ENOMEM) {
+ num_pages = 0;
+ goto error;
+ }
+ }
+ etdev_dbg(etdev,
+ "pin_user_pages_fast error %u:%pK npages=%u ret=%d",
+ group->workload_id, (void *)host_addr, num_pages,
+ ret);
+ /* Unpin any partial mapping and start over again. */
+ for (i = 0; i < ret; i++)
+ unpin_user_page(pages[i]);
+
+ /* Allocate our own vmas array non-contiguous. */
+ vmas = kvmalloc((num_pages * sizeof(*vmas)), GFP_KERNEL | __GFP_NOWARN);
+ if (!vmas) {
+ etdev_dbg(etdev, "%s: kvmalloc vmas failed (%lu bytes)\n",
+ __func__, (num_pages * sizeof(*pages)));
+ kvfree(pages);
+ return ERR_PTR(-ENOMEM);
+ }
+ ret = pin_user_pages(host_addr & PAGE_MASK, num_pages, foll_flags,
+ pages, vmas);
+ kvfree(vmas);
+ if (ret < 0) {
+ etdev_dbg(etdev, "pin_user_pages failed %u:%pK-%u: %d",
group->workload_id, (void *)host_addr, num_pages,
ret);
num_pages = 0;
@@ -1190,7 +1218,7 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group,
}
if (ret < num_pages) {
etdev_dbg(etdev,
- "get user pages partial %u:%pK npages=%u pinned=%d",
+ "pin_user_pages partial %u:%pK npages=%u pinned=%d",
group->workload_id, (void *)host_addr, num_pages,
ret);
num_pages = ret;
@@ -1199,7 +1227,6 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group,
}
*pnum_pages = num_pages;
-
return pages;
error:
diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c
index 0dcad64..50ead8e 100644
--- a/drivers/edgetpu/edgetpu-fs.c
+++ b/drivers/edgetpu/edgetpu-fs.c
@@ -1013,16 +1013,24 @@ static ssize_t clients_show(
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
struct edgetpu_list_device_client *lc;
+ ssize_t len;
ssize_t ret = 0;
mutex_lock(&etdev->clients_lock);
for_each_list_device_client(etdev, lc) {
- ret += scnprintf(buf, PAGE_SIZE - ret,
- "pid %d tgid %d wakelock %d\n",
- lc->client->pid, lc->client->tgid,
- NO_WAKELOCK(lc->client->wakelock) ?
- 0 : lc->client->wakelock->req_count);
- buf += ret;
+ struct edgetpu_device_group *group;
+
+ mutex_lock(&lc->client->group_lock);
+ group = lc->client->group;
+ len = scnprintf(buf, PAGE_SIZE - ret,
+ "pid %d tgid %d group %d wakelock %d\n",
+ lc->client->pid, lc->client->tgid,
+ group ? group->workload_id : -1,
+ NO_WAKELOCK(lc->client->wakelock) ?
+ 0 : lc->client->wakelock->req_count);
+ mutex_unlock(&lc->client->group_lock);
+ buf += len;
+ ret += len;
}
mutex_unlock(&etdev->clients_lock);
return ret;
diff --git a/drivers/edgetpu/edgetpu-mailbox.c b/drivers/edgetpu/edgetpu-mailbox.c
index eedde54..261eb52 100644
--- a/drivers/edgetpu/edgetpu-mailbox.c
+++ b/drivers/edgetpu/edgetpu-mailbox.c
@@ -1003,8 +1003,7 @@ static int edgetpu_mailbox_external_alloc_enable(struct edgetpu_client *client,
if (ret) {
while (i--) {
id = ext_mailbox->descriptors[i].mailbox->mailbox_id;
- if (edgetpu_mailbox_deactivate(group->etdev, id))
- etdev_err(group->etdev, "Deactivate mailbox %d failed", id);
+ edgetpu_mailbox_deactivate(group->etdev, id);
}
/*
* Deactivate only fails if f/w is unresponsive which will put group
@@ -1050,8 +1049,7 @@ void edgetpu_mailbox_external_disable_free_locked(struct edgetpu_device_group *g
for (i = 0; i < ext_mailbox->count; i++) {
id = ext_mailbox->descriptors[i].mailbox->mailbox_id;
etdev_dbg(group->etdev, "Disabling mailbox: %d\n", id);
- if (edgetpu_mailbox_deactivate(group->etdev, id))
- etdev_err(group->etdev, "Deactivate mailbox %d failed", id);
+ edgetpu_mailbox_deactivate(group->etdev, id);
}
/*
* Deactivate only fails if f/w is unresponsive which will put group
@@ -1092,8 +1090,13 @@ out:
int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, int mailbox_id)
{
- int ret;
+ int ret = 0;
+ /*
+ * A successful enable_ext() increases the wakelock event which prevents wakelock being
+ * released, so theoretically the check fail here can only happen when enable_ext() is
+ * failed or not called before.
+ */
if (!edgetpu_wakelock_lock(client->wakelock)) {
etdev_err(client->etdev, "Disabling mailbox %d needs wakelock acquired\n",
mailbox_id);
@@ -1107,10 +1110,7 @@ int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, int mailbox_id)
}
etdev_dbg(client->etdev, "Disabling mailbox: %d\n", mailbox_id);
- ret = edgetpu_mailbox_deactivate(client->etdev, mailbox_id);
- if (ret)
- etdev_err(client->etdev, "Deactivate mailbox %d failed: %d", mailbox_id, ret);
-
+ edgetpu_mailbox_deactivate(client->etdev, mailbox_id);
out:
if (!ret)
edgetpu_wakelock_dec_event_locked(client->wakelock,
@@ -1143,7 +1143,7 @@ int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid
return ret;
}
-int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id)
+void edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id)
{
struct edgetpu_handshake *eh = &etdev->mailbox_manager->open_devices;
const u32 bit = BIT(mailbox_id);
@@ -1152,12 +1152,15 @@ int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id)
mutex_lock(&eh->lock);
if (bit & eh->fw_state)
ret = edgetpu_kci_close_device(etdev->kci, mailbox_id);
- if (!ret) {
- eh->state &= ~bit;
- eh->fw_state &= ~bit;
- }
+ if (ret)
+ etdev_err(etdev, "Deactivate mailbox %d failed: %d", mailbox_id, ret);
+ /*
+ * Always clears the states, FW should never reject CLOSE_DEVICE requests unless it's
+ * unresponsive.
+ */
+ eh->state &= ~bit;
+ eh->fw_state &= ~bit;
mutex_unlock(&eh->lock);
- return ret;
}
void edgetpu_handshake_clear_fw_state(struct edgetpu_handshake *eh)
diff --git a/drivers/edgetpu/edgetpu-mailbox.h b/drivers/edgetpu/edgetpu-mailbox.h
index 1ae4889..c4b1318 100644
--- a/drivers/edgetpu/edgetpu-mailbox.h
+++ b/drivers/edgetpu/edgetpu-mailbox.h
@@ -362,7 +362,7 @@ int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid
/*
* Similar to edgetpu_mailbox_activate() but sends CLOSE_DEVICE KCI instead.
*/
-int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id);
+void edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id);
/* Sets @eh->fw_state to 0. */
void edgetpu_handshake_clear_fw_state(struct edgetpu_handshake *eh);
/*