diff options
author | Todd Poynor <toddpoynor@google.com> | 2021-07-31 07:40:33 +0000 |
---|---|---|
committer | Todd Poynor <toddpoynor@google.com> | 2021-08-05 21:07:45 +0000 |
commit | 23e7c6f5bebb9f80ab97bec076ee9ae626620af7 (patch) | |
tree | 85a7d9e7e60f84cfb159a722f16d3b61872d6e78 | |
parent | aff3f643c2514c78feea823a247fdf95ec37cbad (diff) | |
download | abrolhos-23e7c6f5bebb9f80ab97bec076ee9ae626620af7.tar.gz |
edgetpu: sync critical fixes: page pin out of memory, faceauth crash
Sync the following CLs from darwinn-2.0:
5d2f9ddb7 edgetpu: use pin_user_pages, alloc own vma struct pointers array
db00a2f91 edgetpu: abrolhos disable ext mbox on client remove
4e838d0f7 edgetpu: don't fail mbox deactivation on KCI fails
Bug: 194989197
Bug: 195126121
Signed-off-by: Todd Poynor <toddpoynor@google.com>
Change-Id: Iedd6cf0314ca76bf74ea91d85c853ed71a163fd3
-rw-r--r-- | drivers/edgetpu/abrolhos-device.c | 7 | ||||
-rw-r--r-- | drivers/edgetpu/abrolhos-pm.c | 6 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-core.c | 3 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-device-group.c | 51 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-fs.c | 20 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-mailbox.c | 33 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-mailbox.h | 2 |
7 files changed, 82 insertions, 40 deletions
diff --git a/drivers/edgetpu/abrolhos-device.c b/drivers/edgetpu/abrolhos-device.c index 25f942f..8af3613 100644 --- a/drivers/edgetpu/abrolhos-device.c +++ b/drivers/edgetpu/abrolhos-device.c @@ -172,8 +172,9 @@ int edgetpu_chip_acquire_ext_mailbox(struct edgetpu_client *client, mutex_unlock(&apdev->tz_mailbox_lock); return -EBUSY; } - apdev->secure_client = client; ret = edgetpu_mailbox_enable_ext(client, ABROLHOS_TZ_MAILBOX_ID, NULL); + if (!ret) + apdev->secure_client = client; mutex_unlock(&apdev->tz_mailbox_lock); return ret; } @@ -212,7 +213,9 @@ void edgetpu_chip_client_remove(struct edgetpu_client *client) struct abrolhos_platform_dev *apdev = to_abrolhos_dev(client->etdev); mutex_lock(&apdev->tz_mailbox_lock); - if (apdev->secure_client == client) + if (apdev->secure_client == client) { apdev->secure_client = NULL; + edgetpu_mailbox_disable_ext(client, ABROLHOS_TZ_MAILBOX_ID); + } mutex_unlock(&apdev->tz_mailbox_lock); } diff --git a/drivers/edgetpu/abrolhos-pm.c b/drivers/edgetpu/abrolhos-pm.c index 87a82d8..efbd15c 100644 --- a/drivers/edgetpu/abrolhos-pm.c +++ b/drivers/edgetpu/abrolhos-pm.c @@ -597,12 +597,12 @@ static void abrolhos_power_down(struct edgetpu_pm *etpm) abrolhos_pm_cleanup_bts_scenario(etdev); /* - * A client closing the edgetpu device or crashing can leave the - * TZ mailbox in acquired state, but firmware loses state on power down. + * It should be impossible that power_down() is called when abpdev->secure_client is set. + * Non-null secure_client implies ext mailbox is acquired, which implies wakelock is + * acquired. * Clear the state here just in case. */ abpdev->secure_client = NULL; - } static int abrolhos_pm_after_create(struct edgetpu_pm *etpm) diff --git a/drivers/edgetpu/edgetpu-core.c b/drivers/edgetpu/edgetpu-core.c index cab7a6a..ff51da1 100644 --- a/drivers/edgetpu/edgetpu-core.c +++ b/drivers/edgetpu/edgetpu-core.c @@ -545,6 +545,8 @@ void edgetpu_client_remove(struct edgetpu_client *client) */ if (client->group) edgetpu_device_group_leave(client); + /* invoke chip-dependent removal handler before releasing resources */ + edgetpu_chip_client_remove(client); edgetpu_wakelock_free(client->wakelock); /* * It should be impossible to access client->wakelock after this cleanup @@ -561,7 +563,6 @@ void edgetpu_client_remove(struct edgetpu_client *client) 1 << perdie_event_id_to_num(EDGETPU_PERDIE_EVENT_TRACES_AVAILABLE)) edgetpu_telemetry_unset_event(etdev, EDGETPU_TELEMETRY_TRACE); - edgetpu_chip_client_remove(client); edgetpu_client_put(client); } diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c index fd0c410..1b668f5 100644 --- a/drivers/edgetpu/edgetpu-device-group.c +++ b/drivers/edgetpu/edgetpu-device-group.c @@ -142,16 +142,11 @@ static int edgetpu_group_activate(struct edgetpu_device_group *group) static void edgetpu_group_deactivate(struct edgetpu_device_group *group) { u8 mailbox_id; - int ret; if (edgetpu_group_mailbox_detached_locked(group)) return; mailbox_id = edgetpu_group_context_id_locked(group); - ret = edgetpu_mailbox_deactivate(group->etdev, mailbox_id); - if (ret) - etdev_err(group->etdev, "deactivate mailbox for VCID %d failed with %d", - group->vcid, ret); - return; + edgetpu_mailbox_deactivate(group->etdev, mailbox_id); } /* @@ -1141,6 +1136,7 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group, int i; int ret; struct vm_area_struct *vma; + struct vm_area_struct **vmas; unsigned int foll_flags = FOLL_LONGTERM | FOLL_WRITE; if (size == 0) @@ -1161,11 +1157,10 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group, */ pages = kvmalloc((num_pages * sizeof(*pages)), GFP_KERNEL | __GFP_NOWARN); if (!pages) { - etdev_dbg(etdev, "%s: kvmalloc failed (%lu bytes)\n", __func__, - (num_pages * sizeof(*pages))); + etdev_dbg(etdev, "%s: kvmalloc pages failed (%lu bytes)\n", + __func__, (num_pages * sizeof(*pages))); return ERR_PTR(-ENOMEM); } - /* * The host pages might be read-only and could fail if we attempt to pin * it with FOLL_WRITE. @@ -1179,10 +1174,43 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group, *preadonly = false; } + /* Try fast call first, in case it's actually faster. */ ret = pin_user_pages_fast(host_addr & PAGE_MASK, num_pages, foll_flags, pages); + if (ret == num_pages) { + *pnum_pages = num_pages; + return pages; + } if (ret < 0) { - etdev_dbg(etdev, "get user pages failed %u:%pK-%u: %d", + etdev_dbg(etdev, "pin_user_pages failed %u:%pK-%u: %d", + group->workload_id, (void *)host_addr, num_pages, + ret); + if (ret != -ENOMEM) { + num_pages = 0; + goto error; + } + } + etdev_dbg(etdev, + "pin_user_pages_fast error %u:%pK npages=%u ret=%d", + group->workload_id, (void *)host_addr, num_pages, + ret); + /* Unpin any partial mapping and start over again. */ + for (i = 0; i < ret; i++) + unpin_user_page(pages[i]); + + /* Allocate our own vmas array non-contiguous. */ + vmas = kvmalloc((num_pages * sizeof(*vmas)), GFP_KERNEL | __GFP_NOWARN); + if (!vmas) { + etdev_dbg(etdev, "%s: kvmalloc vmas failed (%lu bytes)\n", + __func__, (num_pages * sizeof(*pages))); + kvfree(pages); + return ERR_PTR(-ENOMEM); + } + ret = pin_user_pages(host_addr & PAGE_MASK, num_pages, foll_flags, + pages, vmas); + kvfree(vmas); + if (ret < 0) { + etdev_dbg(etdev, "pin_user_pages failed %u:%pK-%u: %d", group->workload_id, (void *)host_addr, num_pages, ret); num_pages = 0; @@ -1190,7 +1218,7 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group, } if (ret < num_pages) { etdev_dbg(etdev, - "get user pages partial %u:%pK npages=%u pinned=%d", + "pin_user_pages partial %u:%pK npages=%u pinned=%d", group->workload_id, (void *)host_addr, num_pages, ret); num_pages = ret; @@ -1199,7 +1227,6 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group, } *pnum_pages = num_pages; - return pages; error: diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c index 0dcad64..50ead8e 100644 --- a/drivers/edgetpu/edgetpu-fs.c +++ b/drivers/edgetpu/edgetpu-fs.c @@ -1013,16 +1013,24 @@ static ssize_t clients_show( { struct edgetpu_dev *etdev = dev_get_drvdata(dev); struct edgetpu_list_device_client *lc; + ssize_t len; ssize_t ret = 0; mutex_lock(&etdev->clients_lock); for_each_list_device_client(etdev, lc) { - ret += scnprintf(buf, PAGE_SIZE - ret, - "pid %d tgid %d wakelock %d\n", - lc->client->pid, lc->client->tgid, - NO_WAKELOCK(lc->client->wakelock) ? - 0 : lc->client->wakelock->req_count); - buf += ret; + struct edgetpu_device_group *group; + + mutex_lock(&lc->client->group_lock); + group = lc->client->group; + len = scnprintf(buf, PAGE_SIZE - ret, + "pid %d tgid %d group %d wakelock %d\n", + lc->client->pid, lc->client->tgid, + group ? group->workload_id : -1, + NO_WAKELOCK(lc->client->wakelock) ? + 0 : lc->client->wakelock->req_count); + mutex_unlock(&lc->client->group_lock); + buf += len; + ret += len; } mutex_unlock(&etdev->clients_lock); return ret; diff --git a/drivers/edgetpu/edgetpu-mailbox.c b/drivers/edgetpu/edgetpu-mailbox.c index eedde54..261eb52 100644 --- a/drivers/edgetpu/edgetpu-mailbox.c +++ b/drivers/edgetpu/edgetpu-mailbox.c @@ -1003,8 +1003,7 @@ static int edgetpu_mailbox_external_alloc_enable(struct edgetpu_client *client, if (ret) { while (i--) { id = ext_mailbox->descriptors[i].mailbox->mailbox_id; - if (edgetpu_mailbox_deactivate(group->etdev, id)) - etdev_err(group->etdev, "Deactivate mailbox %d failed", id); + edgetpu_mailbox_deactivate(group->etdev, id); } /* * Deactivate only fails if f/w is unresponsive which will put group @@ -1050,8 +1049,7 @@ void edgetpu_mailbox_external_disable_free_locked(struct edgetpu_device_group *g for (i = 0; i < ext_mailbox->count; i++) { id = ext_mailbox->descriptors[i].mailbox->mailbox_id; etdev_dbg(group->etdev, "Disabling mailbox: %d\n", id); - if (edgetpu_mailbox_deactivate(group->etdev, id)) - etdev_err(group->etdev, "Deactivate mailbox %d failed", id); + edgetpu_mailbox_deactivate(group->etdev, id); } /* * Deactivate only fails if f/w is unresponsive which will put group @@ -1092,8 +1090,13 @@ out: int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, int mailbox_id) { - int ret; + int ret = 0; + /* + * A successful enable_ext() increases the wakelock event which prevents wakelock being + * released, so theoretically the check fail here can only happen when enable_ext() is + * failed or not called before. + */ if (!edgetpu_wakelock_lock(client->wakelock)) { etdev_err(client->etdev, "Disabling mailbox %d needs wakelock acquired\n", mailbox_id); @@ -1107,10 +1110,7 @@ int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, int mailbox_id) } etdev_dbg(client->etdev, "Disabling mailbox: %d\n", mailbox_id); - ret = edgetpu_mailbox_deactivate(client->etdev, mailbox_id); - if (ret) - etdev_err(client->etdev, "Deactivate mailbox %d failed: %d", mailbox_id, ret); - + edgetpu_mailbox_deactivate(client->etdev, mailbox_id); out: if (!ret) edgetpu_wakelock_dec_event_locked(client->wakelock, @@ -1143,7 +1143,7 @@ int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid return ret; } -int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id) +void edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id) { struct edgetpu_handshake *eh = &etdev->mailbox_manager->open_devices; const u32 bit = BIT(mailbox_id); @@ -1152,12 +1152,15 @@ int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id) mutex_lock(&eh->lock); if (bit & eh->fw_state) ret = edgetpu_kci_close_device(etdev->kci, mailbox_id); - if (!ret) { - eh->state &= ~bit; - eh->fw_state &= ~bit; - } + if (ret) + etdev_err(etdev, "Deactivate mailbox %d failed: %d", mailbox_id, ret); + /* + * Always clears the states, FW should never reject CLOSE_DEVICE requests unless it's + * unresponsive. + */ + eh->state &= ~bit; + eh->fw_state &= ~bit; mutex_unlock(&eh->lock); - return ret; } void edgetpu_handshake_clear_fw_state(struct edgetpu_handshake *eh) diff --git a/drivers/edgetpu/edgetpu-mailbox.h b/drivers/edgetpu/edgetpu-mailbox.h index 1ae4889..c4b1318 100644 --- a/drivers/edgetpu/edgetpu-mailbox.h +++ b/drivers/edgetpu/edgetpu-mailbox.h @@ -362,7 +362,7 @@ int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid /* * Similar to edgetpu_mailbox_activate() but sends CLOSE_DEVICE KCI instead. */ -int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id); +void edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id); /* Sets @eh->fw_state to 0. */ void edgetpu_handshake_clear_fw_state(struct edgetpu_handshake *eh); /* |