summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNrithya Kanakasabapathy <nrithya@google.com>2021-05-24 18:14:01 +0000
committerNrithya Kanakasabapathy <nrithya@google.com>2021-05-24 22:47:24 +0000
commit47ebe63e24684873c3692be05aeef437a6346fd7 (patch)
tree24ee6641c5ccfd34ead8d6e9df17aad172cd5df2
parent9e653d1d032ecba4f19e6e257e4e664ee8885d29 (diff)
downloadabrolhos-47ebe63e24684873c3692be05aeef437a6346fd7.tar.gz
Merge branch 'whitechapel' into android-gs-pixel-5.10
* whitechapel: (35 commits) edgetpu: abrolhos fix thermal notify null deref edgetpu: fix UAF write on edgetpu_sync_fence_create edgetpu: add firmware_is_loading utility edgetpu: move etdev state change into FW run / restart edgetpu: don't attempt FW restart on FW loading edgetpu: add firmware_pm_get edgetpu: abrolhos: remove power policy shutdown edgetpu: remove obsolete reg_window from client edgetpu: set FW status when run / restart finished edgetpu: handle tagged pointers in edgetpu_pin_user_pages edgetpu: swap the order of fw desc and etdev state lock edgetpu: enhance size of mapping overflow check edgetpu: refine user-space header documentation edgetpu: abrolhos: Change the minimum state to UUD edgetpu: don't signal signaled dma fence edgetpu: fix error handling on sync_fence creation edgetpu: remove etdev "open" field edgetpu: don't notify runtime of potentially recoverable fw crash ... Bug: 188735590, 189147298 Signed-off-by: Nrithya Kanakasabapathy <nrithya@google.com> Change-Id: I04c6acba0252e641f7bf99d07190b5991ae95566
-rw-r--r--.clang-format2
-rw-r--r--drivers/edgetpu/abrolhos-pm.c61
-rw-r--r--drivers/edgetpu/abrolhos-pm.h2
-rw-r--r--drivers/edgetpu/abrolhos-thermal.c28
-rw-r--r--drivers/edgetpu/edgetpu-core.c21
-rw-r--r--drivers/edgetpu/edgetpu-device-group.c120
-rw-r--r--drivers/edgetpu/edgetpu-device-group.h51
-rw-r--r--drivers/edgetpu/edgetpu-dmabuf.c50
-rw-r--r--drivers/edgetpu/edgetpu-firmware.c220
-rw-r--r--drivers/edgetpu/edgetpu-firmware.h2
-rw-r--r--drivers/edgetpu/edgetpu-fs.c31
-rw-r--r--drivers/edgetpu/edgetpu-internal.h11
-rw-r--r--drivers/edgetpu/edgetpu-kci.h14
-rw-r--r--drivers/edgetpu/edgetpu-mailbox.c62
-rw-r--r--drivers/edgetpu/edgetpu-mailbox.h43
-rw-r--r--drivers/edgetpu/edgetpu-usage-stats.c108
-rw-r--r--drivers/edgetpu/edgetpu.h81
-rw-r--r--drivers/edgetpu/mm-backport.h4
18 files changed, 581 insertions, 330 deletions
diff --git a/.clang-format b/.clang-format
index c24b147..3212542 100644
--- a/.clang-format
+++ b/.clang-format
@@ -52,7 +52,7 @@ BreakConstructorInitializersBeforeComma: false
#BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
-ColumnLimit: 80
+ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
#CompactNamespaces: false # Unknown to clang-format-4.0
ConstructorInitializerAllOnOneLineOrOnePerLine: false
diff --git a/drivers/edgetpu/abrolhos-pm.c b/drivers/edgetpu/abrolhos-pm.c
index e9b3458..b8a12eb 100644
--- a/drivers/edgetpu/abrolhos-pm.c
+++ b/drivers/edgetpu/abrolhos-pm.c
@@ -409,6 +409,7 @@ static int abrolhos_get_initial_pwr_state(struct device *dev)
case TPU_DEEP_SLEEP_CLOCKS_SLOW:
case TPU_DEEP_SLEEP_CLOCKS_FAST:
case TPU_RETENTION_CLOCKS_SLOW:
+ case TPU_ACTIVE_UUD:
case TPU_ACTIVE_SUD:
case TPU_ACTIVE_UD:
case TPU_ACTIVE_NOM:
@@ -438,7 +439,6 @@ static int abrolhos_power_up(struct edgetpu_pm *etpm)
struct abrolhos_platform_dev *abpdev = to_abrolhos_dev(etdev);
int ret = abrolhos_pwr_state_set(
etpm->etdev, abrolhos_get_initial_pwr_state(etdev->dev));
- enum edgetpu_firmware_status firmware_status;
etdev_info(etpm->etdev, "Powering up\n");
@@ -465,19 +465,26 @@ static int abrolhos_power_up(struct edgetpu_pm *etpm)
if (!etdev->firmware)
return 0;
- firmware_status = edgetpu_firmware_status_locked(etdev);
- if (firmware_status == FW_LOADING)
+ /*
+ * Why this function uses edgetpu_firmware_*_locked functions without explicitly holding
+ * edgetpu_firmware_lock:
+ *
+ * edgetpu_pm_get() is called in two scenarios - one is when firmware loading is
+ * being attempted, the other is when user-space clients need the device to be
+ * powered (usually through acquiring the wakelock).
+ *
+ * For the first scenario, edgetpu_firmware_is_loading() below shall return true.
+ * For the second scenario we are indeed called without holding the firmware lock, but the
+ * firmware loading procedures (i.e. the first scenario) always call edgetpu_pm_get() before
+ * changing the firmware state, and edgetpu_pm_get() is blocked until this function
+ * finishes. In short, we are protected by the PM lock.
+ */
+
+ if (edgetpu_firmware_is_loading(etdev))
return 0;
/* attempt firmware run */
- mutex_lock(&etdev->state_lock);
- if (etdev->state == ETDEV_STATE_FWLOADING) {
- mutex_unlock(&etdev->state_lock);
- return -EAGAIN;
- }
- etdev->state = ETDEV_STATE_FWLOADING;
- mutex_unlock(&etdev->state_lock);
- switch (firmware_status) {
+ switch (edgetpu_firmware_status_locked(etdev)) {
case FW_VALID:
ret = edgetpu_firmware_restart_locked(etdev);
break;
@@ -489,15 +496,6 @@ static int abrolhos_power_up(struct edgetpu_pm *etpm)
default:
break;
}
- mutex_lock(&etdev->state_lock);
- if (ret == -EIO)
- etdev->state = ETDEV_STATE_BAD; /* f/w handshake error */
- else if (ret)
- etdev->state = ETDEV_STATE_NOFW; /* other errors */
- else
- etdev->state = ETDEV_STATE_GOOD; /* f/w handshake success */
- mutex_unlock(&etdev->state_lock);
-
if (ret) {
abrolhos_power_down(etpm);
} else {
@@ -519,27 +517,14 @@ abrolhos_pm_shutdown_firmware(struct abrolhos_platform_dev *etpdev,
return;
etdev_warn(etdev, "Firmware shutdown request failed!\n");
- etdev_warn(etdev, "Attempting firmware restart\n");
-
- if (!edgetpu_firmware_restart_locked(etdev) &&
- !edgetpu_pchannel_power_down(etdev, false))
- return;
+ etdev_warn(etdev, "Requesting early GSA reset\n");
- edgetpu_kci_cancel_work_queues(etdev->kci);
- etdev_warn(etdev, "Forcing shutdown through power policy\n");
- /* Request GSA shutdown to make sure the R52 core is reset */
- gsa_send_tpu_cmd(etpdev->gsa_dev, GSA_TPU_SHUTDOWN);
- abrolhos_pwr_policy_set(abpdev, TPU_OFF);
- pm_runtime_put_sync(etdev->dev);
/*
- * TODO: experiment on hardware to verify if this delay
- * is needed, what is a good value or an alternative way
- * to make sure the power policy request turned the
- * device off.
+ * p-channel failed, request GSA shutdown to make sure the R52 core is
+ * reset.
+ * The GSA->APM request will clear any pending DVFS status from R52.
*/
- msleep(100);
- pm_runtime_get_sync(etdev->dev);
- abrolhos_pwr_policy_set(abpdev, TPU_ACTIVE_OD);
+ gsa_send_tpu_cmd(etpdev->gsa_dev, GSA_TPU_SHUTDOWN);
}
static void abrolhos_pm_cleanup_bts_scenario(struct edgetpu_dev *etdev)
diff --git a/drivers/edgetpu/abrolhos-pm.h b/drivers/edgetpu/abrolhos-pm.h
index bf0128f..0f7bdf0 100644
--- a/drivers/edgetpu/abrolhos-pm.h
+++ b/drivers/edgetpu/abrolhos-pm.h
@@ -48,6 +48,7 @@ static inline int exynos_acpm_set_policy(unsigned int id, unsigned long policy)
* 4: Sleep Clocks Off
* 5: Sleep Clocks Slow
* 6: Retention Clocks Slow
+ * 226000: Ultra Underdrive @226MHz
* 500000: Super Underdrive @500MHz
* 800000: Underdrive @800MHz
* 1000000: Nominal @1066MHz
@@ -61,6 +62,7 @@ enum tpu_pwr_state {
TPU_SLEEP_CLOCKS_OFF = 4,
TPU_SLEEP_CLOCKS_SLOW = 5,
TPU_RETENTION_CLOCKS_SLOW = 6,
+ TPU_ACTIVE_UUD = 226000,
TPU_ACTIVE_SUD = 500000,
TPU_ACTIVE_UD = 800000,
TPU_ACTIVE_NOM = 1066000,
diff --git a/drivers/edgetpu/abrolhos-thermal.c b/drivers/edgetpu/abrolhos-thermal.c
index 84be142..106af86 100644
--- a/drivers/edgetpu/abrolhos-thermal.c
+++ b/drivers/edgetpu/abrolhos-thermal.c
@@ -61,18 +61,18 @@ static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev,
pwr_state = state_pwr_map[state_original].state;
if (state_original != cooling->cooling_state) {
/*
- * Cap the minimum state we request here.
- * We cannot go to states below SUD until firmware/runtime
- * handshake is added.
+ * Set the thermal policy through ACPM to allow cooling by DVFS. Any state lower
+ * than UUD should be handled by the firmware when it gets the throttling
+ * notification KCI.
*/
- if (pwr_state < TPU_ACTIVE_SUD) {
+ if (pwr_state < TPU_ACTIVE_UUD) {
dev_warn_ratelimited(
- dev, "Unable to go to state %lu, going to %d",
- pwr_state, TPU_ACTIVE_SUD);
- pwr_state = TPU_ACTIVE_SUD;
+ dev, "Setting lowest DVFS state, waiting for FW to shutdown TPU");
+ ret = exynos_acpm_set_policy(TPU_ACPM_DOMAIN, TPU_ACTIVE_UUD);
+ } else {
+ ret = exynos_acpm_set_policy(TPU_ACPM_DOMAIN, pwr_state);
}
- ret = exynos_acpm_set_policy(TPU_ACPM_DOMAIN, pwr_state);
if (ret) {
dev_err(dev, "error setting tpu policy: %d\n", ret);
goto out;
@@ -81,13 +81,11 @@ static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev,
ret = edgetpu_kci_notify_throttling(etdev, pwr_state);
if (ret) {
/*
- * TODO(b/185596886) : After FW adds a handler for this
- * KCI, return the correct value of ret and change the
- * debug message to an error message
+ * TODO(b/185596886) : After FW adds a handler for this KCI, return the
+ * correct value of ret and change the debug message to an error message.
*/
- etdev_dbg(
- etdev, "Failed to notify FW about state %lu, error:%d",
- pwr_state, ret);
+ etdev_dbg(etdev, "Failed to notify FW about state %lu, error:%d",
+ pwr_state, ret);
ret = 0;
}
} else {
@@ -331,6 +329,7 @@ struct edgetpu_thermal
if (!thermal)
return ERR_PTR(-ENOMEM);
+ thermal->etdev = etdev;
err = tpu_thermal_init(thermal, dev);
if (err) {
devres_free(thermal);
@@ -338,6 +337,5 @@ struct edgetpu_thermal
}
devres_add(dev, thermal);
- thermal->etdev = etdev;
return thermal;
}
diff --git a/drivers/edgetpu/edgetpu-core.c b/drivers/edgetpu/edgetpu-core.c
index 7294dd4..f1158a0 100644
--- a/drivers/edgetpu/edgetpu-core.c
+++ b/drivers/edgetpu/edgetpu-core.c
@@ -70,9 +70,8 @@ static int edgetpu_mmap_full_csr(struct edgetpu_client *client,
if (!uid_eq(current_euid(), GLOBAL_ROOT_UID))
return -EPERM;
vma_size = vma->vm_end - vma->vm_start;
- map_size = min(vma_size, client->reg_window.size);
- phys_base = client->etdev->regs.phys +
- client->reg_window.start_reg_offset;
+ map_size = min_t(ulong, vma_size, client->etdev->regs.size);
+ phys_base = client->etdev->regs.phys;
ret = io_remap_pfn_range(vma, vma->vm_start, phys_base >> PAGE_SHIFT,
map_size, vma->vm_page_prot);
if (ret)
@@ -367,7 +366,6 @@ int edgetpu_device_add(struct edgetpu_dev *etdev,
etdev->mcp_die_index);
}
- mutex_init(&etdev->open.lock);
mutex_init(&etdev->groups_lock);
INIT_LIST_HEAD(&etdev->groups);
etdev->n_groups = 0;
@@ -467,9 +465,6 @@ struct edgetpu_client *edgetpu_client_add(struct edgetpu_dev *etdev)
return ERR_PTR(-ENOMEM);
}
- /* Allow entire CSR space to be mmap()'ed using 1.0 interface */
- client->reg_window.start_reg_offset = 0;
- client->reg_window.size = etdev->regs.size;
client->pid = current->pid;
client->tgid = current->tgid;
client->etdev = etdev;
@@ -583,13 +578,15 @@ void edgetpu_free_coherent(struct edgetpu_dev *etdev,
void edgetpu_handle_firmware_crash(struct edgetpu_dev *etdev,
enum edgetpu_fw_crash_type crash_type)
{
- etdev_err(etdev, "firmware crashed: %u", crash_type);
- etdev->firmware_crash_count++;
- edgetpu_fatal_error_notify(etdev);
-
- if (crash_type == EDGETPU_FW_CRASH_UNRECOV_FAULT)
+ if (crash_type == EDGETPU_FW_CRASH_UNRECOV_FAULT) {
+ etdev_err(etdev, "firmware unrecoverable crash");
+ etdev->firmware_crash_count++;
+ edgetpu_fatal_error_notify(etdev);
/* Restart firmware without chip reset */
edgetpu_watchdog_bite(etdev, false);
+ } else {
+ etdev_err(etdev, "firmware crash event: %u", crash_type);
+ }
}
int __init edgetpu_init(void)
diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c
index 9d00a0f..987e4ab 100644
--- a/drivers/edgetpu/edgetpu-device-group.c
+++ b/drivers/edgetpu/edgetpu-device-group.c
@@ -119,16 +119,11 @@ static int edgetpu_group_kci_open_device(struct edgetpu_device_group *group)
if (edgetpu_group_mailbox_detached_locked(group))
return 0;
mailbox_id = edgetpu_group_context_id_locked(group);
- ret = edgetpu_kci_open_device(group->etdev->kci, BIT(mailbox_id));
- /*
- * This should only happen when the FW hasn't driven this KCI, log once
- * to prevent log storm.
- */
+ ret = edgetpu_mailbox_activate(group->etdev, BIT(mailbox_id));
if (ret)
- etdev_warn_once(group->etdev, "Open device failed with %d",
- ret);
+ etdev_err(group->etdev, "activate mailbox failed with %d", ret);
atomic_inc(&group->etdev->job_count);
- return 0;
+ return ret;
}
static void edgetpu_group_kci_close_device(struct edgetpu_device_group *group)
@@ -139,15 +134,10 @@ static void edgetpu_group_kci_close_device(struct edgetpu_device_group *group)
if (edgetpu_group_mailbox_detached_locked(group))
return;
mailbox_id = edgetpu_group_context_id_locked(group);
- ret = edgetpu_kci_close_device(group->etdev->kci, BIT(mailbox_id));
-
- /*
- * This should only happen when the FW hasn't driven this KCI, log once
- * to prevent log storm.
- */
+ ret = edgetpu_mailbox_deactivate(group->etdev, BIT(mailbox_id));
if (ret)
- etdev_warn_once(group->etdev, "Close device failed with %d",
- ret);
+ etdev_err(group->etdev, "deactivate mailbox failed with %d",
+ ret);
return;
}
@@ -365,6 +355,12 @@ static void do_detach_mailbox_locked(struct edgetpu_device_group *group)
group->context_id = EDGETPU_CONTEXT_INVALID;
}
+static inline bool is_finalized_or_errored(struct edgetpu_device_group *group)
+{
+ return edgetpu_device_group_is_finalized(group) ||
+ edgetpu_device_group_is_errored(group);
+}
+
int edgetpu_group_set_eventfd(struct edgetpu_device_group *group, uint event_id,
int eventfd)
{
@@ -441,7 +437,7 @@ static void edgetpu_device_group_release(struct edgetpu_device_group *group)
struct edgetpu_dev *etdev;
edgetpu_group_clear_events(group);
- if (edgetpu_device_group_is_finalized(group)) {
+ if (is_finalized_or_errored(group)) {
for (i = 0; i < group->n_clients; i++) {
etdev = edgetpu_device_group_nth_etdev(group, i);
edgetpu_sw_wdt_dec_active_ref(etdev);
@@ -569,8 +565,7 @@ static bool edgetpu_in_any_group_locked(struct edgetpu_dev *etdev)
return etdev->n_groups;
}
-/* caller must hold the client's etdev state_lock. */
-void edgetpu_device_group_leave_locked(struct edgetpu_client *client)
+void edgetpu_device_group_leave(struct edgetpu_client *client)
{
struct edgetpu_device_group *group;
struct edgetpu_list_group *l;
@@ -592,7 +587,7 @@ void edgetpu_device_group_leave_locked(struct edgetpu_client *client)
if (edgetpu_device_group_is_waiting(group)) {
if (edgetpu_device_group_leader(group) == client)
will_disband = true;
- } else if (edgetpu_device_group_is_finalized(group)) {
+ } else if (is_finalized_or_errored(group)) {
will_disband = true;
}
@@ -627,22 +622,6 @@ void edgetpu_device_group_leave_locked(struct edgetpu_client *client)
mutex_unlock(&client->etdev->groups_lock);
}
-void edgetpu_device_group_leave(struct edgetpu_client *client)
-{
- mutex_lock(&client->etdev->state_lock);
- /*
- * State might not be GOOD here if the wdt timeout
- * action is working or initial fw load failed. If wdt worker
- * is running let it perform the group leaving.
- */
- etdev_dbg(client->etdev, "%s: state=%u\n",
- __func__, client->etdev->state);
- if (client->etdev->state == ETDEV_STATE_GOOD ||
- client->etdev->state == ETDEV_STATE_NOFW)
- edgetpu_device_group_leave_locked(client);
- mutex_unlock(&client->etdev->state_lock);
-}
-
struct edgetpu_device_group *
edgetpu_device_group_alloc(struct edgetpu_client *client,
const struct edgetpu_mailbox_attr *attr)
@@ -707,7 +686,7 @@ edgetpu_device_group_alloc(struct edgetpu_client *client,
return group;
error_leave_group:
- edgetpu_device_group_leave_locked(client);
+ edgetpu_device_group_leave(client);
error_put_group:
edgetpu_device_group_put(group);
error:
@@ -785,7 +764,7 @@ int edgetpu_device_group_finalize(struct edgetpu_device_group *group)
mutex_lock(&group->lock);
/* do nothing if the group is finalized */
- if (edgetpu_device_group_is_finalized(group))
+ if (is_finalized_or_errored(group))
goto err_unlock;
if (!edgetpu_device_group_is_waiting(group)) {
@@ -846,6 +825,7 @@ int edgetpu_device_group_finalize(struct edgetpu_device_group *group)
edgetpu_usr_init_group(group);
+ /* send KCI only if the device is powered on */
if (edgetpu_wakelock_count_locked(leader->wakelock)) {
ret = edgetpu_device_group_kci_finalized(group);
if (ret)
@@ -1108,9 +1088,9 @@ static void edgetpu_host_map_show(struct edgetpu_mapping *map,
*/
static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group,
struct edgetpu_map_ioctl *arg,
- uint *pnum_pages)
+ uint *pnum_pages, bool *preadonly)
{
- u64 host_addr = arg->host_address;
+ u64 host_addr = untagged_addr(arg->host_address);
u64 size = arg->size;
const enum dma_data_direction dir = arg->flags & EDGETPU_MAP_DIR_MASK;
uint num_pages;
@@ -1119,12 +1099,14 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group,
struct page **pages;
int i;
int ret;
+ struct vm_area_struct *vma;
+ unsigned int foll_flags = FOLL_LONGTERM | FOLL_WRITE;
if (size == 0)
return ERR_PTR(-EINVAL);
offset = host_addr & (PAGE_SIZE - 1);
/* overflow check */
- if (unlikely(size / PAGE_SIZE >= UINT_MAX || size + offset < size))
+ if (unlikely((size + offset) / PAGE_SIZE >= UINT_MAX - 1 || size + offset < size))
return ERR_PTR(-ENOMEM);
num_pages = (size + offset) / PAGE_SIZE;
if ((size + offset) % PAGE_SIZE)
@@ -1141,20 +1123,20 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group,
return ERR_PTR(-ENOMEM);
/*
- * DMA Buffers appear to be always dirty, so mark pages as always writeable
- */
- ret = pin_user_pages_fast(host_addr & PAGE_MASK, num_pages,
- FOLL_WRITE | FOLL_LONGTERM, pages);
-
- /*
- * TODO(b/186876297): finds a way to detect the read / write permission.
* The host pages might be read-only and could fail if we attempt to pin
- * it with FOLL_WRITE. Removes it and tries again.
+ * it with FOLL_WRITE.
+ * Default to read/write if find_extend_vma() returns NULL.
*/
- if (ret == -EFAULT)
- ret = pin_user_pages_fast(host_addr & PAGE_MASK, num_pages,
- FOLL_LONGTERM, pages);
+ vma = find_extend_vma(current->mm, host_addr & PAGE_MASK);
+ if (vma && !(vma->vm_flags & VM_WRITE)) {
+ foll_flags &= ~FOLL_WRITE;
+ *preadonly = true;
+ } else {
+ *preadonly = false;
+ }
+ ret = pin_user_pages_fast(host_addr & PAGE_MASK, num_pages, foll_flags,
+ pages);
if (ret < 0) {
etdev_dbg(etdev, "get user pages failed %u:%pK-%u: %d",
group->workload_id, (void *)host_addr, num_pages,
@@ -1374,18 +1356,24 @@ int edgetpu_device_group_map(struct edgetpu_device_group *group,
enum edgetpu_context_id context_id;
const u32 mmu_flags = map_to_mmu_flags(flags) | EDGETPU_MMU_HOST;
int i;
+ bool readonly;
if (!valid_dma_direction(flags & EDGETPU_MAP_DIR_MASK))
return -EINVAL;
/* Pin user pages before holding any lock. */
- pages = edgetpu_pin_user_pages(group, arg, &num_pages);
+ pages = edgetpu_pin_user_pages(group, arg, &num_pages, &readonly);
if (IS_ERR(pages))
return PTR_ERR(pages);
+ /* If the host pages are read-only, fallback to use DMA_TO_DEVICE. */
+ if (readonly) {
+ flags &= ~EDGETPU_MAP_DIR_MASK;
+ flags |= EDGETPU_MAP_DMA_TO_DEVICE;
+ }
mutex_lock(&group->lock);
context_id = edgetpu_group_context_id_locked(group);
if (!edgetpu_device_group_is_finalized(group)) {
- ret = -EINVAL;
+ ret = edgetpu_group_errno(group);
goto error;
}
if (!IS_MIRRORED(flags)) {
@@ -1461,7 +1449,7 @@ int edgetpu_device_group_unmap(struct edgetpu_device_group *group,
mutex_lock(&group->lock);
if (!edgetpu_device_group_is_finalized(group)) {
- ret = -EINVAL;
+ ret = edgetpu_group_errno(group);
goto unlock_group;
}
@@ -1506,7 +1494,7 @@ int edgetpu_device_group_sync_buffer(struct edgetpu_device_group *group,
mutex_lock(&group->lock);
if (!edgetpu_device_group_is_finalized(group)) {
- ret = -EINVAL;
+ ret = edgetpu_group_errno(group);
goto unlock_group;
}
@@ -1545,6 +1533,9 @@ void edgetpu_group_mappings_show(struct edgetpu_device_group *group,
case EDGETPU_DEVICE_GROUP_WAITING:
case EDGETPU_DEVICE_GROUP_FINALIZED:
break;
+ case EDGETPU_DEVICE_GROUP_ERRORED:
+ seq_puts(s, " (errored)");
+ break;
case EDGETPU_DEVICE_GROUP_DISBANDED:
seq_puts(s, ": disbanded\n");
return;
@@ -1590,7 +1581,7 @@ int edgetpu_mmap_csr(struct edgetpu_device_group *group,
mutex_lock(&group->lock);
if (!edgetpu_group_finalized_and_attached(group)) {
- ret = -EINVAL;
+ ret = edgetpu_group_errno(group);
goto out;
}
@@ -1617,7 +1608,7 @@ int edgetpu_mmap_queue(struct edgetpu_device_group *group,
mutex_lock(&group->lock);
if (!edgetpu_group_finalized_and_attached(group)) {
- ret = -EINVAL;
+ ret = edgetpu_group_errno(group);
goto out;
}
@@ -1665,8 +1656,12 @@ void edgetpu_group_detach_mailbox_locked(struct edgetpu_device_group *group)
void edgetpu_group_close_and_detach_mailbox(struct edgetpu_device_group *group)
{
mutex_lock(&group->lock);
- /* only a finalized group may have mailbox attached */
- if (edgetpu_device_group_is_finalized(group)) {
+ /*
+ * Only a finalized group may have mailbox attached.
+ *
+ * Detaching mailbox for an errored group is also fine.
+ */
+ if (is_finalized_or_errored(group)) {
edgetpu_group_kci_close_device(group);
edgetpu_group_detach_mailbox_locked(group);
}
@@ -1687,7 +1682,10 @@ int edgetpu_group_attach_and_open_mailbox(struct edgetpu_device_group *group)
int ret = 0;
mutex_lock(&group->lock);
- /* only attaching mailbox for finalized groups */
+ /*
+ * Only attaching mailbox for finalized groups.
+ * Don't attach mailbox for errored groups.
+ */
if (edgetpu_device_group_is_finalized(group)) {
ret = edgetpu_group_attach_mailbox_locked(group);
if (!ret)
diff --git a/drivers/edgetpu/edgetpu-device-group.h b/drivers/edgetpu/edgetpu-device-group.h
index e0cd2a3..3a5e252 100644
--- a/drivers/edgetpu/edgetpu-device-group.h
+++ b/drivers/edgetpu/edgetpu-device-group.h
@@ -34,6 +34,13 @@ enum edgetpu_device_group_status {
EDGETPU_DEVICE_GROUP_WAITING,
/* Most operations can only apply on a finalized group. */
EDGETPU_DEVICE_GROUP_FINALIZED,
+ /*
+ * When a fatal error occurs, groups in FINALIZED status are transformed
+ * into this state. Operations on groups with this status mostly return
+ * ECANCELED. Once a member leaves an ERRORED group, the status is
+ * transitioned to DISBANDED.
+ */
+ EDGETPU_DEVICE_GROUP_ERRORED,
/* No operations except member leaving can be performed. */
EDGETPU_DEVICE_GROUP_DISBANDED,
};
@@ -126,10 +133,10 @@ struct edgetpu_list_group {
/*
* Returns if the group is waiting for members to join.
*
- * Must be called with lock held.
+ * Caller holds @group->lock.
*/
-static inline bool edgetpu_device_group_is_waiting(
- const struct edgetpu_device_group *group)
+static inline bool
+edgetpu_device_group_is_waiting(const struct edgetpu_device_group *group)
{
return group->status == EDGETPU_DEVICE_GROUP_WAITING;
}
@@ -137,7 +144,7 @@ static inline bool edgetpu_device_group_is_waiting(
/*
* Returns if the group is finalized.
*
- * Must be called with lock held.
+ * Caller holds @group->lock.
*/
static inline bool
edgetpu_device_group_is_finalized(const struct edgetpu_device_group *group)
@@ -146,16 +153,40 @@ edgetpu_device_group_is_finalized(const struct edgetpu_device_group *group)
}
/*
+ * Returns if the group is errored.
+ *
+ * Caller holds @group->lock.
+ */
+static inline bool
+edgetpu_device_group_is_errored(const struct edgetpu_device_group *group)
+{
+ return group->status == EDGETPU_DEVICE_GROUP_ERRORED;
+}
+
+/*
* Returns if the group is disbanded.
*
- * Must be called with lock held.
+ * Caller holds @group->lock.
*/
-static inline bool edgetpu_device_group_is_disbanded(
- const struct edgetpu_device_group *group)
+static inline bool
+edgetpu_device_group_is_disbanded(const struct edgetpu_device_group *group)
{
return group->status == EDGETPU_DEVICE_GROUP_DISBANDED;
}
+/*
+ * Returns -ECANCELED if the status of group is ERRORED, otherwise returns
+ * -EINVAL.
+ *
+ * Caller holds @group->lock.
+ */
+static inline int edgetpu_group_errno(struct edgetpu_device_group *group)
+{
+ if (edgetpu_device_group_is_errored(group))
+ return -ECANCELED;
+ return -EINVAL;
+}
+
/* Increases ref_count of @group by one and returns @group. */
static inline struct edgetpu_device_group *
edgetpu_device_group_get(struct edgetpu_device_group *group)
@@ -220,8 +251,7 @@ static inline struct edgetpu_dev *edgetpu_device_group_nth_etdev(
}
/*
- * Let @client leave the group it belongs to. Caller should hold the client's
- * etdev state_lock.
+ * Let @client leave the group it belongs to.
*
* If @client is the leader of a group, the group will be marked as "disbanded".
*
@@ -235,9 +265,6 @@ static inline struct edgetpu_dev *edgetpu_device_group_nth_etdev(
* @client->group will be removed from @client->etdev->groups.
* @client->group will be set as NULL.
*/
-void edgetpu_device_group_leave_locked(struct edgetpu_client *client);
-
-/* Let @client leave the group. Device should be in good state, warn if not. */
void edgetpu_device_group_leave(struct edgetpu_client *client);
/* Returns whether @client is the leader of @group. */
diff --git a/drivers/edgetpu/edgetpu-dmabuf.c b/drivers/edgetpu/edgetpu-dmabuf.c
index c2bf3ae..03918ff 100644
--- a/drivers/edgetpu/edgetpu-dmabuf.c
+++ b/drivers/edgetpu/edgetpu-dmabuf.c
@@ -684,8 +684,10 @@ int edgetpu_map_dmabuf(struct edgetpu_device_group *group,
goto err_put;
mutex_lock(&group->lock);
- if (!edgetpu_device_group_is_finalized(group))
+ if (!edgetpu_device_group_is_finalized(group)) {
+ ret = edgetpu_group_errno(group);
goto err_unlock_group;
+ }
dmap = alloc_dmabuf_map(group, flags);
if (!dmap) {
@@ -753,9 +755,10 @@ int edgetpu_unmap_dmabuf(struct edgetpu_device_group *group, u32 die_index,
int ret = -EINVAL;
mutex_lock(&group->lock);
- /* the group is disbanded means all the mappings have been released */
- if (!edgetpu_device_group_is_finalized(group))
+ if (!edgetpu_device_group_is_finalized(group)) {
+ ret = edgetpu_group_errno(group);
goto out_unlock;
+ }
edgetpu_mapping_lock(mappings);
map = edgetpu_mapping_find_locked(mappings, die_index, tpu_addr);
if (!map)
@@ -790,8 +793,10 @@ int edgetpu_map_bulk_dmabuf(struct edgetpu_device_group *group,
if (!valid_dma_direction(dir) || arg->size == 0)
return -EINVAL;
mutex_lock(&group->lock);
- if (!edgetpu_device_group_is_finalized(group))
+ if (!edgetpu_device_group_is_finalized(group)) {
+ ret = edgetpu_group_errno(group);
goto err_unlock_group;
+ }
/* checks not all FDs are ignored */
for (i = 0; i < group->n_clients; i++)
if (arg->dmabuf_fds[i] != EDGETPU_IGNORE_FD)
@@ -880,7 +885,7 @@ static void edgetpu_dma_fence_release(struct dma_fence *fence)
struct edgetpu_dma_fence *etfence = to_etfence(fence);
unsigned long flags;
- if (!fence)
+ if (!etfence)
return;
spin_lock_irqsave(&etfence_list_lock, flags);
@@ -911,17 +916,26 @@ static const struct dma_fence_ops edgetpu_dma_fence_ops = {
int edgetpu_sync_fence_create(struct edgetpu_create_sync_fence_data *datap)
{
- int fd;
+ int fd = get_unused_fd_flags(O_CLOEXEC);
int ret;
struct edgetpu_dma_fence *etfence;
struct sync_file *sync_file;
unsigned long flags;
+ if (fd < 0)
+ return fd;
etfence = kzalloc(sizeof(*etfence), GFP_KERNEL);
- if (!etfence)
- return -ENOMEM;
+ if (!etfence) {
+ ret = -ENOMEM;
+ goto err_put_fd;
+ }
spin_lock_init(&etfence->lock);
+ /*
+ * If sync_file_create() fails, fence release is called on dma_fence_put(). A valid
+ * list_head is needed for list_del().
+ */
+ INIT_LIST_HEAD(&etfence->etfence_list);
memcpy(&etfence->timeline_name, &datap->timeline_name,
EDGETPU_SYNC_TIMELINE_NAME_LEN - 1);
@@ -933,19 +947,20 @@ int edgetpu_sync_fence_create(struct edgetpu_create_sync_fence_data *datap)
dma_fence_put(&etfence->fence);
if (!sync_file) {
ret = -ENOMEM;
- goto err_freefence;
+ /* doesn't need kfree(etfence) here: dma_fence_put does it for us */
+ goto err_put_fd;
}
- fd = get_unused_fd_flags(O_CLOEXEC);
- datap->fence = fd;
- fd_install(fd, sync_file->file);
spin_lock_irqsave(&etfence_list_lock, flags);
list_add_tail(&etfence->etfence_list, &etfence_list_head);
spin_unlock_irqrestore(&etfence_list_lock, flags);
+
+ fd_install(fd, sync_file->file);
+ datap->fence = fd;
return 0;
-err_freefence:
- kfree(etfence);
+err_put_fd:
+ put_unused_fd(fd);
return ret;
}
@@ -966,6 +981,11 @@ int edgetpu_sync_fence_signal(struct edgetpu_signal_sync_fence_data *datap)
return -EINVAL;
spin_lock_irq(fence->lock);
+ /* don't signal fence twice */
+ if (unlikely(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
pr_debug("%s: %s-%s%llu-" SEQ_FMT " errno=%d\n", __func__,
fence->ops->get_driver_name(fence),
fence->ops->get_timeline_name(fence), fence->context,
@@ -973,6 +993,8 @@ int edgetpu_sync_fence_signal(struct edgetpu_signal_sync_fence_data *datap)
if (errno)
dma_fence_set_error(fence, errno);
ret = dma_fence_signal_locked(fence);
+
+out_unlock:
spin_unlock_irq(fence->lock);
dma_fence_put(fence);
return ret;
diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c
index 8ae808b..d0dc575 100644
--- a/drivers/edgetpu/edgetpu-firmware.c
+++ b/drivers/edgetpu/edgetpu-firmware.c
@@ -263,7 +263,6 @@ static int edgetpu_firmware_handshake(struct edgetpu_firmware *et_fw)
fw_flavor = edgetpu_kci_fw_info(etdev->kci, &et_fw->p->fw_info);
if (fw_flavor < 0) {
etdev_err(etdev, "firmware handshake failed: %d", fw_flavor);
- et_fw->p->status = FW_INVALID;
et_fw->p->fw_info.fw_flavor = FW_FLAVOR_UNKNOWN;
et_fw->p->fw_info.fw_changelist = 0;
et_fw->p->fw_info.fw_build_time = 0;
@@ -281,10 +280,9 @@ static int edgetpu_firmware_handshake(struct edgetpu_firmware *et_fw)
} else {
etdev_dbg(etdev, "loaded stage 2 bootloader");
}
- et_fw->p->status = FW_VALID;
/* In case older firmware that doesn't fill out fw_info. */
et_fw->p->fw_info.fw_flavor = fw_flavor;
- /* Hermosa second-stage bootloader doesn't implement log/trace */
+ /* don't attempt log/trace handshake if it's the second-stage bootloader */
if (fw_flavor != FW_FLAVOR_BL1) {
int ret = edgetpu_telemetry_kci(etdev);
@@ -294,6 +292,55 @@ static int edgetpu_firmware_handshake(struct edgetpu_firmware *et_fw)
return 0;
}
+/*
+ * Do edgetpu_pm_get() but prevent it from running the loaded firmware.
+ *
+ * On success, caller must later call edgetpu_pm_put() to decrease the reference count.
+ *
+ * Caller holds firmware lock.
+ */
+static int edgetpu_firmware_pm_get(struct edgetpu_firmware *et_fw)
+{
+ enum edgetpu_firmware_status prev = et_fw->p->status;
+ int ret;
+
+ /* Prevent platform-specific code from trying to run the previous firmware */
+ et_fw->p->status = FW_LOADING;
+ etdev_dbg(et_fw->etdev, "Requesting power up for firmware run\n");
+ ret = edgetpu_pm_get(et_fw->etdev->pm);
+ if (ret)
+ et_fw->p->status = prev;
+ return ret;
+}
+
+static void edgetpu_firmware_set_loading(struct edgetpu_firmware *et_fw)
+{
+ struct edgetpu_dev *etdev = et_fw->etdev;
+
+ mutex_lock(&etdev->state_lock);
+ etdev->state = ETDEV_STATE_FWLOADING;
+ mutex_unlock(&etdev->state_lock);
+
+ et_fw->p->status = FW_LOADING;
+}
+
+/* Set firmware and etdev state according to @ret, which can be an errno or 0. */
+static void edgetpu_firmware_set_state(struct edgetpu_firmware *et_fw, int ret)
+{
+ struct edgetpu_dev *etdev = et_fw->etdev;
+
+ et_fw->p->status = ret ? FW_INVALID : FW_VALID;
+
+ mutex_lock(&etdev->state_lock);
+ if (ret == -EIO)
+ etdev->state = ETDEV_STATE_BAD; /* f/w handshake error */
+ else if (ret)
+ etdev->state = ETDEV_STATE_NOFW; /* other errors */
+ else
+ etdev->state = ETDEV_STATE_GOOD; /* f/w handshake success */
+ mutex_unlock(&etdev->state_lock);
+}
+
enum edgetpu_fw_flavor
edgetpu_firmware_get_flavor(struct edgetpu_firmware *et_fw)
{
@@ -402,7 +449,7 @@ int edgetpu_firmware_run_locked(struct edgetpu_firmware *et_fw,
int ret;
bool is_bl1_run = (flags & FW_BL1);
- et_fw->p->status = FW_LOADING;
+ edgetpu_firmware_set_loading(et_fw);
if (!is_bl1_run)
edgetpu_sw_wdt_stop(et_fw->etdev);
@@ -442,6 +489,7 @@ int edgetpu_firmware_run_locked(struct edgetpu_firmware *et_fw,
handlers->launch_complete(et_fw);
else if (ret && handlers && handlers->launch_failed)
handlers->launch_failed(et_fw, ret);
+ edgetpu_firmware_set_state(et_fw, ret);
return ret;
out_unload_new_fw:
@@ -449,6 +497,7 @@ out_unload_new_fw:
out_failed:
if (handlers && handlers->launch_failed)
handlers->launch_failed(et_fw, ret);
+ edgetpu_firmware_set_state(et_fw, ret);
return ret;
}
@@ -457,57 +506,34 @@ int edgetpu_firmware_run(struct edgetpu_dev *etdev, const char *name,
{
struct edgetpu_firmware *et_fw = etdev->firmware;
int ret;
- enum edgetpu_dev_state prev_state;
if (!et_fw)
return -ENODEV;
- /*
- * All other operations on device will first check for device state
- * and then proceed.
- */
- mutex_lock(&etdev->state_lock);
- if (etdev->state == ETDEV_STATE_FWLOADING) {
- mutex_unlock(&etdev->state_lock);
- return -EAGAIN;
- }
- prev_state = etdev->state;
- etdev->state = ETDEV_STATE_FWLOADING;
- mutex_unlock(&etdev->state_lock);
ret = edgetpu_firmware_load_lock(etdev);
if (ret) {
etdev_err(etdev, "%s: lock failed (%d)\n", __func__, ret);
- mutex_lock(&etdev->state_lock);
- etdev->state = prev_state; /* restore etdev state */
- mutex_unlock(&etdev->state_lock);
return ret;
}
/* will be overwritten when we successfully parse the f/w header */
etdev->fw_version.kci_version = EDGETPU_INVALID_KCI_VERSION;
- /*
- * Prevent platform-specific code from trying to run the previous
- * firmware
- */
- et_fw->p->status = FW_LOADING;
- etdev_dbg(et_fw->etdev, "Requesting power up for firmware run\n");
- ret = edgetpu_pm_get(etdev->pm);
- if (!ret)
+ ret = edgetpu_firmware_pm_get(et_fw);
+ if (!ret) {
ret = edgetpu_firmware_run_locked(et_fw, name, flags);
- etdev->firmware = et_fw;
- edgetpu_pm_put(etdev->pm);
- edgetpu_firmware_load_unlock(etdev);
+ edgetpu_pm_put(etdev->pm);
+ }
- mutex_lock(&etdev->state_lock);
- if (ret == -EIO)
- etdev->state = ETDEV_STATE_BAD; /* f/w handshake error */
- else if (ret)
- etdev->state = ETDEV_STATE_NOFW; /* other errors */
- else
- etdev->state = ETDEV_STATE_GOOD; /* f/w handshake success */
- mutex_unlock(&etdev->state_lock);
+ edgetpu_firmware_load_unlock(etdev);
return ret;
}
+bool edgetpu_firmware_is_loading(struct edgetpu_dev *etdev)
+{
+ struct edgetpu_firmware *et_fw = etdev->firmware;
+
+ return et_fw && et_fw->p->status == FW_LOADING;
+}
+
/* Caller must hold firmware lock. */
enum edgetpu_firmware_status
edgetpu_firmware_status_locked(struct edgetpu_dev *etdev)
@@ -537,7 +563,7 @@ int edgetpu_firmware_restart_locked(struct edgetpu_dev *etdev)
const struct edgetpu_firmware_handlers *handlers = et_fw->p->handlers;
int ret = -1;
- et_fw->p->status = FW_LOADING;
+ edgetpu_firmware_set_loading(et_fw);
edgetpu_sw_wdt_stop(etdev);
/*
* Try restarting the firmware first, fall back to normal firmware start
@@ -548,11 +574,13 @@ int edgetpu_firmware_restart_locked(struct edgetpu_dev *etdev)
if (ret && handlers && handlers->prepare_run) {
ret = handlers->prepare_run(et_fw, &et_fw->p->fw_desc.buf);
if (ret)
- return ret;
+ goto out;
}
ret = edgetpu_firmware_handshake(et_fw);
if (!ret)
edgetpu_sw_wdt_start(etdev);
+out:
+ edgetpu_firmware_set_state(et_fw, ret);
return ret;
}
@@ -671,55 +699,51 @@ static const struct attribute_group edgetpu_firmware_attr_group = {
};
/*
- * Can only be called with etdev->state == ETDEV_STATE_FWLOADING.
+ * Marks all finalized groups related to @etdev as errored and sends the fatal error notification.
*/
-static void edgetpu_abort_clients(struct edgetpu_dev *etdev)
+static void edgetpu_set_groups_error(struct edgetpu_dev *etdev)
{
- int i, num_clients = 0;
+ size_t i, num_groups = 0;
struct edgetpu_device_group *group;
+ struct edgetpu_device_group **groups;
struct edgetpu_list_group *g;
- struct edgetpu_client **clients;
- struct edgetpu_list_client *c;
- /*
- * We don't hold etdev->groups_lock here because
- * 1. All group operations (functions in edgetpu-device-group.c)
- * are skipped when "etdev->state is not GOOD", we shall be the
- * only one accessing @etdev->groups, and
- * 2. to prevent LOCKDEP from reporting deadlock with
- * edgetpu_device_group_add_locked, which nested holds group->lock
- * then etdev->groups_lock.
- */
- clients = kmalloc_array(etdev->n_groups, sizeof(*clients), GFP_KERNEL);
- if (!clients) {
+ mutex_lock(&etdev->groups_lock);
+ groups = kmalloc_array(etdev->n_groups, sizeof(*groups), GFP_KERNEL);
+ if (unlikely(!groups)) {
/*
- * Just give up aborting clients in this case, this should never
- * happen after all.
+ * Just give up setting status in this case, this only happens
+ * when the system is OOM.
*/
+ mutex_unlock(&etdev->groups_lock);
edgetpu_fatal_error_notify(etdev);
return;
}
+ /*
+ * Fetch the groups into an array to set the group status without
+ * holding @etdev->groups_lock. This avoids a potential deadlock:
+ * edgetpu_device_group_add() holds group->lock and then etdev->groups_lock.
+ */
etdev_for_each_group(etdev, g, group) {
- mutex_lock(&group->lock);
- list_for_each_entry(c, &group->clients, list) {
- if (etdev == c->client->etdev) {
- clients[num_clients++] =
- edgetpu_client_get(c->client);
- break;
- }
- }
- mutex_unlock(&group->lock);
+ if (edgetpu_device_group_is_disbanded(group))
+ continue;
+ groups[num_groups++] = edgetpu_device_group_get(group);
}
- edgetpu_fatal_error_notify(etdev);
- for (i = 0; i < num_clients; i++) {
+ mutex_unlock(&etdev->groups_lock);
+ for (i = 0; i < num_groups; i++) {
+ group = groups[i];
+ mutex_lock(&group->lock);
/*
- * No need to hold state lock here since all group operations on
- * client are protected by state being GOOD.
+ * Only finalized groups may have a handshake with the FW; mark
+ * them as errored.
*/
- edgetpu_device_group_leave_locked(clients[i]);
- edgetpu_client_put(clients[i]);
+ if (edgetpu_device_group_is_finalized(group))
+ group->status = EDGETPU_DEVICE_GROUP_ERRORED;
+ mutex_unlock(&group->lock);
+ edgetpu_device_group_put(group);
}
- kfree(clients);
+ edgetpu_fatal_error_notify(etdev);
+ kfree(groups);
}
static void edgetpu_firmware_wdt_timeout_action(void *data)
@@ -733,40 +757,26 @@ static void edgetpu_firmware_wdt_timeout_action(void *data)
if (!edgetpu_is_powered(etdev))
return;
- mutex_lock(&etdev->state_lock);
- if (etdev->state == ETDEV_STATE_FWLOADING) {
- mutex_unlock(&etdev->state_lock);
- return;
- }
- etdev->state = ETDEV_STATE_FWLOADING;
- mutex_unlock(&etdev->state_lock);
-
- edgetpu_abort_clients(etdev);
-
- ret = edgetpu_firmware_load_lock(etdev);
/*
- * edgetpu_firmware_load_lock() should always return success here as
- * etdev is already removed from all groups and fw loader exists.
+ * Zero the FW state of open mailboxes so that when the runtime releases
+ * groups the CLOSE_DEVICE KCIs won't be sent.
*/
- if (ret) {
- etdev_err(etdev, "%s: lock failed (%d)\n", __func__, ret);
+ edgetpu_handshake_clear_fw_state(&etdev->mailbox_manager->open_devices);
+ edgetpu_set_groups_error(etdev);
+
+ /* Another procedure is loading the firmware, let it do the work. */
+ if (edgetpu_firmware_is_loading(etdev))
return;
- }
- et_fw->p->status = FW_LOADING;
- ret = edgetpu_pm_get(etdev->pm);
- if (!ret)
- ret = edgetpu_firmware_restart_locked(etdev);
- edgetpu_pm_put(etdev->pm);
- edgetpu_firmware_load_unlock(etdev);
- mutex_lock(&etdev->state_lock);
- if (ret == -EIO)
- etdev->state = ETDEV_STATE_BAD;
- else if (ret)
- etdev->state = ETDEV_STATE_NOFW;
- else
- etdev->state = ETDEV_STATE_GOOD;
- mutex_unlock(&etdev->state_lock);
+ /* edgetpu_firmware_lock() here never fails */
+ edgetpu_firmware_lock(etdev);
+
+ ret = edgetpu_firmware_pm_get(et_fw);
+ if (!ret) {
+ ret = edgetpu_firmware_restart_locked(etdev);
+ edgetpu_pm_put(etdev->pm);
+ }
+ edgetpu_firmware_unlock(etdev);
}
int edgetpu_firmware_create(struct edgetpu_dev *etdev,
diff --git a/drivers/edgetpu/edgetpu-firmware.h b/drivers/edgetpu/edgetpu-firmware.h
index ad7c484..e41543d 100644
--- a/drivers/edgetpu/edgetpu-firmware.h
+++ b/drivers/edgetpu/edgetpu-firmware.h
@@ -205,6 +205,8 @@ int edgetpu_firmware_lock(struct edgetpu_dev *etdev);
int edgetpu_firmware_trylock(struct edgetpu_dev *etdev);
void edgetpu_firmware_unlock(struct edgetpu_dev *etdev);
+/* Returns whether the firmware loading work is ongoing. */
+bool edgetpu_firmware_is_loading(struct edgetpu_dev *etdev);
/*
* Returns the state of the firmware image currently loaded for this device.
diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c
index 265d5b2..db0f296 100644
--- a/drivers/edgetpu/edgetpu-fs.c
+++ b/drivers/edgetpu/edgetpu-fs.c
@@ -79,14 +79,9 @@ int edgetpu_open(struct edgetpu_dev *etdev, struct file *file)
/* Set client pointer to NULL if error creating client. */
file->private_data = NULL;
- mutex_lock(&etdev->open.lock);
client = edgetpu_client_add(etdev);
- if (IS_ERR(client)) {
- mutex_unlock(&etdev->open.lock);
+ if (IS_ERR(client))
return PTR_ERR(client);
- }
- etdev->open.count++;
- mutex_unlock(&etdev->open.lock);
file->private_data = client;
return 0;
}
@@ -110,28 +105,27 @@ static int edgetpu_fs_release(struct inode *inode, struct file *file)
etdev = client->etdev;
wakelock_count = edgetpu_wakelock_lock(client->wakelock);
-
+ mutex_lock(&client->group_lock);
/*
- * TODO(b/180528495): remove pm_get when disbanding can be performed
- * with device off.
+ * @wakelock = 0 means the device might be powered off. And for a group with a non-detachable
+ * mailbox, its mailbox is removed when the group is released; in such a case we need to
+ * ensure the device is powered to prevent a kernel panic on programming VII mailbox CSRs.
+ *
+ * For mailbox-detachable groups the mailbox had been removed when the wakelock was
+ * released, edgetpu_device_group_release() doesn't need the device be powered in this case.
*/
- if (client->group && !wakelock_count) {
+ if (!wakelock_count && client->group && !client->group->mailbox_detachable) {
wakelock_count = 1;
edgetpu_pm_get(etdev->pm);
}
-
+ mutex_unlock(&client->group_lock);
edgetpu_wakelock_unlock(client->wakelock);
edgetpu_client_remove(client);
- mutex_lock(&etdev->open.lock);
- if (etdev->open.count)
- --etdev->open.count;
-
/* count was zero if client previously released its wake lock */
if (wakelock_count)
edgetpu_pm_put(etdev->pm);
- mutex_unlock(&etdev->open.lock);
return 0;
}
@@ -227,10 +221,7 @@ static int edgetpu_ioctl_finalize_group(struct edgetpu_client *client)
group = client->group;
if (!group || !edgetpu_device_group_is_leader(group, client))
goto out_unlock;
- /*
- * TODO(b/180528495): remove pm_get when finalization can be performed
- * with device off.
- */
+ /* Finalization has to be performed with device on. */
if (!wakelock_count) {
ret = edgetpu_pm_get(client->etdev->pm);
if (ret) {
diff --git a/drivers/edgetpu/edgetpu-internal.h b/drivers/edgetpu/edgetpu-internal.h
index dc83dd4..210d273 100644
--- a/drivers/edgetpu/edgetpu-internal.h
+++ b/drivers/edgetpu/edgetpu-internal.h
@@ -91,11 +91,6 @@ struct edgetpu_coherent_mem {
#endif
};
-struct edgetpu_reg_window {
- uint start_reg_offset;
- size_t size;
-};
-
struct edgetpu_device_group;
struct edgetpu_p2p_csr_map;
struct edgetpu_remote_dram_map;
@@ -128,8 +123,6 @@ struct edgetpu_client {
dma_addr_t *p2p_csrs_dma_addrs;
/* Peer DRAM dma addrs for this client, if has on-device DRAM */
dma_addr_t *remote_drams_dma_addrs;
- /* range of device CSRs mmap()'able */
- struct edgetpu_reg_window reg_window;
/* Per-client request to keep device active */
struct edgetpu_wakelock *wakelock;
/* Bit field of registered per die events */
@@ -170,10 +163,6 @@ struct edgetpu_dev {
struct cdev cdev; /* cdev char device structure */
dev_t devno; /* char device dev_t */
char dev_name[EDGETPU_DEVICE_NAME_MAX];
- struct {
- struct mutex lock;
- uint count; /* number times device currently opened */
- } open;
struct edgetpu_mapped_resource regs; /* ioremapped CSRs */
struct dentry *d_entry; /* debugfs dir for this device */
struct mutex state_lock; /* protects state of this device */
diff --git a/drivers/edgetpu/edgetpu-kci.h b/drivers/edgetpu/edgetpu-kci.h
index 4d9de5f..05f87c8 100644
--- a/drivers/edgetpu/edgetpu-kci.h
+++ b/drivers/edgetpu/edgetpu-kci.h
@@ -343,10 +343,20 @@ int edgetpu_kci_shutdown(struct edgetpu_kci *kci);
int edgetpu_kci_get_debug_dump(struct edgetpu_kci *kci, tpu_addr_t tpu_addr,
size_t size);
-/* Inform the firmware to prepare to serve the VII of @mailbox_ids. */
+/*
+ * Inform the firmware to prepare to serve the VII of @mailbox_ids.
+ *
+ * You usually shouldn't call this directly - consider using
+ * edgetpu_mailbox_activate() instead.
+ */
int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_ids);
-/* Inform the firmware the VII with @mailbox_ids are closed. */
+/*
+ * Inform the firmware that the VIIs with @mailbox_ids are closed.
+ *
+ * You usually shouldn't call this directly - consider using
+ * edgetpu_mailbox_deactivate() instead.
+ */
int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_ids);
/* Cancel work queues or wait until they're done */
diff --git a/drivers/edgetpu/edgetpu-mailbox.c b/drivers/edgetpu/edgetpu-mailbox.c
index de76bb8..11b6fe2 100644
--- a/drivers/edgetpu/edgetpu-mailbox.c
+++ b/drivers/edgetpu/edgetpu-mailbox.c
@@ -542,9 +542,9 @@ void edgetpu_mailbox_free_queue(struct edgetpu_dev *etdev,
/*
* Creates a mailbox manager, one edgetpu device has one manager.
*/
-struct edgetpu_mailbox_manager *edgetpu_mailbox_create_mgr(
- struct edgetpu_dev *etdev,
- const struct edgetpu_mailbox_manager_desc *desc)
+struct edgetpu_mailbox_manager *
+edgetpu_mailbox_create_mgr(struct edgetpu_dev *etdev,
+ const struct edgetpu_mailbox_manager_desc *desc)
{
struct edgetpu_mailbox_manager *mgr;
uint total = 0;
@@ -575,6 +575,7 @@ struct edgetpu_mailbox_manager *edgetpu_mailbox_create_mgr(
if (!mgr->mailboxes)
return ERR_PTR(-ENOMEM);
rwlock_init(&mgr->mailboxes_lock);
+ mutex_init(&mgr->open_devices.lock);
return mgr;
}
@@ -787,11 +788,11 @@ int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_ids)
edgetpu_wakelock_inc_event_locked(client->wakelock,
EDGETPU_WAKELOCK_EVENT_EXT_MAILBOX);
- etdev_dbg(client->etdev, "Opening mailboxes: %08X\n", mailbox_ids);
+ etdev_dbg(client->etdev, "Enabling mailboxes: %08X\n", mailbox_ids);
- ret = edgetpu_kci_open_device(client->etdev->kci, mailbox_ids);
+ ret = edgetpu_mailbox_activate(client->etdev, mailbox_ids);
if (ret)
- etdev_err(client->etdev, "Open mailboxes %08x failed (%d)\n",
+ etdev_err(client->etdev, "Activate mailboxes %08x failed: %d",
mailbox_ids, ret);
edgetpu_wakelock_unlock(client->wakelock);
return ret;
@@ -812,12 +813,55 @@ int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_ids)
edgetpu_wakelock_dec_event_locked(client->wakelock,
EDGETPU_WAKELOCK_EVENT_EXT_MAILBOX);
- etdev_dbg(client->etdev, "Closing mailbox: %08X\n", mailbox_ids);
- ret = edgetpu_kci_close_device(client->etdev->kci, mailbox_ids);
+ etdev_dbg(client->etdev, "Disabling mailbox: %08X\n", mailbox_ids);
+ ret = edgetpu_mailbox_deactivate(client->etdev, mailbox_ids);
if (ret)
- etdev_err(client->etdev, "Close mailboxes %08x failed (%d)\n",
+ etdev_err(client->etdev, "Deactivate mailboxes %08x failed: %d",
mailbox_ids, ret);
edgetpu_wakelock_unlock(client->wakelock);
return ret;
}
+
+int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_ids)
+{
+ struct edgetpu_handshake *eh = &etdev->mailbox_manager->open_devices;
+ u32 to_send;
+ int ret = 0;
+
+ mutex_lock(&eh->lock);
+ to_send = mailbox_ids & ~eh->fw_state;
+ if (to_send)
+ ret = edgetpu_kci_open_device(etdev->kci, to_send);
+ if (!ret) {
+ eh->state |= mailbox_ids;
+ eh->fw_state |= mailbox_ids;
+ }
+ mutex_unlock(&eh->lock);
+ return ret;
+}
+
+int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_ids)
+{
+ struct edgetpu_handshake *eh = &etdev->mailbox_manager->open_devices;
+ u32 to_send;
+ int ret = 0;
+
+ mutex_lock(&eh->lock);
+ to_send = mailbox_ids & eh->fw_state;
+ if (to_send)
+ ret = edgetpu_kci_close_device(etdev->kci, to_send);
+ if (!ret) {
+ eh->state &= ~mailbox_ids;
+ eh->fw_state &= ~mailbox_ids;
+ }
+ mutex_unlock(&eh->lock);
+ return ret;
+}
+
+void edgetpu_handshake_clear_fw_state(struct edgetpu_handshake *eh)
+{
+ mutex_lock(&eh->lock);
+ eh->fw_state = 0;
+ mutex_unlock(&eh->lock);
+}
diff --git a/drivers/edgetpu/edgetpu-mailbox.h b/drivers/edgetpu/edgetpu-mailbox.h
index cdab5aa..c212a8a 100644
--- a/drivers/edgetpu/edgetpu-mailbox.h
+++ b/drivers/edgetpu/edgetpu-mailbox.h
@@ -78,6 +78,25 @@ struct edgetpu_vii {
edgetpu_queue_mem resp_queue_mem;
};
+/*
+ * Structure for recording the driver state vs FW state.
+ *
+ * Example usage:
+ * @state is a bit mask that denotes each mailbox to which an "OPEN_DEVICE"
+ * KCI has been sent.
+ * @fw_state is the bit mask of mailbox IDs for which the FW has received the
+ * "OPEN_DEVICE" KCI.
+ * In usual cases @state always equals @fw_state. But when the FW is reloaded,
+ * @fw_state is reset to zero, then this structure can be used to know the FW
+ * state is out-of-sync and need further actions.
+ */
+struct edgetpu_handshake {
+ struct mutex lock;
+ /* fields protected by @lock */
+ u32 state;
+ u32 fw_state;
+};
+
typedef u32 (*get_csr_base_t)(uint index);
struct edgetpu_mailbox_manager {
@@ -94,6 +113,7 @@ struct edgetpu_mailbox_manager {
get_csr_base_t get_context_csr_base;
get_csr_base_t get_cmd_queue_csr_base;
get_csr_base_t get_resp_queue_csr_base;
+ struct edgetpu_handshake open_devices;
};
/* the structure to configure a mailbox manager */
@@ -165,9 +185,9 @@ enum mailbox_queue_type {
* Allocations are device-managed so no release function is needed to free the
* manager.
*/
-struct edgetpu_mailbox_manager *edgetpu_mailbox_create_mgr(
- struct edgetpu_dev *etdev,
- const struct edgetpu_mailbox_manager_desc *desc);
+struct edgetpu_mailbox_manager *
+edgetpu_mailbox_create_mgr(struct edgetpu_dev *etdev,
+ const struct edgetpu_mailbox_manager_desc *desc);
/* interrupt handler */
irqreturn_t edgetpu_mailbox_handle_irq(struct edgetpu_mailbox_manager *mgr);
@@ -282,6 +302,23 @@ int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_ids);
/* Notify firmware of external mailboxes becoming inactive */
int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_ids);
+/*
+ * Activates @mailbox_ids, OPEN_DEVICE KCI will be sent.
+ *
+ * If @mailbox_ids are known to be activated, KCI is not sent and this function
+ * returns 0.
+ *
+ * Returns what edgetpu_kci_open_device() returned.
+ * Caller ensures device is powered on.
+ */
+int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_ids);
+/*
+ * Similar to edgetpu_mailbox_activate() but sends CLOSE_DEVICE KCI instead.
+ */
+int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_ids);
+/* Sets @eh->fw_state to 0. */
+void edgetpu_handshake_clear_fw_state(struct edgetpu_handshake *eh);
+
/* Utilities of circular queue operations */
/*
diff --git a/drivers/edgetpu/edgetpu-usage-stats.c b/drivers/edgetpu/edgetpu-usage-stats.c
index 1404674..73ee06c 100644
--- a/drivers/edgetpu/edgetpu-usage-stats.c
+++ b/drivers/edgetpu/edgetpu-usage-stats.c
@@ -17,6 +17,7 @@
#include "abrolhos-pm.h"
static enum tpu_pwr_state tpu_states_arr[] = {
+ TPU_ACTIVE_UUD,
TPU_ACTIVE_SUD,
TPU_ACTIVE_UD,
TPU_ACTIVE_NOM,
@@ -151,6 +152,22 @@ static void edgetpu_counter_update(
mutex_unlock(&ustats->usage_stats_lock);
}
+static void edgetpu_counter_clear(
+ struct edgetpu_dev *etdev,
+ enum edgetpu_usage_counter_type counter_type)
+{
+ struct edgetpu_usage_stats *ustats = etdev->usage_stats;
+
+ if (!ustats)
+ return;
+ if (counter_type >= EDGETPU_COUNTER_COUNT)
+ return;
+
+ mutex_lock(&ustats->usage_stats_lock);
+ ustats->counter[counter_type] = 0;
+ mutex_unlock(&ustats->usage_stats_lock);
+}
+
static void edgetpu_max_watermark_update(
struct edgetpu_dev *etdev,
struct edgetpu_usage_max_watermark *max_watermark)
@@ -403,7 +420,18 @@ static ssize_t tpu_active_cycle_count_show(struct device *dev,
EDGETPU_COUNTER_TPU_ACTIVE_CYCLES);
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
-static DEVICE_ATTR_RO(tpu_active_cycle_count);
+
+static ssize_t tpu_active_cycle_count_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+
+ edgetpu_counter_clear(etdev, EDGETPU_COUNTER_TPU_ACTIVE_CYCLES);
+ return count;
+}
+static DEVICE_ATTR_RW(tpu_active_cycle_count);
static ssize_t tpu_throttle_stall_count_show(struct device *dev,
struct device_attribute *attr,
@@ -416,7 +444,18 @@ static ssize_t tpu_throttle_stall_count_show(struct device *dev,
EDGETPU_COUNTER_TPU_THROTTLE_STALLS);
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
-static DEVICE_ATTR_RO(tpu_throttle_stall_count);
+
+static ssize_t tpu_throttle_stall_count_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+
+ edgetpu_counter_clear(etdev, EDGETPU_COUNTER_TPU_THROTTLE_STALLS);
+ return count;
+}
+static DEVICE_ATTR_RW(tpu_throttle_stall_count);
static ssize_t inference_count_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -428,7 +467,18 @@ static ssize_t inference_count_show(struct device *dev,
EDGETPU_COUNTER_INFERENCES);
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
-static DEVICE_ATTR_RO(inference_count);
+
+static ssize_t inference_count_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+
+ edgetpu_counter_clear(etdev, EDGETPU_COUNTER_INFERENCES);
+ return count;
+}
+static DEVICE_ATTR_RW(inference_count);
static ssize_t tpu_op_count_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -440,7 +490,18 @@ static ssize_t tpu_op_count_show(struct device *dev,
EDGETPU_COUNTER_TPU_OPS);
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
-static DEVICE_ATTR_RO(tpu_op_count);
+
+static ssize_t tpu_op_count_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+
+ edgetpu_counter_clear(etdev, EDGETPU_COUNTER_TPU_OPS);
+ return count;
+}
+static DEVICE_ATTR_RW(tpu_op_count);
static ssize_t param_cache_hit_count_show(struct device *dev,
struct device_attribute *attr,
@@ -453,7 +514,18 @@ static ssize_t param_cache_hit_count_show(struct device *dev,
EDGETPU_COUNTER_PARAM_CACHE_HITS);
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
-static DEVICE_ATTR_RO(param_cache_hit_count);
+
+static ssize_t param_cache_hit_count_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+
+ edgetpu_counter_clear(etdev, EDGETPU_COUNTER_PARAM_CACHE_HITS);
+ return count;
+}
+static DEVICE_ATTR_RW(param_cache_hit_count);
static ssize_t param_cache_miss_count_show(struct device *dev,
struct device_attribute *attr,
@@ -466,7 +538,18 @@ static ssize_t param_cache_miss_count_show(struct device *dev,
EDGETPU_COUNTER_PARAM_CACHE_MISSES);
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
-static DEVICE_ATTR_RO(param_cache_miss_count);
+
+static ssize_t param_cache_miss_count_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+
+ edgetpu_counter_clear(etdev, EDGETPU_COUNTER_PARAM_CACHE_MISSES);
+ return count;
+}
+static DEVICE_ATTR_RW(param_cache_miss_count);
static ssize_t context_preempt_count_show(struct device *dev,
struct device_attribute *attr,
@@ -479,7 +562,18 @@ static ssize_t context_preempt_count_show(struct device *dev,
EDGETPU_COUNTER_CONTEXT_PREEMPTS);
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
-static DEVICE_ATTR_RO(context_preempt_count);
+
+static ssize_t context_preempt_count_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+
+ edgetpu_counter_clear(etdev, EDGETPU_COUNTER_CONTEXT_PREEMPTS);
+ return count;
+}
+static DEVICE_ATTR_RW(context_preempt_count);
static ssize_t outstanding_commands_max_show(
struct device *dev, struct device_attribute *attr, char *buf)
diff --git a/drivers/edgetpu/edgetpu.h b/drivers/edgetpu/edgetpu.h
index 85532b1..eacbe80 100644
--- a/drivers/edgetpu/edgetpu.h
+++ b/drivers/edgetpu/edgetpu.h
@@ -47,11 +47,12 @@ typedef __u32 edgetpu_map_flag_t;
#define EDGETPU_EXT_MAILBOX_TYPE_DSP 3
struct edgetpu_map_ioctl {
- __u64 host_address;
+ __u64 host_address; /* user-space address to be mapped */
__u64 size; /* size of mapping in bytes */
__u64 device_address; /* returned TPU VA */
/*
- * Flags indicating mapping attribute requests from the runtime.
+ * Flags or'ed with EDGETPU_MAP_*, indicating mapping attribute requests from
+ * the runtime.
* Set RESERVED bits to 0 to ensure backwards compatibility.
*
* Bitfields:
@@ -83,17 +84,31 @@ struct edgetpu_map_ioctl {
/*
* Index of die in a device group. The index is decided by the order of
* joining the group, with value from zero to (# dies in group) - 1.
- * Index 0 for the master die in the group.
+ * Index 0 for the leader die in the group.
*
* This field is ignored unless EDGETPU_MAP_NONMIRRORED is passed to
- * flags.
+ * @flags.
*/
__u32 die_index;
};
#define EDGETPU_IOCTL_BASE 0xED
-/* Map host buffer to TPU. */
+/*
+ * Map a host buffer to TPU.
+ *
+ * This operation can be performed without acquiring the wakelock. This
+ * characteristic holds for all mapping / un-mapping ioctls.
+ *
+ * On success, @device_address is set, and TPU can access the content of
+ * @host_address by @device_address afterwards.
+ *
+ * EINVAL: If the group is not finalized.
+ * EINVAL: If size equals 0.
+ * EINVAL: (for EDGETPU_MAP_NONMIRRORED case) If @die_index exceeds the number
+ * of clients in the group.
+ * EINVAL: If the target device group is disbanded.
+ */
#define EDGETPU_MAP_BUFFER \
_IOWR(EDGETPU_IOCTL_BASE, 0, struct edgetpu_map_ioctl)
@@ -107,6 +122,8 @@ struct edgetpu_map_ioctl {
*
* Note: Only the SKIP_CPU_SYNC flag is considered, other bits in @flags are
* fetched from the kernel's record.
+ *
+ * EINVAL: If the requested @device_address is not found.
*/
#define EDGETPU_UNMAP_BUFFER \
_IOW(EDGETPU_IOCTL_BASE, 4, struct edgetpu_map_ioctl)
@@ -123,7 +140,12 @@ struct edgetpu_event_register {
__u32 eventfd;
};
-/* Set eventfd for notification of events from kernel to the device group. */
+/*
+ * Set eventfd for notification of events from kernel to the device group.
+ *
+ * EINVAL: If @event_id is not one of EDGETPU_EVENT_*.
+ * EBADF, EINVAL: If @eventfd is not a valid event file descriptor.
+ */
#define EDGETPU_SET_EVENTFD \
_IOW(EDGETPU_IOCTL_BASE, 5, struct edgetpu_event_register)
@@ -150,19 +172,29 @@ struct edgetpu_mailbox_attr {
* Create a new device group with the caller as the master.
*
* EINVAL: If the caller already belongs to a group.
- * EINVAL: If @cmd/resp_queue_size equals 0.
- * EINVAL: If @sizeof_cmd/resp equals 0.
+ * EINVAL: If @cmd_queue_size or @resp_queue_size equals 0.
+ * EINVAL: If @sizeof_cmd or @sizeof_resp equals 0.
* EINVAL: If @cmd_queue_size * 1024 / @sizeof_cmd >= 1024, this is a hardware
* limitation. Same rule for the response sizes pair.
*/
#define EDGETPU_CREATE_GROUP \
_IOW(EDGETPU_IOCTL_BASE, 6, struct edgetpu_mailbox_attr)
-/* Join the calling fd to the device group of the supplied fd. */
+/*
+ * Join the calling fd to the device group of the supplied fd.
+ *
+ * EINVAL: If the caller already belongs to a group.
+ * EINVAL: If the supplied FD is not for an open EdgeTPU device file.
+ */
#define EDGETPU_JOIN_GROUP \
_IOW(EDGETPU_IOCTL_BASE, 7, __u32)
-/* Finalize the device group with the caller as the master. */
+/*
+ * Finalize the device group with the caller as the leader.
+ *
+ * EINVAL: If the dies in this group are not allowed to form a device group.
+ * ETIMEDOUT: If the handshake with TPU firmware times out.
+ */
#define EDGETPU_FINALIZE_GROUP \
_IO(EDGETPU_IOCTL_BASE, 8)
@@ -173,7 +205,12 @@ struct edgetpu_mailbox_attr {
#define EDGETPU_PERDIE_EVENT_LOGS_AVAILABLE 0x1000
#define EDGETPU_PERDIE_EVENT_TRACES_AVAILABLE 0x1001
-/* Set eventfd for notification of per-die events from kernel. */
+/*
+ * Set eventfd for notification of per-die events from kernel.
+ *
+ * EINVAL: If @event_id is not one of EDGETPU_PERDIE_EVENT_*.
+ * EBADF, EINVAL: If @eventfd is not a valid eventfd.
+ */
#define EDGETPU_SET_PERDIE_EVENTFD \
_IOW(EDGETPU_IOCTL_BASE, 9, struct edgetpu_event_register)
@@ -194,11 +231,11 @@ struct edgetpu_sync_ioctl {
* device address returned by EDGETPU_MAP_BUFFER.
*/
__u64 device_address;
- /* size in bytes to be sync'ed */
+ /* Size in bytes to be sync'ed. */
__u64 size;
/*
- * offset in bytes at which the sync operation is to begin from the
- * start of the buffer
+ * Offset in bytes at which the sync operation is to begin from the
+ * start of the buffer.
*/
__u64 offset;
/*
@@ -274,7 +311,8 @@ struct edgetpu_map_dmabuf_ioctl {
*
* EINVAL: If @offset is not page-aligned.
* EINVAL: If @size is zero.
- * EINVAL: If @die_index exceeds the number of clients in the group.
+ * EINVAL: (for EDGETPU_MAP_NONMIRRORED case) If @die_index exceeds the number
+ * of clients in the group.
* EINVAL: If the target device group is disbanded.
*/
#define EDGETPU_MAP_DMABUF \
@@ -423,8 +461,13 @@ struct edgetpu_sync_fence_status {
* Release the current client's wakelock, allowing firmware to be shut down if
* no other clients are active.
* Groups and buffer mappings are preserved.
- * WARNING: Attempts to access any mapped CSRs before re-acquiring the wakelock
- * may crash the system.
+ *
+ * Some mmap operations (listed below) are not allowed when the client's
+ * wakelock is released. And if the runtime is holding the mmap'ed buffers, this
+ * ioctl returns EAGAIN and the wakelock is not released.
+ * - EDGETPU_MMAP_CSR_OFFSET
+ * - EDGETPU_MMAP_CMD_QUEUE_OFFSET
+ * - EDGETPU_MMAP_RESP_QUEUE_OFFSET
*/
#define EDGETPU_RELEASE_WAKE_LOCK _IO(EDGETPU_IOCTL_BASE, 25)
@@ -446,9 +489,7 @@ struct edgetpu_fw_version {
* When there is an attempt to load firmware, its version numbers are recorded
* by the kernel and will be returned on the following EDGETPU_FIRMWARE_VERSION
* calls. If the latest firmware attempted to load didn't exist or had an
- * invalid header, this call returns -ENODEV.
- *
- * Returns 0 on success, -errno on error.
+ * invalid header, this call returns ENODEV.
*/
#define EDGETPU_FIRMWARE_VERSION \
_IOR(EDGETPU_IOCTL_BASE, 27, struct edgetpu_fw_version)
diff --git a/drivers/edgetpu/mm-backport.h b/drivers/edgetpu/mm-backport.h
index 2e2f9a7..8831285 100644
--- a/drivers/edgetpu/mm-backport.h
+++ b/drivers/edgetpu/mm-backport.h
@@ -23,6 +23,10 @@
#define pin_user_pages_fast get_user_pages_fast
#define unpin_user_page put_page
+#ifndef untagged_addr
+#define untagged_addr(addr) (addr)
+#endif
+
#endif /* FOLL_PIN */
#endif /* __MM_BACKPORT_H__ */