From c850c0750912c2ce39959a6d2129c281e133dd17 Mon Sep 17 00:00:00 2001 From: Nrithya Kanakasabapathy Date: Thu, 25 Feb 2021 03:40:23 +0000 Subject: Merge branch 'whitechapel' into android-gs-pixel-5.10 * whitechapel: GKI: edgetpu: Remove unused variable *tz edgetpu: sleep between p-channel handshake attempts edgetpu: abrolhos: handle PM_QOS and BTS requests edgetpu: add reverse KCI handler edgetpu: fix memory leak in edgetpu_device_group_map edgetpu: fix asynchronous utility crash edgetpu: add warning when mmap-ing the full CSR edgetpu: add accessor function for utilization stats edgetpu: asynchronously map sgt edgetpu: don't update stats if device powered down edgetpu: add DRAM used accessor function edgetpu: only poll mailbox activity for KCI+VII mailboxes in ISR edgetpu: load_firmware read "[none]" if firmware is not valid Signed-off-by: Nrithya Kanakasabapathy Change-Id: I053c982a54cff2981785e22ab7a392ec2f851338 --- drivers/edgetpu/abrolhos-device.c | 28 +++++++ drivers/edgetpu/abrolhos-pm.c | 120 ++++++++++++++++++++++++++++- drivers/edgetpu/abrolhos-pm.h | 14 +++- drivers/edgetpu/abrolhos-thermal.c | 10 +++ drivers/edgetpu/edgetpu-async.h | 13 ++-- drivers/edgetpu/edgetpu-core.c | 9 ++- drivers/edgetpu/edgetpu-device-group.c | 94 ++++++++++++++++++----- drivers/edgetpu/edgetpu-dram.h | 15 ++++ drivers/edgetpu/edgetpu-firmware.c | 16 ++-- drivers/edgetpu/edgetpu-fs.c | 4 +- drivers/edgetpu/edgetpu-internal.h | 9 +++ drivers/edgetpu/edgetpu-kci.c | 135 +++++++++++++++++++++++++++++++-- drivers/edgetpu/edgetpu-kci.h | 49 +++++++++++- drivers/edgetpu/edgetpu-mailbox.c | 8 +- drivers/edgetpu/edgetpu-pm.c | 4 + drivers/edgetpu/edgetpu-pm.h | 2 + drivers/edgetpu/edgetpu-usage-stats.c | 32 +++++--- drivers/edgetpu/edgetpu-usage-stats.h | 2 + 18 files changed, 503 insertions(+), 61 deletions(-) diff --git a/drivers/edgetpu/abrolhos-device.c b/drivers/edgetpu/abrolhos-device.c index ed1e7d5..70520fa 100644 --- a/drivers/edgetpu/abrolhos-device.c +++ 
b/drivers/edgetpu/abrolhos-device.c @@ -12,6 +12,7 @@ #include "edgetpu-internal.h" #include "edgetpu-mailbox.h" #include "abrolhos-platform.h" +#include "abrolhos-pm.h" #include "edgetpu-telemetry.h" #define HOST_NONSECURE_INTRSRCMASKREG 0x000f0004 @@ -140,3 +141,30 @@ struct edgetpu_dumpregs_range edgetpu_chip_tile_statusregs_ranges[] = { }; int edgetpu_chip_tile_statusregs_nranges = ARRAY_SIZE(edgetpu_chip_tile_statusregs_ranges); + +static void edgetpu_chip_set_pm_qos(struct edgetpu_dev *etdev, u32 value) +{ + abrolhos_pm_set_pm_qos(etdev, value); +} + +static void edgetpu_chip_set_bts(struct edgetpu_dev *etdev, u32 value) +{ + abrolhos_pm_set_bts(etdev, value); +} + +void edgetpu_chip_handle_reverse_kci(struct edgetpu_dev *etdev, + struct edgetpu_kci_response_element *resp) +{ + switch (resp->code) { + case RKCI_CODE_PM_QOS: + edgetpu_chip_set_pm_qos(etdev, resp->retval); + break; + case RKCI_CODE_BTS: + edgetpu_chip_set_bts(etdev, resp->retval); + break; + default: + etdev_warn(etdev, "%s: Unrecognized KCI request: %u\n", + __func__, resp->code); + break; + } +} diff --git a/drivers/edgetpu/abrolhos-pm.c b/drivers/edgetpu/abrolhos-pm.c index 12bbe3f..b71e45d 100644 --- a/drivers/edgetpu/abrolhos-pm.c +++ b/drivers/edgetpu/abrolhos-pm.c @@ -5,6 +5,7 @@ * Copyright (C) 2020 Google, Inc. 
*/ +#include #include #include #include @@ -20,8 +21,27 @@ #include "edgetpu-pm.h" #include "edgetpu-telemetry.h" +#include "soc/google/exynos_pm_qos.h" +#include "soc/google/bts.h" + #include "edgetpu-pm.c" +/* + * Encode INT/MIF values as a 16 bit pair in the 32-bit return value + * (in units of MHz, to provide enough range) + */ +#define PM_QOS_INT_SHIFT (16) +#define PM_QOS_MIF_MASK (0xFFFF) +#define PM_QOS_FACTOR (1000) + +/* INT/MIF requests for memory bandwidth */ +static struct exynos_pm_qos_request int_min; +static struct exynos_pm_qos_request mif_min; + +/* BTS */ +static unsigned int performance_scenario; +static atomic64_t scenario_count = ATOMIC_INIT(0); + /* Default power state: the lowest power state that keeps firmware running */ static int power_state = TPU_DEEP_SLEEP_CLOCKS_SLOW; @@ -448,7 +468,7 @@ abrolhos_pm_shutdown_firmware(struct abrolhos_platform_dev *etpdev, !edgetpu_pchannel_power_down(etdev, false)) return; - cancel_work_sync(&etdev->kci->work); + edgetpu_kci_cancel_work_queues(etdev->kci); etdev_warn(etdev, "Forcing shutdown through power policy\n"); /* Request GSA shutdown to make sure the R52 core is reset */ gsa_send_tpu_cmd(etpdev->gsa_dev, GSA_TPU_SHUTDOWN); @@ -465,6 +485,24 @@ abrolhos_pm_shutdown_firmware(struct abrolhos_platform_dev *etpdev, abrolhos_pwr_policy_set(edgetpu_pdev, TPU_ACTIVE_OD); } +static void abrolhos_pm_cleanup_bts_scenario(struct edgetpu_dev *etdev) +{ + if (!performance_scenario) + return; + while (atomic64_fetch_dec(&scenario_count) > 0) { + int ret = bts_del_scenario(performance_scenario); + + if (ret) { + atomic64_set(&scenario_count, 0); + etdev_warn_once( + etdev, + "error %d in cleaning up BTS scenario %u\n", + ret, performance_scenario); + return; + } + } +} + static void abrolhos_power_down(struct edgetpu_pm *etpm) { struct edgetpu_dev *etdev = etpm->etdev; @@ -474,6 +512,12 @@ static void abrolhos_power_down(struct edgetpu_pm *etpm) etdev_info(etdev, "Powering down\n"); + /* Remove our vote for 
INT/MIF state (if any) */ + exynos_pm_qos_update_request(&int_min, 0); + exynos_pm_qos_update_request(&mif_min, 0); + + abrolhos_pm_cleanup_bts_scenario(etdev); + if (abrolhos_pwr_state_get(etdev->dev, &val)) { etdev_warn(etdev, "Failed to read current power state\n"); val = TPU_ACTIVE_NOM; @@ -488,7 +532,7 @@ static void abrolhos_power_down(struct edgetpu_pm *etpm) edgetpu_kci_update_usage(etdev); abrolhos_pm_shutdown_firmware(edgetpu_pdev, etdev, edgetpu_pdev); - cancel_work_sync(&etdev->kci->work); + edgetpu_kci_cancel_work_queues(etdev->kci); } res = gsa_send_tpu_cmd(edgetpu_pdev->gsa_dev, GSA_TPU_SHUTDOWN); @@ -561,10 +605,82 @@ static struct edgetpu_pm_handlers abrolhos_pm_handlers = { int abrolhos_pm_create(struct edgetpu_dev *etdev) { + exynos_pm_qos_add_request(&int_min, PM_QOS_DEVICE_THROUGHPUT, 0); + exynos_pm_qos_add_request(&mif_min, PM_QOS_BUS_THROUGHPUT, 0); + + performance_scenario = bts_get_scenindex("tpu_performance"); + + if (!performance_scenario) + etdev_warn(etdev, "tpu_performance BTS scenario not found\n"); + return edgetpu_pm_create(etdev, &abrolhos_pm_handlers); } void abrolhos_pm_destroy(struct edgetpu_dev *etdev) { + abrolhos_pm_cleanup_bts_scenario(etdev); + exynos_pm_qos_remove_request(&int_min); + exynos_pm_qos_remove_request(&mif_min); + edgetpu_pm_destroy(etdev); } + +void abrolhos_pm_set_pm_qos(struct edgetpu_dev *etdev, u32 pm_qos_val) +{ + s32 int_val = (pm_qos_val >> PM_QOS_INT_SHIFT) * PM_QOS_FACTOR; + s32 mif_val = (pm_qos_val & PM_QOS_MIF_MASK) * PM_QOS_FACTOR; + + etdev_dbg(etdev, "%s: pm_qos request - int = %d mif = %d\n", __func__, + int_val, mif_val); + + exynos_pm_qos_update_request(&int_min, int_val); + exynos_pm_qos_update_request(&mif_min, mif_val); +} + +static void abrolhos_pm_activate_bts_scenario(struct edgetpu_dev *etdev) +{ + /* bts_add_scenario() keeps track of reference count internally.*/ + int ret; + + if (!performance_scenario) + return; + ret = bts_add_scenario(performance_scenario); + if (ret) + 
etdev_warn_once(etdev, "error %d adding BTS scenario %u\n", ret, + performance_scenario); + else + atomic64_inc(&scenario_count); +} + +static void abrolhos_pm_deactivate_bts_scenario(struct edgetpu_dev *etdev) +{ + /* bts_del_scenario() keeps track of reference count internally.*/ + int ret; + + if (!performance_scenario) + return; + ret = bts_del_scenario(performance_scenario); + if (ret) + etdev_warn_once(etdev, "error %d deleting BTS scenario %u\n", + ret, performance_scenario); + else + atomic64_dec(&scenario_count); +} + +void abrolhos_pm_set_bts(struct edgetpu_dev *etdev, u32 bts_val) +{ + etdev_dbg(etdev, "%s: bts request - val = %u\n", __func__, bts_val); + + switch (bts_val) { + case 0: + abrolhos_pm_deactivate_bts_scenario(etdev); + break; + case 1: + abrolhos_pm_activate_bts_scenario(etdev); + break; + default: + etdev_warn(etdev, "%s: invalid BTS request value: %u\n", + __func__, bts_val); + break; + } +} diff --git a/drivers/edgetpu/abrolhos-pm.h b/drivers/edgetpu/abrolhos-pm.h index 864f070..bf0128f 100644 --- a/drivers/edgetpu/abrolhos-pm.h +++ b/drivers/edgetpu/abrolhos-pm.h @@ -7,6 +7,7 @@ #ifndef __ABROLHOS_PM_H__ #define __ABROLHOS_PM_H__ +#include "edgetpu-kci.h" #include "edgetpu-internal.h" /* Can't build out of tree with acpm_dvfs unless kernel supports ACPM */ @@ -66,8 +67,16 @@ enum tpu_pwr_state { TPU_ACTIVE_OD = 1230000, }; -#define TPU_POLICY_MAX TPU_ACTIVE_OD +/* + * Request codes from firmware + * Values must match with firmware code base + */ +enum abrolhos_reverse_kci_code { + RKCI_CODE_PM_QOS = RKCI_CHIP_CODE_FIRST + 1, + RKCI_CODE_BTS = RKCI_CHIP_CODE_FIRST + 2, +}; +#define TPU_POLICY_MAX TPU_ACTIVE_OD #define TPU_ACPM_DOMAIN 7 @@ -150,5 +159,8 @@ int abrolhos_pm_create(struct edgetpu_dev *etdev); void abrolhos_pm_destroy(struct edgetpu_dev *etdev); +void abrolhos_pm_set_pm_qos(struct edgetpu_dev *etdev, u32 pm_qos_val); + +void abrolhos_pm_set_bts(struct edgetpu_dev *etdev, u32 bts_val); #endif /* __ABROLHOS_PM_H__ */ diff --git 
a/drivers/edgetpu/abrolhos-thermal.c b/drivers/edgetpu/abrolhos-thermal.c index 2404f63..2740d08 100644 --- a/drivers/edgetpu/abrolhos-thermal.c +++ b/drivers/edgetpu/abrolhos-thermal.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "abrolhos-firmware.h" #include "abrolhos-platform.h" @@ -141,6 +142,9 @@ static int edgetpu_state2power_internal(unsigned long state, u32 *power, } static int edgetpu_get_requested_power(struct thermal_cooling_device *cdev, +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 10, 0) + struct thermal_zone_device *tz, +#endif u32 *power) { unsigned long state_original; @@ -152,6 +156,9 @@ static int edgetpu_get_requested_power(struct thermal_cooling_device *cdev, } static int edgetpu_state2power(struct thermal_cooling_device *cdev, +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 10, 0) + struct thermal_zone_device *tz, +#endif unsigned long state, u32 *power) { struct edgetpu_thermal *cooling = cdev->devdata; @@ -167,6 +174,9 @@ static int edgetpu_state2power(struct thermal_cooling_device *cdev, } static int edgetpu_power2state(struct thermal_cooling_device *cdev, +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 10, 0) + struct thermal_zone_device *tz, +#endif u32 power, unsigned long *state) { int i; diff --git a/drivers/edgetpu/edgetpu-async.h b/drivers/edgetpu/edgetpu-async.h index 6278208..b7c2a48 100644 --- a/drivers/edgetpu/edgetpu-async.h +++ b/drivers/edgetpu/edgetpu-async.h @@ -48,6 +48,12 @@ struct edgetpu_async_entry { int ret; }; +/* + * Reduce duplicate code in for_each_async_ret. Do not use this in other place. + */ +#define _set_ret_val(ctx, val, i) \ + ((i) < (ctx)->n_jobs ? (val = (typeof(val))(size_t)((ctx)->ret[i])) : 0) + /* * Helper to loop through the return values. Use this if and only if * edgetpu_async_wait(ctx) is executed successfully. @@ -56,11 +62,8 @@ struct edgetpu_async_entry { * for_each_async_ret(ctx, ret, i) { ... 
} */ #define for_each_async_ret(ctx, val, i) \ - for (i = 0, val = (typeof(val))(size_t)((ctx)->ret[i]); \ - i < (ctx)->n_jobs; \ - ++i < (ctx)->n_jobs ? \ - (val = (typeof(val))(size_t)((ctx)->ret[i])) : \ - 0) + for (i = 0, _set_ret_val(ctx, val, 0); i < (ctx)->n_jobs; \ + ++i, _set_ret_val(ctx, val, i)) /* * Helper to loop through the jobs currently added, with the same order of diff --git a/drivers/edgetpu/edgetpu-core.c b/drivers/edgetpu/edgetpu-core.c index a5c05e1..e1ac96f 100644 --- a/drivers/edgetpu/edgetpu-core.c +++ b/drivers/edgetpu/edgetpu-core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include #include "edgetpu-config.h" #include "edgetpu-debug-dump.h" @@ -39,13 +41,17 @@ static atomic_t single_dev_count = ATOMIC_INIT(-1); -/* TODO(b/156444816): Check permission. */ static int edgetpu_mmap_compat(struct edgetpu_client *client, struct vm_area_struct *vma) { int ret; ulong phys_base, vma_size, map_size; + /* TODO(b/156444816): return -EPERM for non-root users */ + if (!uid_eq(current_euid(), GLOBAL_ROOT_UID)) + etdev_warn_once( + client->etdev, + "mmap full CSR region without root permission is deprecated"); vma_size = vma->vm_end - vma->vm_start; map_size = min(vma_size, client->reg_window.size); phys_base = client->etdev->regs.phys + @@ -304,6 +310,7 @@ void edgetpu_device_remove(struct edgetpu_dev *etdev) { edgetpu_chip_exit(etdev); edgetpu_debug_dump_exit(etdev); + edgetpu_device_dram_exit(etdev); edgetpu_mailbox_remove_all(etdev->mailbox_manager); edgetpu_usage_stats_exit(etdev); edgetpu_mmu_detach(etdev); diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c index d42da25..5035314 100644 --- a/drivers/edgetpu/edgetpu-device-group.c +++ b/drivers/edgetpu/edgetpu-device-group.c @@ -814,6 +814,28 @@ bool edgetpu_set_group_join_lockout(struct edgetpu_dev *etdev, bool lockout) return ret; } +/* parameter to be used in async iova 
mapping jobs */ +struct iova_mapping_worker_param { + struct edgetpu_device_group *group; + struct edgetpu_host_map *hmap; + uint idx; +}; + +static int edgetpu_map_iova_sgt_worker(struct iova_mapping_worker_param *param) +{ + struct edgetpu_device_group *group = param->group; + uint i = param->idx; + struct edgetpu_host_map *hmap = param->hmap; + const struct edgetpu_mapping *map = &hmap->map; + enum edgetpu_context_id ctx_id = edgetpu_group_context_id_locked(group); + struct edgetpu_dev *etdev = edgetpu_device_group_nth_etdev(group, i); + + edgetpu_mmu_reserve(etdev, map->alloc_iova, map->alloc_size); + return edgetpu_mmu_map_iova_sgt(etdev, map->device_address, + &hmap->sg_tables[i], map->dir, + ctx_id); +} + /* * Requests all devices except the leader in @group to map * @hmap->map.device_address -> corresponding @hmap->sg_tables[]. @@ -827,34 +849,66 @@ bool edgetpu_set_group_join_lockout(struct edgetpu_dev *etdev, bool lockout) static int edgetpu_device_group_map_iova_sgt(struct edgetpu_device_group *group, struct edgetpu_host_map *hmap) { - struct edgetpu_dev *etdev; - const struct edgetpu_mapping *map = &hmap->map; - enum edgetpu_context_id ctx_id = edgetpu_group_context_id_locked(group); uint i; int ret; + int val; + const struct edgetpu_mapping *map = &hmap->map; + enum edgetpu_context_id ctx_id = edgetpu_group_context_id_locked(group); + struct edgetpu_async_ctx *ctx; + struct iova_mapping_worker_param *params; - for (i = 1; i < group->n_clients; i++) { - etdev = edgetpu_device_group_nth_etdev(group, i); - edgetpu_mmu_reserve(etdev, map->alloc_iova, map->alloc_size); - ret = edgetpu_mmu_map_iova_sgt(etdev, map->device_address, - &hmap->sg_tables[i], map->dir, - ctx_id); + /* only leader in @group */ + if (group->n_clients == 1) { + ret = 0; + goto out; + } + ctx = edgetpu_async_alloc_ctx(); + params = kmalloc_array(group->n_clients - 1, sizeof(*params), + GFP_KERNEL); + if (!params || !ctx) { + ret = -ENOMEM; + goto out_free; + } + for (i = 0; i < 
group->n_clients - 1; i++) { + params[i].hmap = hmap; + params[i].group = group; + params[i].idx = i + 1; + ret = edgetpu_async_add_job( + ctx, &params[i], + (edgetpu_async_job_t)edgetpu_map_iova_sgt_worker); if (ret) + goto out_free; + } + ret = edgetpu_async_wait(ctx); + if (ret) + goto out_free; + for_each_async_ret(ctx, val, i) { + if (val) { + ret = val; goto rollback; + } } - - return 0; + goto out_free; rollback: - while (i > 1) { - i--; - etdev = edgetpu_device_group_nth_etdev(group, i); - edgetpu_mmu_unmap_iova_sgt_attrs(etdev, map->device_address, - &hmap->sg_tables[i], map->dir, - ctx_id, - DMA_ATTR_SKIP_CPU_SYNC); - edgetpu_mmu_free(etdev, map->alloc_iova, map->alloc_size); + for_each_async_ret(ctx, val, i) { + if (val == 0) { + struct edgetpu_dev *etdev; + int idx = i + 1; + + etdev = edgetpu_device_group_nth_etdev(group, idx); + edgetpu_mmu_unmap_iova_sgt_attrs( + etdev, map->device_address, + &hmap->sg_tables[idx], map->dir, ctx_id, + DMA_ATTR_SKIP_CPU_SYNC); + edgetpu_mmu_free(etdev, map->alloc_iova, + map->alloc_size); + } } +out_free: + edgetpu_async_free_ctx(ctx); + kfree(params); +out: return ret; } @@ -1239,7 +1293,7 @@ int edgetpu_device_group_map(struct edgetpu_device_group *group, mutex_unlock(&group->lock); arg->device_address = map->device_address; - + kfree(pages); return 0; error_release_map: diff --git a/drivers/edgetpu/edgetpu-dram.h b/drivers/edgetpu/edgetpu-dram.h index baffb9f..8df49e8 100644 --- a/drivers/edgetpu/edgetpu-dram.h +++ b/drivers/edgetpu/edgetpu-dram.h @@ -21,6 +21,9 @@ /* Initializes structures for device DRAM management. */ int edgetpu_device_dram_init(struct edgetpu_dev *etdev); +/* Teardown actions for device DRAM management. */ +void edgetpu_device_dram_exit(struct edgetpu_dev *etdev); + /* * Returns the dma-buf FD allocated on the @client's DRAM with size @size. 
*/ @@ -63,6 +66,10 @@ void edgetpu_device_dram_free(struct edgetpu_dev *etdev, void __iomem *kva, /* Add any private debug info related to @dmabuf in seq_file @s. */ void edgetpu_device_dram_dmabuf_info_show(struct dma_buf *dmabuf, struct seq_file *s); + +/* Return amount of on-device DRAM currently used in bytes. */ +size_t edgetpu_device_dram_used(struct edgetpu_dev *etdev); + #else /* !CONFIG_EDGETPU_DEVICE_DRAM */ static inline int edgetpu_device_dram_init(struct edgetpu_dev *etdev) @@ -74,6 +81,10 @@ static inline int edgetpu_device_dram_init(struct edgetpu_dev *etdev) return 0; } +static inline void edgetpu_device_dram_exit(struct edgetpu_dev *etdev) +{ +} + static inline int edgetpu_device_dram_getfd(struct edgetpu_client *client, u64 size) { @@ -112,6 +123,10 @@ static inline void edgetpu_device_dram_dmabuf_info_show(struct dma_buf *dmabuf, { } +static inline size_t edgetpu_device_dram_used(struct edgetpu_dev *etdev) +{ + return 0; +} #endif /* CONFIG_EDGETPU_DEVICE_DRAM */ #endif /* __EDGETPU_DRAM_H__ */ diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c index 6edff9e..ef2e80f 100644 --- a/drivers/edgetpu/edgetpu-firmware.c +++ b/drivers/edgetpu/edgetpu-firmware.c @@ -495,16 +495,22 @@ ssize_t edgetpu_firmware_get_name(struct edgetpu_dev *etdev, char *buf, const char *fw_name; if (!et_fw) - return -ENODEV; + goto fw_none; mutex_lock(&et_fw->p->fw_desc_lock); + if (edgetpu_firmware_status_locked(etdev) != FW_VALID) + goto unlock_fw_none; fw_name = et_fw->p->fw_desc.buf.name; - if (fw_name) - ret = scnprintf(buf, buflen, "%s\n", fw_name); - else - ret = -ENODATA; + if (!fw_name) + goto unlock_fw_none; + ret = scnprintf(buf, buflen, "%s\n", fw_name); mutex_unlock(&et_fw->p->fw_desc_lock); return ret; + +unlock_fw_none: + mutex_unlock(&et_fw->p->fw_desc_lock); +fw_none: + return scnprintf(buf, buflen, "[none]\n"); } static ssize_t load_firmware_show( diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c 
index 047b713..31111eb 100644 --- a/drivers/edgetpu/edgetpu-fs.c +++ b/drivers/edgetpu/edgetpu-fs.c @@ -817,9 +817,9 @@ static void dump_mboxes(struct seq_file *s, struct edgetpu_dev *etdev) enum edgetpu_csrs base; uint32_t val; int mbox_id; - int n_p2p_mbox_dump = min(EDGETPU_NUM_P2P_MAILBOXES, 2); + int n_p2p_mbox_dump = EDGETPU_NUM_P2P_MAILBOXES; - /* Dump VII mailboxes plus 2 P2P (if any) + KCI. */ + /* Dump VII mailboxes plus P2P (if any) + KCI. */ for (mbox_id = 0, base = EDGETPU_MBOX_BASE; mbox_id < EDGETPU_NUM_VII_MAILBOXES + n_p2p_mbox_dump + 1; mbox_id++, base += EDGETPU_MBOX_CSRS_SIZE) { diff --git a/drivers/edgetpu/edgetpu-internal.h b/drivers/edgetpu/edgetpu-internal.h index 8974125..37adebc 100644 --- a/drivers/edgetpu/edgetpu-internal.h +++ b/drivers/edgetpu/edgetpu-internal.h @@ -136,6 +136,7 @@ struct edgetpu_client { struct edgetpu_mapping; struct edgetpu_mailbox_manager; struct edgetpu_kci; +struct edgetpu_kci_response_element; struct edgetpu_telemetry_ctx; struct edgetpu_mempool; @@ -332,6 +333,14 @@ void edgetpu_setup_mmu(struct edgetpu_dev *etdev); /* Read TPU timestamp */ u64 edgetpu_chip_tpu_timestamp(struct edgetpu_dev *etdev); +/* + * Handle chip-specific incoming requests from firmware over KCI + * Note: This will get called from the system's work queue. + * Code should not block for extended periods of time + */ +void edgetpu_chip_handle_reverse_kci(struct edgetpu_dev *etdev, + struct edgetpu_kci_response_element *resp); + /* Device -> Core API */ /* Add current thread as new TPU client */ diff --git a/drivers/edgetpu/edgetpu-kci.c b/drivers/edgetpu/edgetpu-kci.c index 40f0b70..0f9ca8d 100644 --- a/drivers/edgetpu/edgetpu-kci.c +++ b/drivers/edgetpu/edgetpu-kci.c @@ -6,6 +6,7 @@ * Copyright (C) 2019 Google, Inc. 
*/ +#include #include #include /* dmam_alloc_coherent */ #include @@ -74,6 +75,97 @@ static void edgetpu_kci_free_queue(struct edgetpu_dev *etdev, edgetpu_iremap_free(etdev, mem, EDGETPU_CONTEXT_KCI); } +/* Handle one incoming request from firmware */ +static void +edgetpu_reverse_kci_consume_response(struct edgetpu_dev *etdev, + struct edgetpu_kci_response_element *resp) +{ + if (resp->code <= RKCI_CHIP_CODE_LAST) { + edgetpu_chip_handle_reverse_kci(etdev, resp); + return; + } + /* We don't have any generic reverse KCI codes yet */ + etdev_warn(etdev, "%s: Unrecognized KCI request: %u\n", __func__, + resp->code); +} + +/* Remove one element from the circular buffer */ +static int +edgetpu_reverse_kci_remove_response(struct edgetpu_reverse_kci *rkci, + struct edgetpu_kci_response_element *resp) +{ + unsigned long head, tail; + int ret = 0; + + spin_lock(&rkci->consumer_lock); + + /* + * Prevents the compiler from discarding and reloading its cached value + * additionally forces the CPU to order against subsequent memory + * references. 
+ * Shamelessly stolen from: + * https://www.kernel.org/doc/html/latest/core-api/circular-buffers.html + */ + head = smp_load_acquire(&rkci->head); + tail = rkci->tail; + if (CIRC_CNT(head, tail, REVERSE_KCI_BUFFER_SIZE) >= 1) { + *resp = rkci->buffer[tail]; + tail = (tail + 1) & (REVERSE_KCI_BUFFER_SIZE - 1); + ret = 1; + smp_store_release(&rkci->tail, tail); + } + spin_unlock(&rkci->consumer_lock); + return ret; +} + +/* Worker for incoming requests from firmware */ +static void edgetpu_reverse_kci_work(struct work_struct *work) +{ + struct edgetpu_kci_response_element resp; + struct edgetpu_reverse_kci *rkci = + container_of(work, struct edgetpu_reverse_kci, work); + struct edgetpu_kci *kci = container_of(rkci, struct edgetpu_kci, rkci); + + while (edgetpu_reverse_kci_remove_response(rkci, &resp)) + edgetpu_reverse_kci_consume_response(kci->mailbox->etdev, + &resp); +} + +/* + * Add an incoming request from firmware to the circular buffer and + * schedule the work queue for processing + */ +static int edgetpu_reverse_kci_add_response( + struct edgetpu_kci *kci, + const struct edgetpu_kci_response_element *resp) +{ + struct edgetpu_reverse_kci *rkci = &kci->rkci; + unsigned long head, tail; + int ret = 0; + + spin_lock(&rkci->producer_lock); + head = rkci->head; + tail = READ_ONCE(rkci->tail); + if (CIRC_SPACE(head, tail, REVERSE_KCI_BUFFER_SIZE) >= 1) { + rkci->buffer[head] = *resp; + smp_store_release(&rkci->head, + (head + 1) & (REVERSE_KCI_BUFFER_SIZE - 1)); + schedule_work(&rkci->work); + } else { + ret = -ENOSPC; + } + spin_unlock(&rkci->producer_lock); + return ret; +} + +/* Initialize the Reverse KCI handler */ +static void edgetpu_reverse_kci_init(struct edgetpu_reverse_kci *rkci) +{ + spin_lock_init(&rkci->producer_lock); + spin_lock_init(&rkci->consumer_lock); + INIT_WORK(&rkci->work, edgetpu_reverse_kci_work); +} + /* * Pops the wait_list until the sequence number of @resp is found, and copies * @resp to the found entry. 
@@ -118,11 +210,24 @@ static void edgetpu_kci_consume_wait_list( spin_unlock(&kci->wait_list_lock); } -/* Handler of a response. */ -static void edgetpu_kci_handle_response( - struct edgetpu_kci *kci, - const struct edgetpu_kci_response_element *resp) +/* + * Handler of a response. + * if seq has the MSB set, forward the response to the reverse KCI handler + */ +static void +edgetpu_kci_handle_response(struct edgetpu_kci *kci, + const struct edgetpu_kci_response_element *resp) { + if (resp->seq & KCI_REVERSE_FLAG) { + int ret = edgetpu_reverse_kci_add_response(kci, resp); + + if (ret) + etdev_warn( + kci->mailbox->etdev, + "Failed to handle reverse KCI code %u (%d)\n", + resp->code, ret); + return; + } edgetpu_kci_consume_wait_list(kci, resp); } @@ -348,6 +453,7 @@ int edgetpu_kci_init(struct edgetpu_mailbox_manager *mgr, spin_lock_init(&kci->wait_list_lock); init_waitqueue_head(&kci->wait_list_waitq); INIT_WORK(&kci->work, edgetpu_kci_consume_responses_work); + edgetpu_reverse_kci_init(&kci->rkci); EDGETPU_MAILBOX_CONTEXT_WRITE(mailbox, context_enable, 1); return 0; } @@ -375,6 +481,13 @@ int edgetpu_kci_reinit(struct edgetpu_kci *kci) return 0; } +void edgetpu_kci_cancel_work_queues(struct edgetpu_kci *kci) +{ + /* Cancel KCI and reverse KCI workers */ + cancel_work_sync(&kci->work); + cancel_work_sync(&kci->rkci.work); +} + void edgetpu_kci_release(struct edgetpu_dev *etdev, struct edgetpu_kci *kci) { if (!kci) @@ -384,8 +497,7 @@ void edgetpu_kci_release(struct edgetpu_dev *etdev, struct edgetpu_kci *kci) * need to free them. */ - /* Cancel the queue consumer worker or wait until it's done. */ - cancel_work_sync(&kci->work); + edgetpu_kci_cancel_work_queues(kci); edgetpu_kci_free_queue(etdev, &kci->cmd_queue_mem); edgetpu_kci_free_queue(etdev, &kci->resp_queue_mem); @@ -736,13 +848,19 @@ int edgetpu_kci_update_usage(struct edgetpu_dev *etdev) struct edgetpu_kci_response_element resp; int ret; + /* Quick return if device already powered down, else get PM ref. 
*/ + if (!edgetpu_is_powered(etdev)) + return -EAGAIN; + ret = edgetpu_pm_get(etdev->pm); + if (ret) + return ret; ret = edgetpu_iremap_alloc(etdev, EDGETPU_USAGE_BUFFER_SIZE, &mem, EDGETPU_CONTEXT_KCI); if (ret) { etdev_warn_once(etdev, "%s: failed to allocate usage buffer", __func__); - return ret; + goto out; } cmd.dma.address = mem.tpu_addr; @@ -758,6 +876,9 @@ int edgetpu_kci_update_usage(struct edgetpu_dev *etdev) etdev_warn_once(etdev, "%s: error %d", __func__, ret); edgetpu_iremap_free(etdev, &mem, EDGETPU_CONTEXT_KCI); + +out: + edgetpu_pm_put(etdev->pm); return ret; } diff --git a/drivers/edgetpu/edgetpu-kci.h b/drivers/edgetpu/edgetpu-kci.h index 09189c0..d9e7aeb 100644 --- a/drivers/edgetpu/edgetpu-kci.h +++ b/drivers/edgetpu/edgetpu-kci.h @@ -20,6 +20,12 @@ #include "edgetpu-internal.h" #include "edgetpu-mailbox.h" +/* + * Maximum number of outstanding KCI requests from firmware + * This is used to size a circular buffer, so it must be a power of 2 + */ +#define REVERSE_KCI_BUFFER_SIZE (8) + /* * The status field in a firmware response is set to this by us when the * response is fetched from the queue. @@ -37,6 +43,14 @@ */ #define KCI_STATUS_NO_RESPONSE (2) + +/* + * Command/response sequence numbers capped at half the range of the 64-bit + * value range. The second half is reserved for incoming requests from firmware. + * These are tagged with the MSB set. + */ +#define KCI_REVERSE_FLAG (0x8000000000000000ull) + /* command/response queue elements for KCI */ struct edgetpu_dma_descriptor { @@ -62,8 +76,8 @@ struct edgetpu_kci_response_element { */ u16 status; /* - * Return value is not currently needed by KCI, but firmware may set - * this to a watermark value to aid in debugging + * Return value is not currently needed by KCI command responses, but + * incoming requests from firmware may encode information here. 
*/ u32 retval; } __packed; @@ -98,6 +112,19 @@ enum edgetpu_kci_code { KCI_CODE_GET_USAGE = 12, }; +/* + * Definition of reverse KCI request code ranges + * 16-bit unsigned integer + * First half is reserved for chip specific codes, + * Generic codes can use the second half. + */ +enum edgetpu_reverse_kci_code { + RKCI_CHIP_CODE_FIRST = 0, + RKCI_CHIP_CODE_LAST = 0x7FFF, + RKCI_GENERIC_CODE_FIRST = 0x8000, + RKCI_GENERIC_CODE_LAST = 0xFFFF, +}; + /* * Definition of code in response elements. * It is a 16-bit unsigned integer. @@ -131,6 +158,19 @@ struct edgetpu_kci_wait_list { struct edgetpu_kci_response_element *resp; }; +/* Struct to hold a circular buffer for incoming KCI responses */ +struct edgetpu_reverse_kci { + unsigned long head; + unsigned long tail; + struct edgetpu_kci_response_element buffer[REVERSE_KCI_BUFFER_SIZE]; + /* Lock to push elements in the buffer from the interrupt handler */ + spinlock_t producer_lock; + /* Lock to pop elements from the buffer in the worker */ + spinlock_t consumer_lock; + /* Worker to handle responses */ + struct work_struct work; +}; + struct edgetpu_kci { struct edgetpu_mailbox *mailbox; struct mutex mailbox_lock; /* protects mailbox */ @@ -151,6 +191,8 @@ struct edgetpu_kci { /* queue for waiting for the wait_list to be consumed */ wait_queue_head_t wait_list_waitq; struct work_struct work; /* worker of consuming responses */ + /* Handler for reverse (firmware -> kernel) requests */ + struct edgetpu_reverse_kci rkci; }; struct edgetpu_kci_device_group_detail { @@ -288,4 +330,7 @@ int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_ids); /* Inform the firmware the VII with @mailbox_ids are closed. 
*/ int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_ids); +/* Cancel work queues or wait until they're done */ +void edgetpu_kci_cancel_work_queues(struct edgetpu_kci *kci); + #endif /* __EDGETPU_KCI_H__ */ diff --git a/drivers/edgetpu/edgetpu-mailbox.c b/drivers/edgetpu/edgetpu-mailbox.c index 90b3ed8..1a06578 100644 --- a/drivers/edgetpu/edgetpu-mailbox.c +++ b/drivers/edgetpu/edgetpu-mailbox.c @@ -611,10 +611,10 @@ void edgetpu_mailbox_remove_all(struct edgetpu_mailbox_manager *mgr) } /* - * The interrupt handler for mailboxes. + * The interrupt handler for KCI and VII mailboxes. * - * This handler loops through mailboxes with an interrupt pending and invokes - * their IRQ handlers. + * This handler loops through such mailboxes with an interrupt pending and + * invokes their IRQ handlers. */ irqreturn_t edgetpu_mailbox_handle_irq(struct edgetpu_mailbox_manager *mgr) { @@ -625,7 +625,7 @@ irqreturn_t edgetpu_mailbox_handle_irq(struct edgetpu_mailbox_manager *mgr) return IRQ_NONE; read_lock(&mgr->mailboxes_lock); - for (i = 0; i < mgr->num_mailbox; i++) { + for (i = 0; i < mgr->vii_index_to; i++) { mailbox = mgr->mailboxes[i]; if (!mailbox) continue; diff --git a/drivers/edgetpu/edgetpu-pm.c b/drivers/edgetpu/edgetpu-pm.c index 7d89754..f0eb316 100644 --- a/drivers/edgetpu/edgetpu-pm.c +++ b/drivers/edgetpu/edgetpu-pm.c @@ -241,6 +241,10 @@ int edgetpu_pchannel_power_down(struct edgetpu_dev *etdev, bool wait_on_pactive) do { ret = pchannel_state_change_request(etdev, STATE_SHUTDOWN); tries--; + /* Throttle the retry */ + if (tries && ret == -EACCES) + usleep_range(EDGETPU_PCHANNEL_RETRY_DELAY_MIN, + EDGETPU_PCHANNEL_RETRY_DELAY_MAX); } while (ret && tries); if (ret) diff --git a/drivers/edgetpu/edgetpu-pm.h b/drivers/edgetpu/edgetpu-pm.h index 45d2fad..345ad29 100644 --- a/drivers/edgetpu/edgetpu-pm.h +++ b/drivers/edgetpu/edgetpu-pm.h @@ -14,6 +14,8 @@ #define STATE_RUN 0 #define EDGETPU_PCHANNEL_STATE_CHANGE_TIMEOUT 1000 /* 1 ms */ #define 
EDGETPU_PCHANNEL_STATE_CHANGE_RETRIES 10 +#define EDGETPU_PCHANNEL_RETRY_DELAY_MIN 900 +#define EDGETPU_PCHANNEL_RETRY_DELAY_MAX 1000 struct edgetpu_pm_private; struct edgetpu_pm; diff --git a/drivers/edgetpu/edgetpu-usage-stats.c b/drivers/edgetpu/edgetpu-usage-stats.c index 56bbfa6..06328ae 100644 --- a/drivers/edgetpu/edgetpu-usage-stats.c +++ b/drivers/edgetpu/edgetpu-usage-stats.c @@ -167,6 +167,22 @@ void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf) } } +int edgetpu_usage_get_utilization(struct edgetpu_dev *etdev, + enum edgetpu_usage_component component) +{ + struct edgetpu_usage_stats *ustats = etdev->usage_stats; + int32_t val; + + if (component >= EDGETPU_USAGE_COMPONENT_COUNT) + return -1; + edgetpu_kci_update_usage(etdev); + mutex_lock(&ustats->usage_stats_lock); + val = ustats->component_utilization[component]; + ustats->component_utilization[component] = 0; + mutex_unlock(&ustats->usage_stats_lock); + return val; +} + static ssize_t tpu_usage_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -247,14 +263,10 @@ static ssize_t device_utilization_show(struct device *dev, char *buf) { struct edgetpu_dev *etdev = dev_get_drvdata(dev); - struct edgetpu_usage_stats *ustats = etdev->usage_stats; int32_t val; - edgetpu_kci_update_usage(etdev); - mutex_lock(&ustats->usage_stats_lock); - val = ustats->component_utilization[EDGETPU_USAGE_COMPONENT_DEVICE]; - ustats->component_utilization[EDGETPU_USAGE_COMPONENT_DEVICE] = 0; - mutex_unlock(&ustats->usage_stats_lock); + val = edgetpu_usage_get_utilization( + etdev, EDGETPU_USAGE_COMPONENT_DEVICE); return scnprintf(buf, PAGE_SIZE, "%d\n", val); } static DEVICE_ATTR_RO(device_utilization); @@ -264,14 +276,10 @@ static ssize_t tpu_utilization_show(struct device *dev, char *buf) { struct edgetpu_dev *etdev = dev_get_drvdata(dev); - struct edgetpu_usage_stats *ustats = etdev->usage_stats; int32_t val; - edgetpu_kci_update_usage(etdev); - 
mutex_lock(&ustats->usage_stats_lock); - val = ustats->component_utilization[EDGETPU_USAGE_COMPONENT_TPU]; - ustats->component_utilization[EDGETPU_USAGE_COMPONENT_TPU] = 0; - mutex_unlock(&ustats->usage_stats_lock); + val = edgetpu_usage_get_utilization( + etdev, EDGETPU_USAGE_COMPONENT_TPU); return scnprintf(buf, PAGE_SIZE, "%d\n", val); } static DEVICE_ATTR_RO(tpu_utilization); diff --git a/drivers/edgetpu/edgetpu-usage-stats.h b/drivers/edgetpu/edgetpu-usage-stats.h index 6b72747..644d66d 100644 --- a/drivers/edgetpu/edgetpu-usage-stats.h +++ b/drivers/edgetpu/edgetpu-usage-stats.h @@ -83,6 +83,8 @@ struct edgetpu_usage_stats { }; int edgetpu_usage_add(struct edgetpu_dev *etdev, struct tpu_usage *tpu_usage); +int edgetpu_usage_get_utilization(struct edgetpu_dev *etdev, + enum edgetpu_usage_component component); void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf); void edgetpu_usage_stats_init(struct edgetpu_dev *etdev); void edgetpu_usage_stats_exit(struct edgetpu_dev *etdev); -- cgit v1.2.3