summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWhi copybara merger <whitechapel-automerger@google.com>2023-03-27 09:57:12 -0700
committerCopybara-Service <copybara-worker@google.com>2023-03-28 14:25:37 -0700
commit7d226040fcdb03c939b20f5824617a7de9c00962 (patch)
tree847a809ff974de780cd19777cd97be8275934b93
parent10294976775d8ffa64640346f8eb66b1da7f40f0 (diff)
downloadabrolhos-7d226040fcdb03c939b20f5824617a7de9c00962.tar.gz
[Copybara Auto Merge] Merge branch whitechapel into partner-android
edgetpu: fix -Wcast-function-type-strict Building this driver with the newer AOSP Clang 17.0.0 produces the following error: private/google-modules/edgetpu/abrolhos/drivers/edgetpu/edgetpu-device-group.c:1007:4: error: cast from 'int (*)(struct iova_mapping_worker_param *)' to 'edgetpu_async_job_t' (aka 'int (*)(void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] (edgetpu_async_job_t)edgetpu_map_iova_sgt_worker); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix this by declaring edgetpu_map_iova_sgt_worker to have the correct signature, casting the parameter inside edgetpu_map_iova_sgt_worker, and removing the function pointer cast. This warning was added to clang to help catch CFI failures at compile time rather than runtime. Bug: 264965700 edgetpu: sync with darwinn-2.0 9364e47c8 edgetpu: Make group required for creating fence a257e3575 edgetpu: create debugfs symlinks for non-default device names b25b06c56 Revert "edgetpu: Continue powering up if the block is still on" 5dea12ffd edgetpu: Continue powering up if the block is still on af318f68d edgetpu: retry and sleep until block down b4cf36957 edgetpu: Add etdev as private data for syncfences ede90cae7 edgetpu: usage_stats add cluster reconfigurations counters 77dae3e48 edgetpu: usage_stats: process metrics v2 data 3b2bc8d98 edgetpu: pm: reject power up if thermal suspended 5f19efb5b edgetpu: usage_stats send metrics v2 requests with v1 fallback 52c262671 edgetpu: usage stats: sync additional metrics v2 changes e5cc5696b edgetpu: Only call .power_up if needed d06a8f889 edgetpu: Downgrade warning on external mailbox alloc 52fe2ac98 edgetpu: usage stats add field definitions for metrics v2 28bbb7446 edgetpu: usage stats ignore metric fields beyond known size Bug: 271372136 Bug: 271374892 Bug: 269476405 Bug: 272701322 Bug: 264971968 Bug: 258868303 GitOrigin-RevId: 03a5015bf5a6601295e1967dbbedbe242192c30c Change-Id: I2fa4b73fd554d559734f8803c7893570ae9fad8d
-rw-r--r--drivers/edgetpu/abrolhos/config.h3
-rw-r--r--drivers/edgetpu/edgetpu-device-group.c8
-rw-r--r--drivers/edgetpu/edgetpu-dmabuf.c20
-rw-r--r--drivers/edgetpu/edgetpu-external.c4
-rw-r--r--drivers/edgetpu/edgetpu-firmware.c10
-rw-r--r--drivers/edgetpu/edgetpu-fs.c50
-rw-r--r--drivers/edgetpu/edgetpu-internal.h1
-rw-r--r--drivers/edgetpu/edgetpu-kci.c14
-rw-r--r--drivers/edgetpu/edgetpu-kci.h5
-rw-r--r--drivers/edgetpu/edgetpu-pm.c11
-rw-r--r--drivers/edgetpu/edgetpu-thermal.h22
-rw-r--r--drivers/edgetpu/edgetpu-usage-stats.c380
-rw-r--r--drivers/edgetpu/edgetpu-usage-stats.h79
-rw-r--r--drivers/edgetpu/mobile-pm.c22
14 files changed, 372 insertions, 257 deletions
diff --git a/drivers/edgetpu/abrolhos/config.h b/drivers/edgetpu/abrolhos/config.h
index 9cb5ffc..b1db928 100644
--- a/drivers/edgetpu/abrolhos/config.h
+++ b/drivers/edgetpu/abrolhos/config.h
@@ -34,6 +34,9 @@
*/
#define EDGETPU_HAS_REMAPPED_DATA
+/* Metrics are reported for a single default "cluster" component. */
+#define EDGETPU_TPU_CLUSTER_COUNT 1
+
/*
* The TPU VA where the firmware is located.
*
diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c
index 7734f33..2b13cf0 100644
--- a/drivers/edgetpu/edgetpu-device-group.c
+++ b/drivers/edgetpu/edgetpu-device-group.c
@@ -949,8 +949,9 @@ struct iova_mapping_worker_param {
uint idx;
};
-static int edgetpu_map_iova_sgt_worker(struct iova_mapping_worker_param *param)
+static int edgetpu_map_iova_sgt_worker(void *p)
{
+ struct iova_mapping_worker_param *param = p;
struct edgetpu_device_group *group = param->group;
uint i = param->idx;
struct edgetpu_host_map *hmap = param->hmap;
@@ -1006,9 +1007,8 @@ static int edgetpu_device_group_map_iova_sgt(struct edgetpu_device_group *group,
params[i].hmap = hmap;
params[i].group = group;
params[i].idx = i + 1;
- ret = edgetpu_async_add_job(
- ctx, &params[i],
- (edgetpu_async_job_t)edgetpu_map_iova_sgt_worker);
+ ret = edgetpu_async_add_job(ctx, &params[i],
+ edgetpu_map_iova_sgt_worker);
if (ret)
goto out_free;
}
diff --git a/drivers/edgetpu/edgetpu-dmabuf.c b/drivers/edgetpu/edgetpu-dmabuf.c
index 72072f6..fbc9f48 100644
--- a/drivers/edgetpu/edgetpu-dmabuf.c
+++ b/drivers/edgetpu/edgetpu-dmabuf.c
@@ -888,7 +888,7 @@ static void edgetpu_dma_fence_release(struct dma_fence *fence)
list_del(&etfence->etfence_list);
spin_unlock_irqrestore(&etfence_list_lock, flags);
- /* TODO(b/258868303): Don't remove this check when group required, might not yet be set. */
+ /* group might not yet be set if error at init time. */
group = etfence->group;
if (group) {
mutex_lock(&group->lock);
@@ -963,15 +963,10 @@ int edgetpu_sync_fence_create(struct edgetpu_device_group *group,
spin_lock_irqsave(&etfence_list_lock, flags);
list_add_tail(&etfence->etfence_list, &etfence_list_head);
spin_unlock_irqrestore(&etfence_list_lock, flags);
-
- /* TODO(b/258868303): Make group required, disallow creating fence we can't track. */
- if (group) {
- etfence->group = edgetpu_device_group_get(group);
- mutex_lock(&group->lock);
- list_add_tail(&etfence->group_list, &group->dma_fence_list);
- mutex_unlock(&group->lock);
- }
-
+ etfence->group = edgetpu_device_group_get(group);
+ mutex_lock(&group->lock);
+ list_add_tail(&etfence->group_list, &group->dma_fence_list);
+ mutex_unlock(&group->lock);
fd_install(fd, sync_file->file);
datap->fence = fd;
return 0;
@@ -1098,10 +1093,7 @@ int edgetpu_sync_fence_debugfs_show(struct seq_file *s, void *unused)
if (fence->error)
seq_printf(s, " err=%d", fence->error);
- /* TODO(b/258868303): Remove check when group is required. */
- if (etfence->group)
- seq_printf(s, " group=%u", etfence->group->workload_id);
- seq_putc(s, '\n');
+ seq_printf(s, " group=%u\n", etfence->group->workload_id);
spin_unlock_irq(&etfence->lock);
}
diff --git a/drivers/edgetpu/edgetpu-external.c b/drivers/edgetpu/edgetpu-external.c
index 4b86e13..b954844 100644
--- a/drivers/edgetpu/edgetpu-external.c
+++ b/drivers/edgetpu/edgetpu-external.c
@@ -95,8 +95,8 @@ static int edgetpu_external_mailbox_alloc(struct device *edgetpu_dev,
if (copy_from_user(&req.attr, (void __user *)client_info->attr, sizeof(req.attr))) {
if (!client_info->attr)
- etdev_warn(client->etdev,
- "Illegal mailbox attributes, using VII mailbox attrs\n");
+ etdev_dbg(client->etdev,
+ "Using VII mailbox attrs for external mailbox\n");
req.attr = group->mbox_attr;
}
diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c
index cf9009b..ad27ec9 100644
--- a/drivers/edgetpu/edgetpu-firmware.c
+++ b/drivers/edgetpu/edgetpu-firmware.c
@@ -478,20 +478,21 @@ int edgetpu_firmware_run_locked(struct edgetpu_firmware *et_fw,
enum edgetpu_firmware_flags flags)
{
const struct edgetpu_firmware_chip_data *chip_fw = et_fw->p->chip_fw;
+ struct edgetpu_dev *etdev = et_fw->etdev;
struct edgetpu_firmware_desc new_fw_desc;
int ret;
bool is_bl1_run = (flags & FW_BL1);
edgetpu_firmware_set_loading(et_fw);
if (!is_bl1_run)
- edgetpu_sw_wdt_stop(et_fw->etdev);
+ edgetpu_sw_wdt_stop(etdev);
memset(&new_fw_desc, 0, sizeof(new_fw_desc));
ret = edgetpu_firmware_load_locked(et_fw, &new_fw_desc, name, flags);
if (ret)
goto out_failed;
- etdev_dbg(et_fw->etdev, "run fw %s flags=%#x", name, flags);
+ etdev_dbg(etdev, "run fw %s flags=%#x", name, flags);
if (chip_fw->prepare_run) {
/* Note this may recursively call us to run BL1 */
ret = chip_fw->prepare_run(et_fw, &new_fw_desc.buf);
@@ -516,13 +517,16 @@ int edgetpu_firmware_run_locked(struct edgetpu_firmware *et_fw,
/* Don't start wdt if loaded firmware is second stage bootloader. */
if (!ret && !is_bl1_run && et_fw->p->fw_info.fw_flavor != FW_FLAVOR_BL1)
- edgetpu_sw_wdt_start(et_fw->etdev);
+ edgetpu_sw_wdt_start(etdev);
if (!ret && !is_bl1_run && chip_fw->launch_complete)
chip_fw->launch_complete(et_fw);
else if (ret && chip_fw->launch_failed)
chip_fw->launch_failed(et_fw, ret);
edgetpu_firmware_set_state(et_fw, ret);
+ /* If previous firmware was metrics v1-only reset that flag and probe this again. */
+ if (etdev->usage_stats)
+ etdev->usage_stats->use_metrics_v1 = false;
return ret;
out_unload_new_fw:
diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c
index 68ff32e..854c14c 100644
--- a/drivers/edgetpu/edgetpu-fs.c
+++ b/drivers/edgetpu/edgetpu-fs.c
@@ -441,10 +441,11 @@ static int edgetpu_ioctl_sync_fence_create(
if (copy_from_user(&data, (void __user *)datap, sizeof(data)))
return -EFAULT;
LOCK(client);
- if (!client->group)
- /* TODO(b/258868303): Require a group, disallow creating a fence we can't track. */
- etdev_warn(client->etdev,
- "client creating sync fence not joined to a device group");
+ if (!client->group) {
+ etdev_err(client->etdev, "client creating sync fence not joined to a device group");
+ UNLOCK(client);
+ return -EINVAL;
+ }
ret = edgetpu_sync_fence_create(client->group, &data);
UNLOCK(client);
if (ret)
@@ -597,17 +598,14 @@ static int edgetpu_ioctl_acquire_wakelock(struct edgetpu_client *client)
*/
client->pid = current->pid;
client->tgid = current->tgid;
- edgetpu_thermal_lock(thermal);
if (edgetpu_thermal_is_suspended(thermal)) {
/* TPU is thermal suspended, so fail acquiring wakelock */
ret = -EAGAIN;
etdev_warn_ratelimited(client->etdev,
- "wakelock acquire rejected due to thermal suspend");
- edgetpu_thermal_unlock(thermal);
+ "wakelock acquire rejected due to device thermal limit exceeded");
goto error_client_unlock;
} else {
ret = edgetpu_pm_get(client->etdev->pm);
- edgetpu_thermal_unlock(thermal);
if (ret) {
etdev_warn(client->etdev, "%s: pm_get failed (%d)",
__func__, ret);
@@ -1030,6 +1028,19 @@ static const struct file_operations mappings_ops = {
.release = single_release,
};
+static int syncfences_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, edgetpu_sync_fence_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations syncfences_ops = {
+ .open = syncfences_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .owner = THIS_MODULE,
+ .release = single_release,
+};
+
static int edgetpu_pm_debugfs_set_wakelock(void *data, u64 val)
{
struct edgetpu_dev *etdev = data;
@@ -1054,6 +1065,7 @@ static void edgetpu_fs_setup_debugfs(struct edgetpu_dev *etdev)
}
debugfs_create_file("mappings", 0440, etdev->d_entry,
etdev, &mappings_ops);
+ debugfs_create_file("syncfences", 0440, etdev->d_entry, etdev, &syncfences_ops);
debugfs_create_file("wakelock", 0220, etdev->d_entry, etdev,
&fops_wakelock);
#ifndef EDGETPU_FEATURE_MOBILE
@@ -1253,6 +1265,10 @@ static int edgeptu_fs_add_interface(struct edgetpu_dev *etdev, struct edgetpu_de
return ret;
}
+ if (etiparams->name)
+ etiface->d_entry =
+ debugfs_create_symlink(etiparams->name, edgetpu_debugfs_dir,
+ etdev->dev_name);
return 0;
}
@@ -1288,6 +1304,7 @@ void edgetpu_fs_remove(struct edgetpu_dev *etdev)
for (i = 0; i < etdev->num_ifaces; i++) {
struct edgetpu_dev_iface *etiface = &etdev->etiface[i];
+ debugfs_remove(etiface->d_entry);
device_destroy(edgetpu_class, etiface->devno);
etiface->etcdev = NULL;
cdev_del(&etiface->cdev);
@@ -1295,20 +1312,6 @@ void edgetpu_fs_remove(struct edgetpu_dev *etdev)
debugfs_remove_recursive(etdev->d_entry);
}
-static int syncfences_open(struct inode *inode, struct file *file)
-{
- return single_open(file, edgetpu_sync_fence_debugfs_show,
- inode->i_private);
-}
-
-static const struct file_operations syncfences_ops = {
- .open = syncfences_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .owner = THIS_MODULE,
- .release = single_release,
-};
-
static void edgetpu_debugfs_global_setup(void)
{
edgetpu_debugfs_dir = debugfs_create_dir("edgetpu", NULL);
@@ -1316,9 +1319,6 @@ static void edgetpu_debugfs_global_setup(void)
pr_warn(DRIVER_NAME " error creating edgetpu debugfs dir\n");
return;
}
-
- debugfs_create_file("syncfences", 0440, edgetpu_debugfs_dir, NULL,
- &syncfences_ops);
}
int __init edgetpu_fs_init(void)
diff --git a/drivers/edgetpu/edgetpu-internal.h b/drivers/edgetpu/edgetpu-internal.h
index 37ea27f..e8cbf9a 100644
--- a/drivers/edgetpu/edgetpu-internal.h
+++ b/drivers/edgetpu/edgetpu-internal.h
@@ -238,6 +238,7 @@ struct edgetpu_dev_iface {
struct edgetpu_dev *etdev; /* Pointer to core device struct */
dev_t devno; /* char device dev_t */
const char *name; /* interface specific device name */
+ struct dentry *d_entry; /* debugfs symlink if not default device name iface */
};
/* Firmware crash_type codes */
diff --git a/drivers/edgetpu/edgetpu-kci.c b/drivers/edgetpu/edgetpu-kci.c
index 4bd62c3..fd52c3c 100644
--- a/drivers/edgetpu/edgetpu-kci.c
+++ b/drivers/edgetpu/edgetpu-kci.c
@@ -934,10 +934,11 @@ int edgetpu_kci_update_usage_locked(struct edgetpu_dev *etdev)
{
#define EDGETPU_USAGE_BUFFER_SIZE 4096
struct edgetpu_command_element cmd = {
- .code = KCI_CODE_GET_USAGE,
+ .code = KCI_CODE_GET_USAGE_V2,
.dma = {
.address = 0,
.size = 0,
+ .flags = EDGETPU_USAGE_METRIC_VERSION,
},
};
struct edgetpu_coherent_mem mem;
@@ -953,13 +954,22 @@ int edgetpu_kci_update_usage_locked(struct edgetpu_dev *etdev)
return ret;
}
+ /* TODO(b/271372136): remove v1 when v1 firmware no longer in use. */
+retry_v1:
+ if (etdev->usage_stats && etdev->usage_stats->use_metrics_v1)
+ cmd.code = KCI_CODE_GET_USAGE_V1;
cmd.dma.address = mem.tpu_addr;
cmd.dma.size = EDGETPU_USAGE_BUFFER_SIZE;
memset(mem.vaddr, 0, sizeof(struct edgetpu_usage_header));
ret = edgetpu_kci_send_cmd_return_resp(etdev->kci, &cmd, &resp);
- if (ret == KCI_ERROR_UNIMPLEMENTED || ret == KCI_ERROR_UNAVAILABLE)
+ if (ret == KCI_ERROR_UNIMPLEMENTED || ret == KCI_ERROR_UNAVAILABLE) {
+ if (etdev->usage_stats && !etdev->usage_stats->use_metrics_v1) {
+ etdev->usage_stats->use_metrics_v1 = true;
+ goto retry_v1;
+ }
etdev_dbg(etdev, "firmware does not report usage\n");
+ }
else if (ret == KCI_ERROR_OK)
edgetpu_usage_stats_process_buffer(etdev, mem.vaddr);
else if (ret != -ETIMEDOUT)
diff --git a/drivers/edgetpu/edgetpu-kci.h b/drivers/edgetpu/edgetpu-kci.h
index b32b097..4d2f4b0 100644
--- a/drivers/edgetpu/edgetpu-kci.h
+++ b/drivers/edgetpu/edgetpu-kci.h
@@ -112,11 +112,14 @@ enum edgetpu_kci_code {
KCI_CODE_OPEN_DEVICE = 9,
KCI_CODE_CLOSE_DEVICE = 10,
KCI_CODE_FIRMWARE_INFO = 11,
- KCI_CODE_GET_USAGE = 12,
+ /* TODO(b/271372136): remove v1 when v1 firmware no longer in use. */
+ KCI_CODE_GET_USAGE_V1 = 12,
KCI_CODE_NOTIFY_THROTTLING = 13,
KCI_CODE_BLOCK_BUS_SPEED_CONTROL = 14,
/* 15..18 not implemented in this branch */
KCI_CODE_FIRMWARE_TRACING_LEVEL = 19,
+ /* 20 not implemented in this branch */
+ KCI_CODE_GET_USAGE_V2 = 21,
KCI_CODE_RKCI_ACK = 256,
};
diff --git a/drivers/edgetpu/edgetpu-pm.c b/drivers/edgetpu/edgetpu-pm.c
index a71232d..40d41ff 100644
--- a/drivers/edgetpu/edgetpu-pm.c
+++ b/drivers/edgetpu/edgetpu-pm.c
@@ -53,9 +53,13 @@ static int edgetpu_pm_get_locked(struct edgetpu_pm *etpm)
int ret = 0;
if (!power_up_count) {
- ret = etpm->p->handlers->power_up(etpm);
- if (!ret)
- edgetpu_mailbox_restore_active_mailbox_queues(etpm->etdev);
+ if (etpm->p->power_down_pending) {
+ etpm->p->power_down_pending = false;
+ } else {
+ ret = etpm->p->handlers->power_up(etpm);
+ if (!ret)
+ edgetpu_mailbox_restore_active_mailbox_queues(etpm->etdev);
+ }
}
if (ret)
etpm->p->power_up_count--;
@@ -103,7 +107,6 @@ int edgetpu_pm_get(struct edgetpu_pm *etpm)
return 0;
mutex_lock(&etpm->p->lock);
- etpm->p->power_down_pending = false;
ret = edgetpu_pm_get_locked(etpm);
mutex_unlock(&etpm->p->lock);
diff --git a/drivers/edgetpu/edgetpu-thermal.h b/drivers/edgetpu/edgetpu-thermal.h
index dbd283f..0c163e0 100644
--- a/drivers/edgetpu/edgetpu-thermal.h
+++ b/drivers/edgetpu/edgetpu-thermal.h
@@ -56,17 +56,6 @@ int edgetpu_thermal_suspend(struct device *dev);
int edgetpu_thermal_resume(struct device *dev);
/*
- * Holds thermal->lock.
- *
- * Does nothing if the thermal management is not supported.
- */
-static inline void edgetpu_thermal_lock(struct edgetpu_thermal *thermal)
-{
- if (!IS_ERR_OR_NULL(thermal))
- mutex_lock(&thermal->lock);
-}
-
-/*
* Checks whether device is thermal suspended.
* Returns false if the thermal management is not supported.
*/
@@ -77,15 +66,4 @@ static inline bool edgetpu_thermal_is_suspended(struct edgetpu_thermal *thermal)
return false;
}
-/*
- * Releases thermal->lock.
- *
- * Does nothing if the thermal management is not supported.
- */
-static inline void edgetpu_thermal_unlock(struct edgetpu_thermal *thermal)
-{
- if (!IS_ERR_OR_NULL(thermal))
- mutex_unlock(&thermal->lock);
-}
-
#endif /* __EDGETPU_THERMAL_H__ */
diff --git a/drivers/edgetpu/edgetpu-usage-stats.c b/drivers/edgetpu/edgetpu-usage-stats.c
index ba93d49..9934ca6 100644
--- a/drivers/edgetpu/edgetpu-usage-stats.c
+++ b/drivers/edgetpu/edgetpu-usage-stats.c
@@ -74,6 +74,7 @@ int edgetpu_usage_add(struct edgetpu_dev *etdev, struct tpu_usage *tpu_usage)
if (!ustats)
return 0;
+ /* Note: as of metrics v2 the cluster_id is always zero and is ignored. */
etdev_dbg(etdev, "%s: uid=%u state=%u dur=%u", __func__,
tpu_usage->uid, tpu_usage->power_state,
tpu_usage->duration_us);
@@ -125,63 +126,78 @@ static void edgetpu_utilization_update(
mutex_unlock(&ustats->usage_stats_lock);
}
-static void edgetpu_counter_update(
- struct edgetpu_dev *etdev,
- struct edgetpu_usage_counter *counter)
+static void edgetpu_counter_update(struct edgetpu_dev *etdev, struct edgetpu_usage_counter *counter,
+ uint version)
{
struct edgetpu_usage_stats *ustats = etdev->usage_stats;
+ uint component = version > 1 ? counter->component_id : 0;
if (!ustats)
return;
- etdev_dbg(etdev, "%s: type=%d value=%llu\n", __func__,
- counter->type, counter->value);
+ etdev_dbg(etdev, "%s: type=%d value=%llu comp=%u\n", __func__, counter->type,
+ counter->value, component);
mutex_lock(&ustats->usage_stats_lock);
if (counter->type >= 0 && counter->type < EDGETPU_COUNTER_COUNT)
- ustats->counter[counter->type] += counter->value;
+ ustats->counter[counter->type][component] += counter->value;
mutex_unlock(&ustats->usage_stats_lock);
}
-static void edgetpu_counter_clear(
- struct edgetpu_dev *etdev,
- enum edgetpu_usage_counter_type counter_type)
+static void edgetpu_counter_clear(struct edgetpu_dev *etdev,
+ enum edgetpu_usage_counter_type counter_type)
{
struct edgetpu_usage_stats *ustats = etdev->usage_stats;
+ int i;
- if (!ustats)
- return;
if (counter_type >= EDGETPU_COUNTER_COUNT)
return;
mutex_lock(&ustats->usage_stats_lock);
- ustats->counter[counter_type] = 0;
+ for (i = 0; i < EDGETPU_TPU_CLUSTER_COUNT; i++)
+ ustats->counter[counter_type][i] = 0;
mutex_unlock(&ustats->usage_stats_lock);
}
-static void edgetpu_max_watermark_update(
- struct edgetpu_dev *etdev,
- struct edgetpu_usage_max_watermark *max_watermark)
+static void edgetpu_max_watermark_update(struct edgetpu_dev *etdev,
+ struct edgetpu_usage_max_watermark *max_watermark,
+ uint version)
{
struct edgetpu_usage_stats *ustats = etdev->usage_stats;
+ uint component = version > 1 ? max_watermark->component_id : 0;
if (!ustats)
return;
- etdev_dbg(etdev, "%s: type=%d value=%llu\n", __func__,
- max_watermark->type, max_watermark->value);
+ etdev_dbg(etdev, "%s: type=%d value=%llu comp=%u\n", __func__, max_watermark->type,
+ max_watermark->value, component);
if (max_watermark->type < 0 ||
max_watermark->type >= EDGETPU_MAX_WATERMARK_TYPE_COUNT)
return;
mutex_lock(&ustats->usage_stats_lock);
- if (max_watermark->value > ustats->max_watermark[max_watermark->type])
- ustats->max_watermark[max_watermark->type] =
+ if (max_watermark->value > ustats->max_watermark[max_watermark->type][component])
+ ustats->max_watermark[max_watermark->type][component] =
max_watermark->value;
mutex_unlock(&ustats->usage_stats_lock);
}
+static void edgetpu_max_watermark_clear(struct edgetpu_dev *etdev,
+ enum edgetpu_usage_max_watermark_type max_watermark_type)
+{
+ struct edgetpu_usage_stats *ustats = etdev->usage_stats;
+ int i;
+
+ if (max_watermark_type < 0 || max_watermark_type >= EDGETPU_MAX_WATERMARK_TYPE_COUNT)
+ return;
+
+ mutex_lock(&ustats->usage_stats_lock);
+ for (i = 0; i < EDGETPU_TPU_CLUSTER_COUNT; i++)
+ ustats->max_watermark[max_watermark_type][i] = 0;
+ mutex_unlock(&ustats->usage_stats_lock);
+}
+
static void edgetpu_thread_stats_update(
struct edgetpu_dev *etdev,
struct edgetpu_thread_stats *thread_stats)
@@ -241,20 +257,44 @@ out:
void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf)
{
- struct edgetpu_usage_header *header = buf;
- struct edgetpu_usage_metric *metric =
- (struct edgetpu_usage_metric *)(header + 1);
+ struct edgetpu_usage_stats *ustats = etdev->usage_stats;
+ struct edgetpu_usage_metric *metric;
+ uint metric_size;
+ uint num_metrics;
+ uint version;
int i;
- etdev_dbg(etdev, "%s: n=%u sz=%u", __func__,
- header->num_metrics, header->metric_size);
- if (header->metric_size != sizeof(struct edgetpu_usage_metric)) {
- etdev_dbg(etdev, "%s: expected sz=%zu, discard", __func__,
- sizeof(struct edgetpu_usage_metric));
+ if (!ustats)
+ return;
+
+ /* TODO(b/271372136): remove v1 when v1 firmware no longer in use. */
+ if (ustats->use_metrics_v1) {
+ struct edgetpu_usage_header_v1 *header = buf;
+
+ metric_size = header->metric_size;
+ num_metrics = header->num_metrics;
+ version = 1;
+ metric = (struct edgetpu_usage_metric *)(header + 1);
+ } else {
+ struct edgetpu_usage_header *header = buf;
+
+ metric_size = header->metric_size;
+ num_metrics = header->num_metrics;
+ version = header->version;
+ metric = (struct edgetpu_usage_metric *)((char *)header + header->header_bytes);
+ }
+
+ etdev_dbg(etdev, "%s: v=%u n=%u sz=%u", __func__, version, num_metrics, metric_size);
+ if (metric_size < EDGETPU_USAGE_METRIC_SIZE_V1) {
+ etdev_warn_once(etdev, "fw metric size %u less than minimum %u",
+ metric_size, EDGETPU_USAGE_METRIC_SIZE_V1);
return;
}
- for (i = 0; i < header->num_metrics; i++) {
+ if (metric_size > sizeof(struct edgetpu_usage_metric))
+ etdev_dbg(etdev, "fw metrics are later version with unknown fields");
+
+ for (i = 0; i < num_metrics; i++) {
switch (metric->type) {
case EDGETPU_METRIC_TYPE_TPU_USAGE:
edgetpu_usage_add(etdev, &metric->tpu_usage);
@@ -264,19 +304,16 @@ void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf)
etdev, &metric->component_activity);
break;
case EDGETPU_METRIC_TYPE_COUNTER:
- edgetpu_counter_update(etdev, &metric->counter);
+ edgetpu_counter_update(etdev, &metric->counter, version);
break;
case EDGETPU_METRIC_TYPE_MAX_WATERMARK:
- edgetpu_max_watermark_update(
- etdev, &metric->max_watermark);
+ edgetpu_max_watermark_update(etdev, &metric->max_watermark, version);
break;
case EDGETPU_METRIC_TYPE_THREAD_STATS:
- edgetpu_thread_stats_update(
- etdev, &metric->thread_stats);
+ edgetpu_thread_stats_update(etdev, &metric->thread_stats);
break;
case EDGETPU_METRIC_TYPE_DVFS_FREQUENCY_INFO:
- edgetpu_dvfs_frequency_update(
- etdev, metric->dvfs_frequency_info);
+ edgetpu_dvfs_frequency_update(etdev, metric->dvfs_frequency_info);
break;
default:
etdev_dbg(etdev, "%s: %d: skip unknown type=%u",
@@ -284,7 +321,7 @@ void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf)
break;
}
- metric++;
+ metric = (struct edgetpu_usage_metric *)((char *)metric + metric_size);
}
}
@@ -304,36 +341,72 @@ int edgetpu_usage_get_utilization(struct edgetpu_dev *etdev,
return val;
}
-static int64_t edgetpu_usage_get_counter(
- struct edgetpu_dev *etdev,
- enum edgetpu_usage_counter_type counter_type)
+/*
+ * Resyncs firmware stats and formats the requested counter in the supplied buffer.
+ *
+ * If @report_per_cluster is true, and if the firmware implements metrics V2 or higher,
+ * then one value is formatted per cluster (for chips with only one cluster only one value is
+ * formatted).
+ *
+ * Returns the number of bytes written to buf.
+ */
+static ssize_t edgetpu_usage_format_counter(struct edgetpu_dev *etdev, char *buf,
+ enum edgetpu_usage_counter_type counter_type,
+ bool report_per_cluster)
{
struct edgetpu_usage_stats *ustats = etdev->usage_stats;
- int64_t val;
+ uint ncomponents = report_per_cluster && !etdev->usage_stats->use_metrics_v1 ?
+ EDGETPU_TPU_CLUSTER_COUNT : 1;
+ uint i;
+ ssize_t ret = 0;
if (counter_type >= EDGETPU_COUNTER_COUNT)
- return -1;
+ return 0;
edgetpu_kci_update_usage(etdev);
mutex_lock(&ustats->usage_stats_lock);
- val = ustats->counter[counter_type];
+ for (i = 0; i < ncomponents; i++) {
+ if (i)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, " ");
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%llu",
+ ustats->counter[counter_type][i]);
+ }
mutex_unlock(&ustats->usage_stats_lock);
- return val;
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ return ret;
}
-static int64_t edgetpu_usage_get_max_watermark(
- struct edgetpu_dev *etdev,
- enum edgetpu_usage_max_watermark_type max_watermark_type)
+/*
+ * Resyncs firmware stats and formats the requested max watermark in the supplied buffer.
+ *
+ * If @report_per_cluster is true, and if the firmware implements metrics V2 or higher,
+ * then one value is formatted per cluster (for chips with only one cluster only one value is
+ * formatted).
+ *
+ * Returns the number of bytes written to buf.
+ */
+static ssize_t edgetpu_usage_format_max_watermark(
+ struct edgetpu_dev *etdev, char *buf,
+ enum edgetpu_usage_max_watermark_type max_watermark_type, bool report_per_cluster)
{
struct edgetpu_usage_stats *ustats = etdev->usage_stats;
- int64_t val;
+ uint ncomponents = report_per_cluster && !etdev->usage_stats->use_metrics_v1 ?
+ EDGETPU_TPU_CLUSTER_COUNT : 1;
+ uint i;
+ ssize_t ret = 0;
if (max_watermark_type >= EDGETPU_MAX_WATERMARK_TYPE_COUNT)
- return -1;
+ return 0;
edgetpu_kci_update_usage(etdev);
mutex_lock(&ustats->usage_stats_lock);
- val = ustats->max_watermark[max_watermark_type];
+ for (i = 0; i < ncomponents; i++) {
+ if (i)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, " ");
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%llu",
+ ustats->max_watermark[max_watermark_type][i]);
+ }
mutex_unlock(&ustats->usage_stats_lock);
- return val;
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ return ret;
}
static ssize_t tpu_usage_show(struct device *dev,
@@ -447,11 +520,8 @@ static ssize_t tpu_active_cycle_count_show(struct device *dev,
char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_counter(etdev,
- EDGETPU_COUNTER_TPU_ACTIVE_CYCLES);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_TPU_ACTIVE_CYCLES, false);
}
static ssize_t tpu_active_cycle_count_store(struct device *dev,
@@ -472,11 +542,8 @@ static ssize_t tpu_throttle_stall_count_show(struct device *dev,
char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_counter(etdev,
- EDGETPU_COUNTER_TPU_THROTTLE_STALLS);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_TPU_THROTTLE_STALLS, false);
}
static ssize_t tpu_throttle_stall_count_store(struct device *dev,
@@ -497,11 +564,8 @@ static ssize_t inference_count_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_counter(etdev,
- EDGETPU_COUNTER_INFERENCES);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_INFERENCES, true);
}
static ssize_t inference_count_store(struct device *dev,
@@ -517,21 +581,15 @@ static ssize_t inference_count_store(struct device *dev,
static DEVICE_ATTR(inference_count, 0664, inference_count_show,
inference_count_store);
-static ssize_t tpu_op_count_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t tpu_op_count_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_counter(etdev,
- EDGETPU_COUNTER_TPU_OPS);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_TPU_OPS, true);
}
-static ssize_t tpu_op_count_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
+static ssize_t tpu_op_count_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
@@ -540,22 +598,16 @@ static ssize_t tpu_op_count_store(struct device *dev,
}
static DEVICE_ATTR(tpu_op_count, 0664, tpu_op_count_show, tpu_op_count_store);
-static ssize_t param_cache_hit_count_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t param_cache_hit_count_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_counter(etdev,
- EDGETPU_COUNTER_PARAM_CACHE_HITS);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_PARAM_CACHE_HITS, false);
}
-static ssize_t param_cache_hit_count_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
+static ssize_t param_cache_hit_count_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
@@ -565,22 +617,16 @@ static ssize_t param_cache_hit_count_store(struct device *dev,
static DEVICE_ATTR(param_cache_hit_count, 0664, param_cache_hit_count_show,
param_cache_hit_count_store);
-static ssize_t param_cache_miss_count_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t param_cache_miss_count_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_counter(etdev,
- EDGETPU_COUNTER_PARAM_CACHE_MISSES);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_PARAM_CACHE_MISSES, false);
}
-static ssize_t param_cache_miss_count_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
+static ssize_t param_cache_miss_count_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
@@ -590,22 +636,16 @@ static ssize_t param_cache_miss_count_store(struct device *dev,
static DEVICE_ATTR(param_cache_miss_count, 0664, param_cache_miss_count_show,
param_cache_miss_count_store);
-static ssize_t context_preempt_count_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t context_preempt_count_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_counter(etdev,
- EDGETPU_COUNTER_CONTEXT_PREEMPTS);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_CONTEXT_PREEMPTS, true);
}
-static ssize_t context_preempt_count_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
+static ssize_t context_preempt_count_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
@@ -619,10 +659,8 @@ static ssize_t hardware_preempt_count_show(struct device *dev, struct device_att
char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_counter(etdev, EDGETPU_COUNTER_HARDWARE_PREEMPTS);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_HARDWARE_PREEMPTS, true);
}
static ssize_t hardware_preempt_count_store(struct device *dev, struct device_attribute *attr,
@@ -640,10 +678,9 @@ static ssize_t hardware_ctx_save_time_show(struct device *dev, struct device_att
char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_counter(etdev, EDGETPU_COUNTER_HARDWARE_CTX_SAVE_TIME_US);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_HARDWARE_CTX_SAVE_TIME_US,
+ true);
}
static ssize_t hardware_ctx_save_time_store(struct device *dev, struct device_attribute *attr,
@@ -661,10 +698,9 @@ static ssize_t scalar_fence_wait_time_show(struct device *dev, struct device_att
char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_counter(etdev, EDGETPU_COUNTER_SCALAR_FENCE_WAIT_TIME_US);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_SCALAR_FENCE_WAIT_TIME_US,
+ true);
}
static ssize_t scalar_fence_wait_time_store(struct device *dev, struct device_attribute *attr,
@@ -679,13 +715,11 @@ static DEVICE_ATTR(scalar_fence_wait_time, 0664, scalar_fence_wait_time_show,
scalar_fence_wait_time_store);
static ssize_t long_suspend_count_show(struct device *dev, struct device_attribute *attr,
- char *buf)
+ char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_counter(etdev, EDGETPU_COUNTER_LONG_SUSPEND);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_LONG_SUSPEND, false);
}
static ssize_t long_suspend_count_store(struct device *dev, struct device_attribute *attr,
@@ -699,15 +733,53 @@ static ssize_t long_suspend_count_store(struct device *dev, struct device_attrib
static DEVICE_ATTR(long_suspend_count, 0664, long_suspend_count_show,
long_suspend_count_store);
+#if EDGETPU_TPU_CLUSTER_COUNT > 1
+static ssize_t reconfigurations_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_RECONFIGURATIONS, false);
+}
+
+static ssize_t reconfigurations_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+
+ edgetpu_counter_clear(etdev, EDGETPU_COUNTER_RECONFIGURATIONS);
+ return count;
+}
+static DEVICE_ATTR(reconfigurations, 0664, reconfigurations_show, reconfigurations_store);
+
+static ssize_t preempt_reconfigurations_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+
+ return edgetpu_usage_format_counter(etdev, buf, EDGETPU_COUNTER_PREEMPT_RECONFIGURATIONS,
+ false);
+}
+
+static ssize_t preempt_reconfigurations_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+
+ edgetpu_counter_clear(etdev, EDGETPU_COUNTER_PREEMPT_RECONFIGURATIONS);
+ return count;
+}
+static DEVICE_ATTR(preempt_reconfigurations, 0664, preempt_reconfigurations_show,
+ preempt_reconfigurations_store);
+#endif /* EDGETPU_TPU_CLUSTER_COUNT > 1 */
+
+
static ssize_t outstanding_commands_max_show(
struct device *dev, struct device_attribute *attr, char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_max_watermark(
- etdev, EDGETPU_MAX_WATERMARK_OUT_CMDS);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_max_watermark(etdev, buf, EDGETPU_MAX_WATERMARK_OUT_CMDS,
+ false);
}
static ssize_t outstanding_commands_max_store(
@@ -715,14 +787,8 @@ static ssize_t outstanding_commands_max_store(
const char *buf, size_t count)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- struct edgetpu_usage_stats *ustats = etdev->usage_stats;
-
- if (ustats) {
- mutex_lock(&ustats->usage_stats_lock);
- ustats->max_watermark[EDGETPU_MAX_WATERMARK_OUT_CMDS] = 0;
- mutex_unlock(&ustats->usage_stats_lock);
- }
+ edgetpu_max_watermark_clear(etdev, EDGETPU_MAX_WATERMARK_OUT_CMDS);
return count;
}
static DEVICE_ATTR(outstanding_commands_max, 0664,
@@ -733,11 +799,9 @@ static ssize_t preempt_depth_max_show(
struct device *dev, struct device_attribute *attr, char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_max_watermark(
- etdev, EDGETPU_MAX_WATERMARK_PREEMPT_DEPTH);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_max_watermark(etdev, buf, EDGETPU_MAX_WATERMARK_PREEMPT_DEPTH,
+ true);
}
static ssize_t preempt_depth_max_store(
@@ -745,14 +809,8 @@ static ssize_t preempt_depth_max_store(
const char *buf, size_t count)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- struct edgetpu_usage_stats *ustats = etdev->usage_stats;
-
- if (ustats) {
- mutex_lock(&ustats->usage_stats_lock);
- ustats->max_watermark[EDGETPU_MAX_WATERMARK_PREEMPT_DEPTH] = 0;
- mutex_unlock(&ustats->usage_stats_lock);
- }
+ edgetpu_max_watermark_clear(etdev, EDGETPU_MAX_WATERMARK_PREEMPT_DEPTH);
return count;
}
static DEVICE_ATTR(preempt_depth_max, 0664, preempt_depth_max_show,
@@ -762,11 +820,10 @@ static ssize_t hardware_ctx_save_time_max_show(
struct device *dev, struct device_attribute *attr, char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_max_watermark(
- etdev, EDGETPU_MAX_WATERMARK_HARDWARE_CTX_SAVE_TIME_US);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_max_watermark(etdev, buf,
+ EDGETPU_MAX_WATERMARK_HARDWARE_CTX_SAVE_TIME_US,
+ true);
}
static ssize_t hardware_ctx_save_time_max_store(
@@ -774,14 +831,8 @@ static ssize_t hardware_ctx_save_time_max_store(
const char *buf, size_t count)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- struct edgetpu_usage_stats *ustats = etdev->usage_stats;
-
- if (ustats) {
- mutex_lock(&ustats->usage_stats_lock);
- ustats->max_watermark[EDGETPU_MAX_WATERMARK_HARDWARE_CTX_SAVE_TIME_US] = 0;
- mutex_unlock(&ustats->usage_stats_lock);
- }
+ edgetpu_max_watermark_clear(etdev, EDGETPU_MAX_WATERMARK_HARDWARE_CTX_SAVE_TIME_US);
return count;
}
static DEVICE_ATTR(hardware_ctx_save_time_max, 0664, hardware_ctx_save_time_max_show,
@@ -791,11 +842,9 @@ static ssize_t scalar_fence_wait_time_max_show(
struct device *dev, struct device_attribute *attr, char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_max_watermark(
- etdev, EDGETPU_MAX_WATERMARK_SCALAR_FENCE_WAIT_TIME_US);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_max_watermark(
+ etdev, buf, EDGETPU_MAX_WATERMARK_SCALAR_FENCE_WAIT_TIME_US, true);
}
static ssize_t scalar_fence_wait_time_max_store(
@@ -803,14 +852,8 @@ static ssize_t scalar_fence_wait_time_max_store(
const char *buf, size_t count)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- struct edgetpu_usage_stats *ustats = etdev->usage_stats;
-
- if (ustats) {
- mutex_lock(&ustats->usage_stats_lock);
- ustats->max_watermark[EDGETPU_MAX_WATERMARK_SCALAR_FENCE_WAIT_TIME_US] = 0;
- mutex_unlock(&ustats->usage_stats_lock);
- }
+ edgetpu_max_watermark_clear(etdev, EDGETPU_MAX_WATERMARK_SCALAR_FENCE_WAIT_TIME_US);
return count;
}
static DEVICE_ATTR(scalar_fence_wait_time_max, 0664, scalar_fence_wait_time_max_show,
@@ -820,11 +863,9 @@ static ssize_t suspend_time_max_show(
struct device *dev, struct device_attribute *attr, char *buf)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- int64_t val;
- val = edgetpu_usage_get_max_watermark(
- etdev, EDGETPU_MAX_WATERMARK_SUSPEND_TIME_US);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return edgetpu_usage_format_max_watermark(etdev, buf, EDGETPU_MAX_WATERMARK_SUSPEND_TIME_US,
+ false);
}
static ssize_t suspend_time_max_store(
@@ -832,14 +873,8 @@ static ssize_t suspend_time_max_store(
const char *buf, size_t count)
{
struct edgetpu_dev *etdev = dev_get_drvdata(dev);
- struct edgetpu_usage_stats *ustats = etdev->usage_stats;
-
- if (ustats) {
- mutex_lock(&ustats->usage_stats_lock);
- ustats->max_watermark[EDGETPU_MAX_WATERMARK_SUSPEND_TIME_US] = 0;
- mutex_unlock(&ustats->usage_stats_lock);
- }
+ edgetpu_max_watermark_clear(etdev, EDGETPU_MAX_WATERMARK_SUSPEND_TIME_US);
return count;
}
static DEVICE_ATTR(suspend_time_max, 0664, suspend_time_max_show,
@@ -900,6 +935,10 @@ static struct attribute *usage_stats_dev_attrs[] = {
&dev_attr_hardware_ctx_save_time.attr,
&dev_attr_scalar_fence_wait_time.attr,
&dev_attr_long_suspend_count.attr,
+#if EDGETPU_TPU_CLUSTER_COUNT > 1
+ &dev_attr_reconfigurations.attr,
+ &dev_attr_preempt_reconfigurations.attr,
+#endif
&dev_attr_outstanding_commands_max.attr,
&dev_attr_preempt_depth_max.attr,
&dev_attr_hardware_ctx_save_time_max.attr,
@@ -912,6 +951,7 @@ static struct attribute *usage_stats_dev_attrs[] = {
static const struct attribute_group usage_stats_attr_group = {
.attrs = usage_stats_dev_attrs,
};
+
void edgetpu_usage_stats_init(struct edgetpu_dev *etdev)
{
struct edgetpu_usage_stats *ustats;
diff --git a/drivers/edgetpu/edgetpu-usage-stats.h b/drivers/edgetpu/edgetpu-usage-stats.h
index a60b107..2d97043 100644
--- a/drivers/edgetpu/edgetpu-usage-stats.h
+++ b/drivers/edgetpu/edgetpu-usage-stats.h
@@ -10,9 +10,31 @@
#include <linux/hashtable.h>
#include <linux/mutex.h>
+/* The highest version of usage metrics handled by this driver. */
+#define EDGETPU_USAGE_METRIC_VERSION 2
+
+/* Max # of TPU clusters accounted for in the highest supported metrics version. */
+#define EDGETPU_USAGE_CLUSTERS_MAX 3
+
+/*
+ * Size in bytes of usage metric v1.
+ * If fewer bytes than this are received then discard the invalid buffer.
+ * This size also identifies the fw response as v1; subsequent versions will add another field
+ * with the version number.
+ */
+#define EDGETPU_USAGE_METRIC_SIZE_V1 20
+
+/* v1 metric header struct. */
+struct edgetpu_usage_header_v1 {
+ uint32_t num_metrics; /* Number of metrics being reported */
+ uint32_t metric_size; /* Size of each metric struct */
+};
+
/* Header struct in the metric buffer. */
/* Must be kept in sync with firmware struct UsageTrackerHeader */
struct edgetpu_usage_header {
+ uint16_t header_bytes; /* Number of bytes in this header */
+ uint16_t version; /* Metrics version */
uint32_t num_metrics; /* Number of metrics being reported */
uint32_t metric_size; /* Size of each metric struct */
};
@@ -20,15 +42,25 @@ struct edgetpu_usage_header {
/*
* Encapsulate TPU core usage information of a specific application for a
* specific power state.
- * Must be kept in sync with firmware struct TpuUsage.
+ * Must be kept in sync with firmware struct CoreUsage.
*/
struct tpu_usage {
/* Unique identifier of the application. */
int32_t uid;
/* The power state of the device (values are chip dependent) */
+ /* Now called operating_point in FW. */
uint32_t power_state;
/* Duration of usage in microseconds. */
uint32_t duration_us;
+
+ /* Following fields are added in metrics v2 */
+
+ /* Compute Core: TPU cluster ID. */
+ /* Called core_id in FW. */
+ /* Note: as of metrics v2 the cluster_id is always zero and is ignored. */
+ uint8_t cluster_id;
+ /* Reserved. Filling out the next 32-bit boundary. */
+ uint8_t reserved[3];
};
/*
@@ -38,9 +70,13 @@ struct tpu_usage {
enum edgetpu_usage_component {
/* The device as a whole */
EDGETPU_USAGE_COMPONENT_DEVICE = 0,
- /* Just the TPU core */
+ /* Just the TPU core (scalar core and tiles) */
EDGETPU_USAGE_COMPONENT_TPU = 1,
- EDGETPU_USAGE_COMPONENT_COUNT = 2, /* number of components above */
+ /* Control core (ARM Cortex-R52 CPU) */
+ /* Note: this component is not reported as of metrics v2. */
+ EDGETPU_USAGE_COMPONENT_CONTROLCORE = 2,
+
+ EDGETPU_USAGE_COMPONENT_COUNT = 3, /* number of components above */
};
/*
@@ -62,7 +98,7 @@ enum edgetpu_usage_counter_type {
EDGETPU_COUNTER_TPU_ACTIVE_CYCLES = 0,
/* Number of stalls caused by throttling. */
EDGETPU_COUNTER_TPU_THROTTLE_STALLS = 1,
- /* Number of graph invocations. */
+ /* Number of graph invocations. (Now called kWorkload in FW.) */
EDGETPU_COUNTER_INFERENCES = 2,
/* Number of TPU offload op invocations. */
EDGETPU_COUNTER_TPU_OPS = 3,
@@ -81,7 +117,18 @@ enum edgetpu_usage_counter_type {
/* Number of times (firmware)suspend function takes longer than SLA time. */
EDGETPU_COUNTER_LONG_SUSPEND = 10,
- EDGETPU_COUNTER_COUNT = 11, /* number of counters above */
+ /* The following counters are added in metrics v2. */
+
+ /* Counter 11 not used on TPU. */
+ EDGETPU_COUNTER_CONTEXT_SWITCHES = 11,
+
+ /* Number of TPU Cluster Reconfigurations. */
+ EDGETPU_COUNTER_RECONFIGURATIONS = 12,
+
+ /* Number of TPU Cluster Reconfigurations motivated exclusively by a preemption. */
+ EDGETPU_COUNTER_PREEMPT_RECONFIGURATIONS = 13,
+
+ EDGETPU_COUNTER_COUNT = 14, /* number of counters above */
};
/* Generic counter. Only reported if it has a value larger than 0. */
@@ -91,6 +138,11 @@ struct __packed edgetpu_usage_counter {
/* Accumulated value since last initialization. */
uint64_t value;
+
+ /* Following fields are added in metrics v2 */
+
+ /* Reporting component. */
+ uint8_t component_id;
};
/* Defines different max watermarks we track. */
@@ -121,15 +173,22 @@ struct __packed edgetpu_usage_max_watermark {
* non-mobile, firmware boot on mobile).
*/
uint64_t value;
+
+ /* Following fields are added in metrics v2 */
+
+ /* Reporting component. */
+ uint8_t component_id;
};
/* An enum to identify the tracked firmware threads. */
/* Must be kept in sync with firmware enum class UsageTrackerThreadId. */
enum edgetpu_usage_threadid {
- /* Individual thread IDs are not tracked. */
+ /* Individual thread IDs do not have identifiers assigned. */
+
+ /* Thread ID 14 is not used for TPU */
/* Number of task identifiers. */
- EDGETPU_FW_THREAD_COUNT = 12,
+ EDGETPU_FW_THREAD_COUNT = 17,
};
/* Statistics related to a single thread in firmware. */
@@ -173,11 +232,13 @@ struct edgetpu_usage_metric {
#define UID_HASH_BITS 3
struct edgetpu_usage_stats {
+ /* if true the current firmware only implements metrics V1 */
+ bool use_metrics_v1;
DECLARE_HASHTABLE(uid_hash_table, UID_HASH_BITS);
/* component utilization values reported by firmware */
int32_t component_utilization[EDGETPU_USAGE_COMPONENT_COUNT];
- int64_t counter[EDGETPU_COUNTER_COUNT];
- int64_t max_watermark[EDGETPU_MAX_WATERMARK_TYPE_COUNT];
+ int64_t counter[EDGETPU_COUNTER_COUNT][EDGETPU_USAGE_CLUSTERS_MAX];
+ int64_t max_watermark[EDGETPU_MAX_WATERMARK_TYPE_COUNT][EDGETPU_USAGE_CLUSTERS_MAX];
int32_t thread_stack_max[EDGETPU_FW_THREAD_COUNT];
struct mutex usage_stats_lock;
};
diff --git a/drivers/edgetpu/mobile-pm.c b/drivers/edgetpu/mobile-pm.c
index 50c3866..2aafd4c 100644
--- a/drivers/edgetpu/mobile-pm.c
+++ b/drivers/edgetpu/mobile-pm.c
@@ -21,6 +21,7 @@
#include "edgetpu-mailbox.h"
#include "edgetpu-mobile-platform.h"
#include "edgetpu-pm.h"
+#include "edgetpu-thermal.h"
#include "mobile-firmware.h"
#include "mobile-pm.h"
@@ -40,6 +41,10 @@ module_param(power_state, int, 0660);
#define MAX_VOLTAGE_VAL 1250000
+#define BLOCK_DOWN_RETRY_TIMES 50
+#define BLOCK_DOWN_MIN_DELAY_US 1000
+#define BLOCK_DOWN_MAX_DELAY_US 1500
+
enum edgetpu_pwr_state edgetpu_active_states[EDGETPU_NUM_STATES] = {
TPU_ACTIVE_UUD,
TPU_ACTIVE_SUD,
@@ -434,8 +439,23 @@ static int mobile_power_up(struct edgetpu_pm *etpm)
struct edgetpu_mobile_platform_pwr *platform_pwr = &etmdev->platform_pwr;
int ret;
- if (platform_pwr->is_block_down && !platform_pwr->is_block_down(etdev))
+ if (platform_pwr->is_block_down) {
+ int times = 0;
+
+ do {
+ if (platform_pwr->is_block_down(etdev))
+ break;
+ usleep_range(BLOCK_DOWN_MIN_DELAY_US, BLOCK_DOWN_MAX_DELAY_US);
+ } while (++times < BLOCK_DOWN_RETRY_TIMES);
+ if (times >= BLOCK_DOWN_RETRY_TIMES && !platform_pwr->is_block_down(etdev))
+ return -EAGAIN;
+ }
+
+ if (edgetpu_thermal_is_suspended(etdev->thermal)) {
+ etdev_warn_ratelimited(etdev,
+ "power up rejected due to device thermal limit exceeded");
return -EAGAIN;
+ }
ret = mobile_pwr_state_set(etpm->etdev, mobile_get_initial_pwr_state(etdev->dev));