diff options
author | Nrithya Kanakasabapathy <nrithya@google.com> | 2021-01-12 18:25:59 -0800 |
---|---|---|
committer | Nrithya Kanakasabapathy <nrithya@google.com> | 2021-01-12 18:25:59 -0800 |
commit | e0d2f4a867dba26c3cade6e25f2b1b61700c0978 (patch) | |
tree | f3cf1dccec60026ee31640fb19649bd2fc6d5527 /drivers | |
parent | 411a6d8e49545ea18b1c2734de51d103d25b4d52 (diff) | |
download | abrolhos-e0d2f4a867dba26c3cade6e25f2b1b61700c0978.tar.gz |
Merge branch 'whitechapel' into android-gs-pixel-mainline
* whitechapel: (30 commits)
edgetpu: use pin_user_pages_fast
edgetpu: add mm-backport.h
scripts: update checkpatch.pl to v5.10 version
edgetpu: add API for immediate watchdog bite
edgetpu: replace groups array with list
edgetpu: add flag requesting full chip reset
edgetpu: create unittest for edgetpu usage stats
edgetpu: make edgetpu_debug_dump_work static
edgetpu: process usage stats from firmware
edgetpu: Create a sysfs for usage stats
edgetpu: abrolhos: Handles debug dump in a work queue
edgetpu: abrolhos: Sets up infrastructure to get dump information
edgetpu: added config-specific debug dump functions
edgetpu: KCI usage stats retrieve fix error check
...
Signed-off-by: Nrithya Kanakasabapathy <nrithya@google.com>
Change-Id: I56782e427a289e5c8126f762c043c6171c67bcc4
Diffstat (limited to 'drivers')
30 files changed, 863 insertions, 305 deletions
diff --git a/drivers/edgetpu/Kbuild b/drivers/edgetpu/Kbuild index da88973..5361fa3 100644 --- a/drivers/edgetpu/Kbuild +++ b/drivers/edgetpu/Kbuild @@ -12,7 +12,7 @@ endif edgetpu-fw-objs := edgetpu-firmware.o edgetpu-firmware-util.o edgetpu-shared-fw.o edgetpu-objs := edgetpu-mailbox.o edgetpu-kci.o edgetpu-telemetry.o edgetpu-mapping.o edgetpu-dmabuf.o edgetpu-async.o edgetpu-iremap-pool.o edgetpu-sw-watchdog.o $(edgetpu-fw-objs) -abrolhos-y := abrolhos-device.o abrolhos-device-group.o abrolhos-fs.o abrolhos-core.o abrolhos-platform.o abrolhos-iommu.o abrolhos-firmware.o abrolhos-thermal.o abrolhos-pm.o abrolhos-debug-dump.o $(edgetpu-objs) +abrolhos-y := abrolhos-device.o abrolhos-device-group.o abrolhos-fs.o abrolhos-core.o abrolhos-platform.o abrolhos-iommu.o abrolhos-firmware.o abrolhos-thermal.o abrolhos-pm.o abrolhos-debug-dump.o abrolhos-usage-stats.o $(edgetpu-objs) CFLAGS_abrolhos-fs.o := -DCONFIG_ABROLHOS=1 CFLAGS_abrolhos-core.o := -DCONFIG_ABROLHOS=1 CFLAGS_abrolhos-device.o := -DCONFIG_ABROLHOS=1 @@ -23,3 +23,4 @@ CFLAGS_abrolhos-pm.o := -DCONFIG_ABROLHOS=1 CFLAGS_abrolhos-thermal.o := -DCONFIG_ABROLHOS=1 CFLAGS_abrolhos-iommu.o := -DCONFIG_ABROLHOS=1 CFLAGS_abrolhos-debug-dump.o := -DCONFIG_ABROLHOS=1 +CFLAGS_abrolhos-usage-stats.o := -DCONFIG_ABROLHOS=1 diff --git a/drivers/edgetpu/Makefile b/drivers/edgetpu/Makefile index b85b8fc..0391dcf 100644 --- a/drivers/edgetpu/Makefile +++ b/drivers/edgetpu/Makefile @@ -16,7 +16,7 @@ endif edgetpu-fw-objs := edgetpu-firmware-util.o edgetpu-shared-fw.o edgetpu-firmware.o edgetpu-objs := edgetpu-core.o edgetpu-mailbox.o edgetpu-kci.o edgetpu-device-group.o edgetpu-telemetry.o edgetpu-mapping.o edgetpu-dmabuf.o edgetpu-async.o edgetpu-iremap-pool.o edgetpu-sw-watchdog.o $(edgetpu-fw-objs) -abrolhos-objs := abrolhos-device.o abrolhos-firmware.o edgetpu-fs.o abrolhos-platform.o abrolhos-iommu.o abrolhos-thermal.o abrolhos-pm.o abrolhos-debug-dump.o $(edgetpu-objs) +abrolhos-objs := abrolhos-device.o abrolhos-firmware.o edgetpu-fs.o abrolhos-platform.o abrolhos-iommu.o abrolhos-thermal.o abrolhos-pm.o abrolhos-debug-dump.o abrolhos-usage-stats.o $(edgetpu-objs) KBUILD_OPTIONS += CONFIG_ABROLHOS=m diff --git a/drivers/edgetpu/abrolhos-debug-dump.c b/drivers/edgetpu/abrolhos-debug-dump.c index d39e674..cdc57e2 100644 --- a/drivers/edgetpu/abrolhos-debug-dump.c +++ b/drivers/edgetpu/abrolhos-debug-dump.c @@ -1,2 +1,136 @@ // SPDX-License-Identifier: GPL-2.0 + +#include <linux/platform_data/sscoredump.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include "abrolhos-platform.h" + #include "edgetpu-debug-dump.c" + +static int abrolhos_sscd_generate_coredump(void *p_etdev, void *p_dump_setup) +{ + struct edgetpu_dev *etdev; + struct edgetpu_debug_dump_setup *dump_setup; + struct abrolhos_platform_dev *pdev; + struct sscd_platform_data *pdata; + struct platform_device *sscd_dev; + struct sscd_segment *segs; + struct edgetpu_debug_dump *debug_dump; + struct edgetpu_crash_reason *crash_reason; + struct edgetpu_dump_segment *dump_seg; + char crash_info[128]; + int dump_segments_num; + int i, ret; + u64 offset; + + if (!p_etdev || !p_dump_setup) + return -EINVAL; + + etdev = (struct edgetpu_dev *)p_etdev; + dump_setup = (struct edgetpu_debug_dump_setup *)p_dump_setup; + pdev = container_of(etdev, struct abrolhos_platform_dev, edgetpu_dev); + pdata = (struct sscd_platform_data *)pdev->sscd_info.pdata; + sscd_dev = (struct platform_device *)pdev->sscd_info.dev; + if (!pdata->sscd_report) { + etdev_err(etdev, "failed to generate coredump"); + return -ENOENT; + } + + offset = sizeof(struct edgetpu_debug_dump_setup); + debug_dump = (struct edgetpu_debug_dump *)((u64 *)dump_setup + + word_align_offset(offset)); + + /* Populate crash reason */ + crash_reason = (struct edgetpu_crash_reason *)((u64 *)dump_setup + + word_align_offset(debug_dump->crash_reason_offset)); + scnprintf(crash_info, sizeof(crash_info), + "[edgetpu_coredump] error code: 0x%llx", crash_reason->code); + + /* Populate dump segments */ + dump_segments_num = debug_dump->dump_segments_num; + segs = kmalloc_array(dump_segments_num, + sizeof(struct sscd_segment), + GFP_KERNEL); + if (!segs) + return -ENOMEM; + + dump_seg = (struct edgetpu_dump_segment *)((u64 *)dump_setup + + word_align_offset(debug_dump->dump_segments_offset)); + offset = debug_dump->dump_segments_offset + + sizeof(struct edgetpu_dump_segment); + for (i = 0; i < dump_segments_num; i++) { + segs[i].addr = &dump_seg[i].src_addr + 1; + segs[i].size = dump_seg[i].size; + segs[i].paddr = (void *)(etdev->debug_dump_mem.tpu_addr + + offset); + segs[i].vaddr = (void *)(etdev->debug_dump_mem.vaddr + + offset); + offset += sizeof(struct edgetpu_dump_segment) + dump_seg->size; + dump_seg = (struct edgetpu_dump_segment *) + ((u64 *)dump_seg + word_align_offset( + sizeof(struct edgetpu_dump_segment) + + dump_seg->size)); + } + + /* Pass dump data to SSCD daemon */ + etdev_dbg(etdev, "report: %d segments", dump_segments_num); + ret = pdata->sscd_report(sscd_dev, segs, dump_segments_num, + SSCD_FLAGS_ELFARM64HDR, crash_info); + + kfree(segs); + + return ret; +} + +int edgetpu_debug_dump_init(struct edgetpu_dev *etdev) +{ + size_t size; + int ret; + struct edgetpu_debug_dump_setup *dump_setup; + + size = EDGETPU_DEBUG_DUMP_MEM_SIZE; + + /* + * Allocate buffers for various dump segments and map them to FW + * accessible regions + */ + ret = edgetpu_iremap_alloc(etdev, size, &etdev->debug_dump_mem, + EDGETPU_CONTEXT_KCI); + if (ret) { + etdev_err(etdev, "Debug dump seg alloc failed"); + etdev->debug_dump_mem.vaddr = NULL; + return ret; + } + dump_setup = + (struct edgetpu_debug_dump_setup *)etdev->debug_dump_mem.vaddr; + dump_setup->dump_mem_size = size; + memset(dump_setup, 0, dump_setup->dump_mem_size); + + /* + * Allocate memory for debug dump handlers + */ + etdev->debug_dump_handlers = kcalloc(DUMP_REQ_REASON_NUM, + sizeof(*etdev->debug_dump_handlers), + GFP_KERNEL); + if (!etdev->debug_dump_handlers) + return -ENOMEM; + etdev->debug_dump_handlers[DUMP_REQ_REASON_BY_USER] = + abrolhos_sscd_generate_coredump; + + return ret; +} + +void edgetpu_debug_dump_exit(struct edgetpu_dev *etdev) +{ + if (!etdev->debug_dump_mem.vaddr) { + etdev_dbg(etdev, "Debug dump not allocated"); + return; + } + /* + * Free the memory assigned for debug dump + */ + edgetpu_iremap_free(etdev, &etdev->debug_dump_mem, + EDGETPU_CONTEXT_KCI); + kfree(etdev->debug_dump_handlers); +} diff --git a/drivers/edgetpu/abrolhos-debug-dump.h b/drivers/edgetpu/abrolhos-debug-dump.h new file mode 100644 index 0000000..62ef111 --- /dev/null +++ b/drivers/edgetpu/abrolhos-debug-dump.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Module that defines structure to retrieve debug dump segments + * from abrolhos firmware. + * + * Copyright (C) 2020 Google, Inc. + */ +#ifndef __ABROLHOS_DEBUG_DUMP_H__ +#define __ABROLHOS_DEBUG_DUMP_H__ + +struct abrolhos_sscd_info { + void *pdata; /* SSCD platform data */ + void *dev; /* SSCD platform device */ +}; + +#endif /* ABROLHOS_DEBUG_DUMP_H_ */ diff --git a/drivers/edgetpu/abrolhos-device.c b/drivers/edgetpu/abrolhos-device.c index f6a0eaf..ed1e7d5 100644 --- a/drivers/edgetpu/abrolhos-device.c +++ b/drivers/edgetpu/abrolhos-device.c @@ -84,8 +84,7 @@ u64 edgetpu_chip_tpu_timestamp(struct edgetpu_dev *etdev) void edgetpu_chip_init(struct edgetpu_dev *etdev) { int i; - struct edgetpu_platform_dev *etpdev = container_of( - etdev, struct edgetpu_platform_dev, edgetpu_dev); + struct abrolhos_platform_dev *etpdev = to_abrolhos_dev(etdev); /* Disable the CustomBlock Interrupt. */ edgetpu_dev_write_32(etdev, HOST_NONSECURE_INTRSRCMASKREG, 0x1); diff --git a/drivers/edgetpu/abrolhos-firmware.c b/drivers/edgetpu/abrolhos-firmware.c index 18a7671..2c9b87e 100644 --- a/drivers/edgetpu/abrolhos-firmware.c +++ b/drivers/edgetpu/abrolhos-firmware.c @@ -22,8 +22,8 @@ static int abrolhos_firmware_alloc_buffer( struct edgetpu_firmware_buffer *fw_buf) { struct edgetpu_dev *etdev = et_fw->etdev; - struct edgetpu_platform_dev *edgetpu_pdev = - container_of(etdev, struct edgetpu_platform_dev, edgetpu_dev); + struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev); + /* Allocate extra space the image header */ size_t buffer_size = edgetpu_pdev->fw_region_size + ABROLHOS_FW_HEADER_SIZE; @@ -68,8 +68,7 @@ static int abrolhos_firmware_prepare_run(struct edgetpu_firmware *et_fw, struct edgetpu_firmware_buffer *fw_buf) { struct edgetpu_dev *etdev = et_fw->etdev; - struct edgetpu_platform_dev *edgetpu_pdev = - container_of(etdev, struct edgetpu_platform_dev, edgetpu_dev); + struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev); void *image_vaddr, *header_vaddr; struct abrolhos_image_config *image_config; phys_addr_t image_start, image_end, carveout_start, carveout_end; diff --git a/drivers/edgetpu/abrolhos-iommu.c b/drivers/edgetpu/abrolhos-iommu.c index 58ca89c..d776a63 100644 --- a/drivers/edgetpu/abrolhos-iommu.c +++ b/drivers/edgetpu/abrolhos-iommu.c @@ -186,7 +186,7 @@ out: /* mmu_info is unused and NULL for IOMMU version, let IOMMU API supply info */ int edgetpu_mmu_attach(struct edgetpu_dev *etdev, void *mmu_info) { - struct edgetpu_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev); + struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev); struct edgetpu_iommu *etiommu; int ret; @@ -247,7 +247,7 @@ void edgetpu_mmu_reset(struct edgetpu_dev *etdev) void edgetpu_mmu_detach(struct edgetpu_dev *etdev) { - struct edgetpu_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev); + struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev); struct edgetpu_iommu *etiommu = etdev->mmu_cookie; int i, ret; diff --git a/drivers/edgetpu/abrolhos-platform.c b/drivers/edgetpu/abrolhos-platform.c index 1c2e90d..d04c0ae 100644 --- a/drivers/edgetpu/abrolhos-platform.c +++ b/drivers/edgetpu/abrolhos-platform.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Platform device driver for the Google Edge TPU ML accelerator. + * Abrolhos device driver for the Google EdgeTPU ML accelerator. * * Copyright (C) 2019 Google, Inc. */ @@ -30,29 +30,26 @@ #include "edgetpu-mmu.h" #include "edgetpu-telemetry.h" -#define MAX_SEGS 1 - static const struct of_device_id edgetpu_of_match[] = { { .compatible = "google,darwinn", }, { /* end of list */ }, }; MODULE_DEVICE_TABLE(of, edgetpu_of_match); -static void edgetpu_sscd_release(struct device *dev) +static void sscd_release(struct device *dev) { pr_debug(DRIVER_NAME " release\n"); } -static struct sscd_platform_data edgetpu_sscd_pdata; -static struct platform_device edgetpu_sscd_dev = { +static struct sscd_platform_data sscd_pdata; +static struct platform_device sscd_dev = { .name = DRIVER_NAME, .driver_override = SSCD_NAME, .id = -1, .dev = { - .platform_data = &edgetpu_sscd_pdata, - .release = edgetpu_sscd_release, + .platform_data = &sscd_pdata, + .release = sscd_release, }, }; - /* * Log and trace buffers at the beginning of the remapped region, * pool memory afterwards. @@ -60,7 +57,7 @@ static struct platform_device edgetpu_sscd_dev = { #define EDGETPU_POOL_MEM_OFFSET (EDGETPU_TELEMETRY_BUFFER_SIZE * 2) -static void abrolhos_get_telemetry_mem(struct edgetpu_platform_dev *etpdev, +static void abrolhos_get_telemetry_mem(struct abrolhos_platform_dev *etpdev, enum edgetpu_telemetry_type type, struct edgetpu_coherent_mem *mem) { @@ -75,7 +72,8 @@ static void abrolhos_get_telemetry_mem(struct edgetpu_platform_dev *etpdev, } /* Setup the firmware region carveout. */ -static int edgetpu_platform_setup_fw_region(struct edgetpu_platform_dev *etpdev) +static int +edgetpu_platform_setup_fw_region(struct abrolhos_platform_dev *etpdev) { struct edgetpu_dev *etdev = &etpdev->edgetpu_dev; struct platform_device *gsa_pdev; @@ -163,7 +161,7 @@ out_unmap: } static void edgetpu_platform_cleanup_fw_region( - struct edgetpu_platform_dev *etpdev) + struct abrolhos_platform_dev *etpdev) { gsa_unload_tpu_fw_image(etpdev->gsa_dev); @@ -183,68 +181,7 @@ void edgetpu_setup_mmu(struct edgetpu_dev *etdev) dev_warn(etdev->dev, "failed to attach IOMMU: %d\n", ret); } -static int edgetpu_sscd_generate_coredump(void) -{ - struct sscd_platform_data *pdata = &edgetpu_sscd_pdata; - static struct sscd_segment segs[MAX_SEGS]; - char msg[128]; - int cnt; - - if (!pdata->sscd_report) { - pr_err(DRIVER_NAME " failed to generate coredump\n"); - return -1; - } - - /* - * TODO (b/156049774): - * Replace with dump information when it's available - */ - cnt = scnprintf(msg, sizeof(msg), "HELLO TPU!"); - segs[0].addr = (void *)&msg; - segs[0].size = cnt; - - pr_debug(DRIVER_NAME " report: %d segments", MAX_SEGS); - return pdata->sscd_report(&edgetpu_sscd_dev, segs, MAX_SEGS, - 0, "edgetpu_coredump"); -} - -static ssize_t edgetpu_coredump_store(struct file *filep, - const char __user *ubuf, size_t size, loff_t *offp) -{ - int generate_coredump, ret; - - ret = kstrtoint_from_user(ubuf, size, 0, &generate_coredump); - if (ret) - return ret; - if (generate_coredump) { - ret = edgetpu_sscd_generate_coredump(); - if (ret) { - pr_err(DRIVER_NAME " failed to generate coredump: %d\n", - ret); - return ret; - } - } - - return size; -}; - -static const struct file_operations coredump_ops = { - .owner = THIS_MODULE, - .write = edgetpu_coredump_store, -}; - -static void edgetpu_sscd_init(struct edgetpu_dev *etdev) -{ - /* - * TODO (b/156049774): - * Remove debugfs file after dump information is available and - * edgetpu_sscd_generate_coredump is triggered by a crash - */ - debugfs_create_file("coredump", 0220, etdev->d_entry, etdev, - &coredump_ops); -} - -static int abrolhos_parse_ssmt(struct edgetpu_platform_dev *etpdev) +static int abrolhos_parse_ssmt(struct abrolhos_platform_dev *etpdev) { struct edgetpu_dev *etdev = &etpdev->edgetpu_dev; struct platform_device *pdev = to_platform_device(etdev->dev); @@ -270,13 +207,12 @@ static int abrolhos_parse_ssmt(struct edgetpu_platform_dev *etpdev) static int edgetpu_platform_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct edgetpu_platform_dev *edgetpu_pdev; + struct abrolhos_platform_dev *edgetpu_pdev; struct resource *r; struct edgetpu_mapped_resource regs; int ret; - edgetpu_pdev = - devm_kzalloc(dev, sizeof(*edgetpu_pdev), GFP_KERNEL); + edgetpu_pdev = devm_kzalloc(dev, sizeof(*edgetpu_pdev), GFP_KERNEL); if (!edgetpu_pdev) return -ENOMEM; @@ -379,8 +315,6 @@ static int edgetpu_platform_probe(struct platform_device *pdev) dev_dbg(dev, "Creating thermal device\n"); edgetpu_pdev->edgetpu_dev.thermal = devm_tpu_thermal_create(dev); - edgetpu_sscd_init(&edgetpu_pdev->edgetpu_dev); - dev_info(dev, "%s edgetpu initialized. Build: %s\n", edgetpu_pdev->edgetpu_dev.dev_name, GIT_REPO_TAG); @@ -388,6 +322,9 @@ static int edgetpu_platform_probe(struct platform_device *pdev) /* Turn the device off unless a client request is already received. */ edgetpu_pm_shutdown(&edgetpu_pdev->edgetpu_dev, false); + edgetpu_pdev->sscd_info.pdata = &sscd_pdata; + edgetpu_pdev->sscd_info.dev = &sscd_dev; + return ret; out_tel_exit: edgetpu_telemetry_exit(&edgetpu_pdev->edgetpu_dev); @@ -406,8 +343,7 @@ out_shutdown: static int edgetpu_platform_remove(struct platform_device *pdev) { struct edgetpu_dev *etdev = platform_get_drvdata(pdev); - struct edgetpu_platform_dev *edgetpu_pdev = container_of( - etdev, struct edgetpu_platform_dev, edgetpu_dev); + struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev); abrolhos_edgetpu_firmware_destroy(etdev); if (edgetpu_pdev->irq >= 0) @@ -442,7 +378,7 @@ static int __init edgetpu_platform_init(void) return ret; /* Register SSCD platform device */ - ret = platform_device_register(&edgetpu_sscd_dev); + ret = platform_device_register(&sscd_dev); if (ret) pr_err(DRIVER_NAME " SSCD platform device registration failed: %d\n", ret); @@ -452,11 +388,11 @@ static int __init edgetpu_platform_init(void) static void __exit edgetpu_platform_exit(void) { platform_driver_unregister(&edgetpu_platform_driver); - platform_device_unregister(&edgetpu_sscd_dev); + platform_device_unregister(&sscd_dev); edgetpu_exit(); } -MODULE_DESCRIPTION("Google Edge TPU platform driver"); +MODULE_DESCRIPTION("Google EdgeTPU platform driver"); MODULE_LICENSE("GPL v2"); module_init(edgetpu_platform_init); module_exit(edgetpu_platform_exit); diff --git a/drivers/edgetpu/abrolhos-platform.h b/drivers/edgetpu/abrolhos-platform.h index a9eca06..ecd3742 100644 --- a/drivers/edgetpu/abrolhos-platform.h +++ b/drivers/edgetpu/abrolhos-platform.h @@ -1,29 +1,30 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Platform device driver for the Google Edge TPU ML accelerator. + * Abrolhos device driver for the Google EdgeTPU ML accelerator. * * Copyright (C) 2019 Google, Inc. */ -#ifndef __EDGETPU_PLATFORM_H__ -#define __EDGETPU_PLATFORM_H__ +#ifndef __ABROLHOS_PLATFORM_H__ +#define __ABROLHOS_PLATFORM_H__ #include <linux/device.h> #include <linux/io.h> #include <linux/kernel.h> #include <linux/types.h> -#include "edgetpu-internal.h" +#include "abrolhos-debug-dump.h" #include "abrolhos-pm.h" +#include "edgetpu-internal.h" #define to_abrolhos_dev(etdev) \ - container_of(etdev, struct edgetpu_platform_dev, edgetpu_dev) + container_of(etdev, struct abrolhos_platform_dev, edgetpu_dev) struct edgetpu_platform_pwr { struct mutex policy_lock; enum tpu_pwr_state curr_policy; }; -struct edgetpu_platform_dev { +struct abrolhos_platform_dev { struct edgetpu_dev edgetpu_dev; struct edgetpu_platform_pwr platform_pwr; int irq; @@ -39,6 +40,7 @@ struct edgetpu_platform_dev { void __iomem *ssmt_base; struct edgetpu_coherent_mem log_mem; struct edgetpu_coherent_mem trace_mem; + struct abrolhos_sscd_info sscd_info; }; -#endif /* __EDGETPU_PLATFORM_H__ */ +#endif /* __ABROLHOS_PLATFORM_H__ */ diff --git a/drivers/edgetpu/abrolhos-pm.c b/drivers/edgetpu/abrolhos-pm.c index 6dd9ea4..04b42b5 100644 --- a/drivers/edgetpu/abrolhos-pm.c +++ b/drivers/edgetpu/abrolhos-pm.c @@ -107,8 +107,7 @@ static int abrolhos_pwr_state_get(void *data, u64 *val) static int abrolhos_pwr_policy_set(void *data, u64 val) { - struct edgetpu_platform_dev *edgetpu_pdev = - (struct edgetpu_platform_dev *)data; + struct abrolhos_platform_dev *edgetpu_pdev = (typeof(edgetpu_pdev))data; struct edgetpu_platform_pwr *platform_pwr = &edgetpu_pdev->platform_pwr; int ret; @@ -129,8 +128,7 @@ static int abrolhos_pwr_policy_set(void *data, u64 val) static int abrolhos_pwr_policy_get(void *data, u64 *val) { - struct edgetpu_platform_dev *edgetpu_pdev = - (struct edgetpu_platform_dev *)data; + struct abrolhos_platform_dev *edgetpu_pdev = (typeof(edgetpu_pdev))data; struct edgetpu_platform_pwr *platform_pwr = &edgetpu_pdev->platform_pwr; mutex_lock(&platform_pwr->policy_lock); @@ -365,8 +363,7 @@ static void abrolhos_power_down(struct edgetpu_pm *etpm); static int abrolhos_power_up(struct edgetpu_pm *etpm) { struct edgetpu_dev *etdev = etpm->etdev; - struct edgetpu_platform_dev *edgetpu_pdev = container_of( - etpm->etdev, struct edgetpu_platform_dev, edgetpu_dev); + struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev); struct device *dev = etdev->dev; int ret = abrolhos_pwr_state_set(dev, abrolhos_get_initial_pwr_state(dev)); @@ -438,7 +435,7 @@ static int abrolhos_power_up(struct edgetpu_pm *etpm) static void abrolhos_pm_shutdown_firmware(struct edgetpu_dev *etdev, - struct edgetpu_platform_dev *edgetpu_pdev) + struct abrolhos_platform_dev *edgetpu_pdev) { if (!edgetpu_pchannel_power_down(etdev, false)) return; @@ -467,42 +464,41 @@ abrolhos_pm_shutdown_firmware(struct edgetpu_dev *etdev, static void abrolhos_power_down(struct edgetpu_pm *etpm) { - struct edgetpu_platform_dev *edgetpu_pdev = container_of( - etpm->etdev, struct edgetpu_platform_dev, edgetpu_dev); + struct edgetpu_dev *etdev = etpm->etdev; + struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev); u64 val; int res; - etdev_info(etpm->etdev, "Powering down\n"); + etdev_info(etdev, "Powering down\n"); - if (abrolhos_pwr_state_get(etpm->etdev->dev, &val)) { - etdev_warn(etpm->etdev, "Failed to read current power state\n"); + if (abrolhos_pwr_state_get(etdev->dev, &val)) { + etdev_warn(etdev, "Failed to read current power state\n"); val = TPU_ACTIVE_NOM; } if (val == TPU_OFF) { - etdev_dbg(etpm->etdev, - "Device already off, skipping shutdown\n"); + etdev_dbg(etdev, "Device already off, skipping shutdown\n"); return; } - if (etpm->etdev->kci && - edgetpu_firmware_status_locked(etpm->etdev) == FW_VALID) { - abrolhos_pm_shutdown_firmware(etpm->etdev, edgetpu_pdev); - cancel_work_sync(&etpm->etdev->kci->work); + if (etdev->kci && edgetpu_firmware_status_locked(etdev) == FW_VALID) { + /* Update usage stats before we power off fw. */ + edgetpu_kci_update_usage(etdev); + abrolhos_pm_shutdown_firmware(etdev, edgetpu_pdev); + cancel_work_sync(&etdev->kci->work); } res = gsa_send_tpu_cmd(edgetpu_pdev->gsa_dev, GSA_TPU_SHUTDOWN); if (res < 0) - etdev_warn(etpm->etdev, "GSA shutdown request failed (%d)\n", - res); - abrolhos_pwr_state_set(etpm->etdev->dev, TPU_OFF); + etdev_warn(etdev, "GSA shutdown request failed (%d)\n", res); + abrolhos_pwr_state_set(etdev->dev, TPU_OFF); } static int abrolhos_pm_after_create(struct edgetpu_pm *etpm) { int ret; - struct device *dev = etpm->etdev->dev; - struct edgetpu_platform_dev *edgetpu_pdev = container_of( - etpm->etdev, struct edgetpu_platform_dev, edgetpu_dev); + struct edgetpu_dev *etdev = etpm->etdev; + struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev); + struct device *dev = etdev->dev; ret = abrolhos_pwr_state_init(dev); if (ret) @@ -515,28 +511,33 @@ static int abrolhos_pm_after_create(struct edgetpu_pm *etpm) mutex_init(&edgetpu_pdev->platform_pwr.policy_lock); abrolhos_pwr_debugfs_dir = debugfs_create_dir("power", edgetpu_fs_debugfs_dir()); - debugfs_create_file("state", 0660, abrolhos_pwr_debugfs_dir, - dev, &fops_tpu_pwr_state); - debugfs_create_file("vdd_tpu", 0660, abrolhos_pwr_debugfs_dir, - dev, &fops_tpu_vdd_tpu); - debugfs_create_file("vdd_tpu_m", 0660, abrolhos_pwr_debugfs_dir, - dev, &fops_tpu_vdd_tpu_m); - debugfs_create_file("vdd_int_m", 0660, abrolhos_pwr_debugfs_dir, - dev, &fops_tpu_vdd_int_m); - debugfs_create_file("core_rate", 0660, abrolhos_pwr_debugfs_dir, - dev, &fops_tpu_core_rate); - debugfs_create_file("ctl_rate", 0660, abrolhos_pwr_debugfs_dir, - dev, &fops_tpu_ctl_rate); - debugfs_create_file("axi_rate", 0660, abrolhos_pwr_debugfs_dir, - dev, &fops_tpu_axi_rate); - debugfs_create_file("apb_rate", 0440, abrolhos_pwr_debugfs_dir, - dev, &fops_tpu_apb_rate); - debugfs_create_file("uart_rate", 0440, abrolhos_pwr_debugfs_dir, - dev, &fops_tpu_uart_rate); + if (!abrolhos_pwr_debugfs_dir) { + etdev_warn(etdev, "Failed to create debug FS power"); + /* don't fail the procedure on debug FS creation fails */ + return 0; + } + debugfs_create_file("state", 0660, abrolhos_pwr_debugfs_dir, dev, + &fops_tpu_pwr_state); + debugfs_create_file("vdd_tpu", 0660, abrolhos_pwr_debugfs_dir, dev, + &fops_tpu_vdd_tpu); + debugfs_create_file("vdd_tpu_m", 0660, abrolhos_pwr_debugfs_dir, dev, + &fops_tpu_vdd_tpu_m); + debugfs_create_file("vdd_int_m", 0660, abrolhos_pwr_debugfs_dir, dev, + &fops_tpu_vdd_int_m); + debugfs_create_file("core_rate", 0660, abrolhos_pwr_debugfs_dir, dev, + &fops_tpu_core_rate); + debugfs_create_file("ctl_rate", 0660, abrolhos_pwr_debugfs_dir, dev, + &fops_tpu_ctl_rate); + debugfs_create_file("axi_rate", 0660, abrolhos_pwr_debugfs_dir, dev, + &fops_tpu_axi_rate); + debugfs_create_file("apb_rate", 0440, abrolhos_pwr_debugfs_dir, dev, + &fops_tpu_apb_rate); + debugfs_create_file("uart_rate", 0440, abrolhos_pwr_debugfs_dir, dev, + &fops_tpu_uart_rate); debugfs_create_file("policy", 0660, abrolhos_pwr_debugfs_dir, - edgetpu_pdev, &fops_tpu_pwr_policy); + edgetpu_pdev, &fops_tpu_pwr_policy); debugfs_create_file("core_pwr", 0660, abrolhos_pwr_debugfs_dir, - edgetpu_pdev, &fops_tpu_core_pwr); + edgetpu_pdev, &fops_tpu_core_pwr); return 0; } diff --git a/drivers/edgetpu/abrolhos-thermal.c b/drivers/edgetpu/abrolhos-thermal.c index 62f10d2..efb5620 100644 --- a/drivers/edgetpu/abrolhos-thermal.c +++ b/drivers/edgetpu/abrolhos-thermal.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Edge TPU thermal driver for Abrolhos. + * EdgeTPU thermal driver for Abrolhos. * * Copyright (C) 2020 Google, Inc. */ @@ -54,15 +54,6 @@ static const struct edgetpu_state_pwr state_pwr_map[] = { { TPU_OFF, 0 }, }; -#define find_state_pwr(i, cmp_left, cmp_right, list, out_left, out_right) \ - do { \ - if (cmp_left == cmp_right) { \ - out_left = out_right; \ - return 0; \ - } \ - i++; \ - } while (i < ARRAY_SIZE(list)) - static int edgetpu_get_max_state(struct thermal_cooling_device *cdev, unsigned long *state) { @@ -70,9 +61,8 @@ static int edgetpu_get_max_state(struct thermal_cooling_device *cdev, return 0; } -/* Set cooling state - * Re-using code from abrohlos-platform. - * TODO: move to external call +/* + * Set cooling state. */ static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state_original) @@ -81,7 +71,7 @@ static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev, struct edgetpu_thermal *cooling = cdev->devdata; struct device *dev = cooling->dev; - if (WARN_ON(state_original >= ARRAY_SIZE(state_mapping))) { + if (state_original >= ARRAY_SIZE(state_mapping)) { dev_err(dev, "%s: invalid cooling state %lu\n", __func__, state_original); return -EINVAL; @@ -96,18 +86,21 @@ static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev, */ #if 0 ret = exynos_acpm_set_policy(TPU_ACPM_DOMAIN, pwr_state); -#endif +#else ret = 0; +#endif if (ret) { dev_err(dev, "error setting tpu policy: %d\n", ret); - mutex_unlock(&cooling->lock); - return ret; + goto out; } cooling->cooling_state = state_original; + } else { + ret = -EALREADY; } +out: mutex_unlock(&cooling->lock); - return 0; + return ret; } static int edgetpu_get_cur_state(struct thermal_cooling_device *cdev, @@ -118,12 +111,14 @@ static int edgetpu_get_cur_state(struct thermal_cooling_device *cdev, *state = cooling->cooling_state; if (*state >= ARRAY_SIZE(state_mapping)) { - dev_warn(cooling->dev, "Unknown cooling state: %lu, resetting\n", *state); + dev_warn(cooling->dev, + "Unknown cooling state: %lu, resetting\n", *state); mutex_lock(&cooling->lock); ret = exynos_acpm_set_policy(TPU_ACPM_DOMAIN, TPU_ACTIVE_OD); if (ret) { - dev_err(cooling->dev, "error setting tpu policy: %d\n", ret); + dev_err(cooling->dev, "error setting tpu policy: %d\n", + ret); mutex_unlock(&cooling->lock); return ret; } @@ -139,13 +134,16 @@ static int edgetpu_get_cur_state(struct thermal_cooling_device *cdev, static int edgetpu_state2power_internal(unsigned long state, u32 *power, struct device *dev) { - int i = 0; + int i; - find_state_pwr(i, state, state_pwr_map[i].state, state_pwr_map, *power, - state_pwr_map[i].power); + for (i = 0; i < ARRAY_SIZE(state_pwr_map); i++) { + if (state == state_pwr_map[i].state) { + *power = state_pwr_map[i].power; + return 0; + } + } dev_err(dev, "Unknown state req for: %lu\n", state); *power = 0; - WARN_ON(1); return -EINVAL; } @@ -192,7 +190,6 @@ static int edgetpu_power2state(struct thermal_cooling_device *cdev, } dev_err(cooling->dev, "No power2state mapping found: %d\n", power); - WARN_ON(1); return -EINVAL; } @@ -207,7 +204,8 @@ static struct thermal_cooling_device_ops edgetpu_cooling_ops = { static void tpu_thermal_exit_cooling(struct edgetpu_thermal *thermal) { - thermal_cooling_device_unregister(thermal->cdev); + if (!IS_ERR_OR_NULL(thermal->cdev)) + thermal_cooling_device_unregister(thermal->cdev); } static void tpu_thermal_exit(struct edgetpu_thermal *thermal) @@ -246,10 +244,14 @@ tpu_thermal_cooling_register(struct edgetpu_thermal *thermal, char *type) static int tpu_thermal_init(struct edgetpu_thermal *thermal, struct device *dev) { int err; + struct dentry *d; + d = debugfs_create_dir("cooling", edgetpu_fs_debugfs_dir()); + /* don't let debugfs creation failure abort the init procedure */ + if (!d) + dev_warn(dev, "failed to create debug fs for cooling"); thermal->dev = dev; - thermal->cooling_root = - debugfs_create_dir("cooling", edgetpu_fs_debugfs_dir()); + thermal->cooling_root = d; err = tpu_thermal_cooling_register(thermal, EDGETPU_COOLING_NAME); if (err) { diff --git a/drivers/edgetpu/abrolhos-usage-stats.c b/drivers/edgetpu/abrolhos-usage-stats.c new file mode 100644 index 0000000..1fd1fc2 --- /dev/null +++ b/drivers/edgetpu/abrolhos-usage-stats.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "edgetpu-usage-stats.c" diff --git a/drivers/edgetpu/edgetpu-core.c b/drivers/edgetpu/edgetpu-core.c index 872c5f2..9735b28 100644 --- a/drivers/edgetpu/edgetpu-core.c +++ b/drivers/edgetpu/edgetpu-core.c @@ -30,6 +30,7 @@ #include "edgetpu-mcp.h" #include "edgetpu-mmu.h" #include "edgetpu-telemetry.h" +#include "edgetpu-usage-stats.h" #include "edgetpu.h" static atomic_t single_dev_count = ATOMIC_INIT(-1); @@ -219,6 +220,8 @@ int edgetpu_device_add(struct edgetpu_dev *etdev, mutex_init(&etdev->open.lock); mutex_init(&etdev->groups_lock); + INIT_LIST_HEAD(&etdev->groups); + etdev->n_groups = 0; etdev->group_join_lockout = false; mutex_init(&etdev->state_lock); etdev->state = ETDEV_STATE_NOFW; @@ -241,6 +244,8 @@ int edgetpu_device_add(struct edgetpu_dev *etdev, } edgetpu_setup_mmu(etdev); + edgetpu_usage_stats_init(etdev); + etdev->kci = devm_kzalloc(etdev->dev, sizeof(*etdev->kci), GFP_KERNEL); if (!etdev->kci) { ret = -ENOMEM; @@ -279,6 +284,7 @@ remove_kci: /* releases the resources of KCI */ edgetpu_mailbox_remove_all(etdev->mailbox_manager); detach_mmu: + edgetpu_usage_stats_exit(etdev); edgetpu_mmu_detach(etdev); remove_dev: edgetpu_mark_probe_fail(etdev); @@ -291,6 +297,7 @@ void edgetpu_device_remove(struct edgetpu_dev *etdev) edgetpu_chip_exit(etdev); edgetpu_debug_dump_exit(etdev); edgetpu_mailbox_remove_all(etdev->mailbox_manager); + edgetpu_usage_stats_exit(etdev); edgetpu_mmu_detach(etdev); edgetpu_fs_remove(etdev); } diff --git a/drivers/edgetpu/edgetpu-debug-dump.c b/drivers/edgetpu/edgetpu-debug-dump.c index 6f83645..d8ccde9 100644 --- a/drivers/edgetpu/edgetpu-debug-dump.c +++ b/drivers/edgetpu/edgetpu-debug-dump.c @@ -5,6 +5,8 @@ * * Copyright (C) 2020 Google, Inc. */ +#include <linux/workqueue.h> + #include "edgetpu-config.h" #include "edgetpu-debug-dump.h" #include "edgetpu-iremap-pool.h" @@ -16,51 +18,6 @@ static inline u64 word_align_offset(u64 offset) (((offset % sizeof(u64)) == 0) ? 0 : 1); } -int edgetpu_debug_dump_init(struct edgetpu_dev *etdev) -{ -#ifdef CONFIG_ABROLHOS - size_t size; - int ret; - struct edgetpu_debug_dump_setup *dump_setup; - - size = EDGETPU_DEBUG_DUMP_MEM_SIZE; - - /* - * Allocate buffers for various dump segments and map them to FW - * accessible regions - */ - ret = edgetpu_iremap_alloc(etdev, size, &etdev->debug_dump_mem, - EDGETPU_CONTEXT_KCI); - if (ret) { - etdev_err(etdev, "Debug dump seg alloc failed"); - etdev->debug_dump_mem.vaddr = NULL; - return ret; - } - dump_setup = - (struct edgetpu_debug_dump_setup *)etdev->debug_dump_mem.vaddr; - dump_setup->dump_mem_size = size; - memset(dump_setup, 0, dump_setup->dump_mem_size); - return ret; -#else - return 0; -#endif /* CONFIG_ABROLHOS */ -} - -void edgetpu_debug_dump_exit(struct edgetpu_dev *etdev) -{ -#ifdef CONFIG_ABROLHOS - if (!etdev->debug_dump_mem.vaddr) { - etdev_dbg(etdev, "Debug dump not allocated"); - return; - } - /* - * Free the memory assigned for debug dump - */ - edgetpu_iremap_free(etdev, &etdev->debug_dump_mem, - EDGETPU_CONTEXT_KCI); -#endif /* CONFIG_ABROLHOS */ -} - int edgetpu_get_debug_dump(struct edgetpu_dev *etdev, u64 type) { int ret; @@ -86,6 +43,47 @@ int edgetpu_get_debug_dump(struct edgetpu_dev *etdev, u64 type) return ret; } +static void edgetpu_debug_dump_work(struct work_struct *work) +{ + struct edgetpu_dev *etdev; + struct edgetpu_debug_dump_setup *dump_setup; + struct edgetpu_debug_dump *debug_dump; + int ret; + u64 offset, dump_reason; + + etdev = container_of(work, struct edgetpu_dev, debug_dump_work); + dump_setup = + (struct edgetpu_debug_dump_setup *)etdev->debug_dump_mem.vaddr; + offset = sizeof(struct edgetpu_debug_dump_setup); + debug_dump = (struct edgetpu_debug_dump *)((u64 *)dump_setup + + word_align_offset(offset)); + + if (!etdev->debug_dump_handlers) { + etdev_err(etdev, + "Failed to generate coredump as handler is NULL"); + goto debug_dump_work_done; + } + + dump_reason = dump_setup->dump_req_reason; + if (dump_reason >= DUMP_REQ_REASON_NUM || + !etdev->debug_dump_handlers[dump_reason]) { + etdev_err(etdev, + "Failed to generate coredump as handler is NULL for dump request reason: 0x%llx", + dump_reason); + goto debug_dump_work_done; + } + + ret = etdev->debug_dump_handlers[dump_reason] + ((void *)etdev, (void *)dump_setup); + if (ret) { + etdev_err(etdev, "Failed to generate coredump: %d\n", ret); + goto debug_dump_work_done; + } + +debug_dump_work_done: + debug_dump->host_dump_available_to_read = false; +} + void edgetpu_debug_dump_resp_handler(struct edgetpu_dev *etdev) { struct edgetpu_debug_dump_setup *dump_setup; @@ -104,9 +102,8 @@ void edgetpu_debug_dump_resp_handler(struct edgetpu_dev *etdev) if (!debug_dump->host_dump_available_to_read) return; - /* - * TODO (b/156049774): Dump segments may be collected here and exposed - * to SSCD. - */ - debug_dump->host_dump_available_to_read = false; + if (!etdev->debug_dump_work.func) + INIT_WORK(&etdev->debug_dump_work, edgetpu_debug_dump_work); + + schedule_work(&etdev->debug_dump_work); } diff --git a/drivers/edgetpu/edgetpu-debug-dump.h b/drivers/edgetpu/edgetpu-debug-dump.h index 7313021..ec33668 100644 --- a/drivers/edgetpu/edgetpu-debug-dump.h +++ b/drivers/edgetpu/edgetpu-debug-dump.h @@ -10,7 +10,7 @@ #include "edgetpu-internal.h" -#define DEBUG_DUMP_HOST_CONTRACT_VERSION 1 +#define DEBUG_DUMP_HOST_CONTRACT_VERSION 2 enum edgetpu_dump_type_bit_position { DUMP_TYPE_CRASH_REASON_BIT = 0, @@ -24,6 +24,13 @@ enum edgetpu_dump_type_bit_position { }; +enum edgetpu_dump_request_reason { + DUMP_REQ_REASON_DEFAULT = 0, + DUMP_REQ_REASON_WDT_TIMEOUT = 1, + DUMP_REQ_REASON_BY_USER = 2, + DUMP_REQ_REASON_NUM = 3 +}; + struct edgetpu_crash_reason { u64 code; /* code that captures the reset reason */ }; @@ -57,6 +64,7 @@ struct edgetpu_debug_dump_setup { /* types of dumps requested by host */ u64 type; u64 dump_mem_size; /* total size of memory allocated to dump */ + u64 dump_req_reason; /* debug dump request reason */ u64 reserved[2]; }; diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c index f9d681e..f323a3c 100644 --- a/drivers/edgetpu/edgetpu-device-group.c +++ b/drivers/edgetpu/edgetpu-device-group.c @@ -12,6 +12,7 @@ #include <linux/eventfd.h> #include <linux/iommu.h> #include <linux/kconfig.h> +#include <linux/list.h> #include <linux/mm.h> #include <linux/refcount.h> #include <linux/scatterlist.h> @@ -32,6 +33,7 @@ #include "edgetpu-sw-watchdog.h" #include "edgetpu-usr.h" #include "edgetpu.h" +#include "mm-backport.h" #ifdef EDGETPU_HAS_P2P_MAILBOX #include "edgetpu-p2p-mailbox.h" @@ -83,6 +85,7 @@ static int edgetpu_kci_leave_group_worker(struct kci_worker_param *param) struct edgetpu_dev *etdev = edgetpu_device_group_nth_etdev(group, i); etdev_dbg(etdev, "%s: leave group %u", __func__, group->workload_id); + edgetpu_kci_update_usage(etdev); edgetpu_kci_leave_group(etdev->kci); return 0; } @@ -126,13 +129,20 @@ static void edgetpu_group_kci_close_device(struct edgetpu_device_group *group) } /* - * Asynchronously sends LEAVE_GROUP KCI to all devices in @group. + * Handle KCI chores for device group disband. + * + * For multi-chip architectures: asynchronously send LEAVE_GROUP KCI to all + * devices in @group (and GET_USAGE to update usage stats). + * + * For single-chip, multiple client architectures: send KCI CLOSE_DEVICE + * to the device (and GET_USAGE to update usage stats). * * Caller holds group->lock. */ static void edgetpu_device_group_kci_leave(struct edgetpu_device_group *group) { #if IS_ENABLED(CONFIG_ABROLHOS) + edgetpu_kci_update_usage(group->etdev); return edgetpu_group_kci_close_device(group); #else /* !CONFIG_ABROLHOS */ struct kci_worker_param *params = @@ -417,34 +427,30 @@ static bool edgetpu_group_check_contiguity(struct edgetpu_device_group *group) } /* - * Finds an empty slot of @etdev->groups and assigns @group to it. + * Inserts @group to the list @etdev->groups. * - * Returns the non-negative index of etdev->groups on success. - * Returns -EBUSY if no empty slot found. + * Returns 0 on success. + * Returns -EAGAIN if group join is currently disabled. */ static int edgetpu_dev_add_group(struct edgetpu_dev *etdev, struct edgetpu_device_group *group) { - int i; + struct edgetpu_list_group *l = kmalloc(sizeof(*l), GFP_KERNEL); + if (!l) + return -ENOMEM; mutex_lock(&etdev->groups_lock); if (etdev->group_join_lockout) { mutex_unlock(&etdev->groups_lock); + kfree(l); return -EAGAIN; } - for (i = 0; i < EDGETPU_NGROUPS; i++) { - if (!etdev->groups[i]) - break; - } + l->grp = edgetpu_device_group_get(group); + list_add_tail(&l->list, &etdev->groups); + etdev->n_groups++; - if (i >= EDGETPU_NGROUPS) { - mutex_unlock(&etdev->groups_lock); - return -EBUSY; - } - etdev->groups[i] = edgetpu_device_group_get(group); mutex_unlock(&etdev->groups_lock); - - return i; + return 0; } void edgetpu_device_group_put(struct edgetpu_device_group *group) @@ -458,23 +464,16 @@ void edgetpu_device_group_put(struct edgetpu_device_group *group) /* caller must hold @etdev->groups_lock. */ static bool edgetpu_in_any_group_locked(struct edgetpu_dev *etdev) { - int i; - - for (i = 0; i < EDGETPU_NGROUPS; i++) { - if (etdev->groups[i]) - return true; - } - - return false; + return etdev->n_groups; } /* caller must hold the client's etdev state_lock. */ void edgetpu_device_group_leave_locked(struct edgetpu_client *client) { struct edgetpu_device_group *group; + struct edgetpu_list_group *l; struct edgetpu_list_client *cur, *nxt; bool will_disband = false; - int i; mutex_lock(&client->group_lock); group = client->group; @@ -530,10 +529,12 @@ void edgetpu_device_group_leave_locked(struct edgetpu_client *client) mutex_unlock(&client->group_lock); /* remove the group from the client device */ mutex_lock(&client->etdev->groups_lock); - for (i = 0; i < EDGETPU_NGROUPS; i++) { - if (client->etdev->groups[i] == group) { - edgetpu_device_group_put(client->etdev->groups[i]); - client->etdev->groups[i] = NULL; + list_for_each_entry(l, &client->etdev->groups, list) { + if (l->grp == group) { + list_del(&l->list); + edgetpu_device_group_put(l->grp); + kfree(l); + client->etdev->n_groups--; break; } } @@ -553,7 +554,6 @@ static int edgetpu_device_group_add_locked(struct edgetpu_device_group *group, struct edgetpu_client *client) { struct edgetpu_list_client *c; - int i; int ret = 0; mutex_lock(&client->group_lock); @@ -581,10 +581,9 @@ static int edgetpu_device_group_add_locked(struct edgetpu_device_group *group, goto out; } - i = edgetpu_dev_add_group(client->etdev, group); - if (i < 0) { + ret = edgetpu_dev_add_group(client->etdev, group); + if (ret) { kfree(c); - ret = i; goto out; } @@ -1447,14 +1446,14 @@ out: void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev) { - int i; + struct edgetpu_list_group *l; + struct edgetpu_device_group *group; mutex_lock(&etdev->groups_lock); - for (i = 0; i < EDGETPU_NGROUPS; i++) { - if (etdev->groups[i]) - edgetpu_group_notify(etdev->groups[i], - EDGETPU_EVENT_FATAL_ERROR); - } + + etdev_for_each_group(etdev, l, group) + edgetpu_group_notify(group, EDGETPU_EVENT_FATAL_ERROR); + mutex_unlock(&etdev->groups_lock); } diff --git a/drivers/edgetpu/edgetpu-device-group.h b/drivers/edgetpu/edgetpu-device-group.h index 3c68dd4..87a0987 100644 --- a/drivers/edgetpu/edgetpu-device-group.h +++ b/drivers/edgetpu/edgetpu-device-group.h @@ -106,6 +106,23 @@ struct edgetpu_device_group { }; /* + * Entry of edgetpu_dev#groups. + * + * Files other than edgetpu-device-group.c shouldn't need to access this + * structure. Use macro etdev_for_each_group to access the groups under an + * etdev. + */ +struct edgetpu_list_group { + struct list_head list; + struct edgetpu_device_group *grp; +}; + +/* Macro to loop through etdev->groups. */ +#define etdev_for_each_group(etdev, l, g) \ + for (l = list_entry(etdev->groups.next, typeof(*l), list), g = l->grp; \ + &l->list != &etdev->groups; \ + l = list_entry(l->list.next, typeof(*l), list), g = l->grp) +/* * Returns if the group is waiting for members to join. * * Must be called with lock held. diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c index ce0cd50..6edff9e 100644 --- a/drivers/edgetpu/edgetpu-firmware.c +++ b/drivers/edgetpu/edgetpu-firmware.c @@ -595,43 +595,41 @@ static const struct attribute_group edgetpu_firmware_attr_group = { .attrs = dev_attrs, }; -static void edgetpu_firmware_wdt_timeout_action(void *data) +/* + * Can only be called with etdev->state == ETDEV_STATE_FWLOADING. + */ +static void edgetpu_abort_clients(struct edgetpu_dev *etdev) { - int ret, i, num_clients = 0; - struct edgetpu_dev *etdev = data; + int i, num_clients = 0; struct edgetpu_device_group *group; - struct edgetpu_client *clients[EDGETPU_NGROUPS]; + struct edgetpu_list_group *g; + struct edgetpu_client **clients; struct edgetpu_list_client *c; - struct edgetpu_firmware *et_fw = etdev->firmware; - - /* Don't attempt f/w restart if device is off. */ - if (!edgetpu_is_powered(etdev)) - return; - - mutex_lock(&etdev->state_lock); - if (etdev->state == ETDEV_STATE_FWLOADING) { - mutex_unlock(&etdev->state_lock); - return; - } - etdev->state = ETDEV_STATE_FWLOADING; - mutex_unlock(&etdev->state_lock); /* * We don't hold etdev->groups_lock here because - * 1. All group operations should be protected by "state GOOD" and + * 1. All group operations (functions in edgetpu-device-group.c) + * are skipped when "etdev->state is not GOOD", we shall be the + * only one accessing @etdev->groups, and * 2. to prevent LOCKDEP from reporting deadlock with * edgetpu_device_group_add_locked, which nested holds group->lock * then etdev->groups_lock. */ - for (i = 0; i < EDGETPU_NGROUPS; i++) { - group = etdev->groups[i]; - if (!group) - continue; + clients = kmalloc_array(etdev->n_groups, sizeof(*clients), GFP_KERNEL); + if (!clients) { + /* + * Just give up aborting clients in this case, this should never + * happen after all. + */ + edgetpu_fatal_error_notify(etdev); + return; + } + etdev_for_each_group(etdev, g, group) { mutex_lock(&group->lock); list_for_each_entry(c, &group->clients, list) { if (etdev == c->client->etdev) { clients[num_clients++] = - edgetpu_client_get(c->client); + edgetpu_client_get(c->client); break; } } @@ -646,6 +644,28 @@ static void edgetpu_firmware_wdt_timeout_action(void *data) edgetpu_device_group_leave_locked(clients[i]); edgetpu_client_put(clients[i]); } + kfree(clients); +} + +static void edgetpu_firmware_wdt_timeout_action(void *data) +{ + int ret; + struct edgetpu_dev *etdev = data; + struct edgetpu_firmware *et_fw = etdev->firmware; + + /* Don't attempt f/w restart if device is off. */ + if (!edgetpu_is_powered(etdev)) + return; + + mutex_lock(&etdev->state_lock); + if (etdev->state == ETDEV_STATE_FWLOADING) { + mutex_unlock(&etdev->state_lock); + return; + } + etdev->state = ETDEV_STATE_FWLOADING; + mutex_unlock(&etdev->state_lock); + + edgetpu_abort_clients(etdev); ret = edgetpu_firmware_lock(etdev); /* diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c index 998704c..047b713 100644 --- a/drivers/edgetpu/edgetpu-fs.c +++ b/drivers/edgetpu/edgetpu-fs.c @@ -880,17 +880,13 @@ static const struct file_operations statusregs_ops = { static int mappings_show(struct seq_file *s, void *data) { struct edgetpu_dev *etdev = s->private; - int i; + struct edgetpu_list_group *l; + struct edgetpu_device_group *group; mutex_lock(&etdev->groups_lock); - for (i = 0; i < EDGETPU_NGROUPS; i++) { - struct edgetpu_device_group *group = etdev->groups[i]; - - if (!group) - continue; + etdev_for_each_group(etdev, l, group) edgetpu_group_mappings_show(group, s); - } mutex_unlock(&etdev->groups_lock); edgetpu_kci_mappings_show(etdev, s); diff --git a/drivers/edgetpu/edgetpu-internal.h b/drivers/edgetpu/edgetpu-internal.h index 34da92d..237fdd2 100644 --- a/drivers/edgetpu/edgetpu-internal.h +++ b/drivers/edgetpu/edgetpu-internal.h @@ -28,10 +28,12 @@ #include <linux/refcount.h> #include <linux/scatterlist.h> #include <linux/types.h> +#include <linux/workqueue.h> #include "edgetpu.h" #include "edgetpu-pm.h" #include "edgetpu-thermal.h" +#include "edgetpu-usage-stats.h" #define etdev_err(etdev, fmt, ...) dev_err((etdev)->etcdev, fmt, ##__VA_ARGS__) #define etdev_warn(etdev, fmt, ...) \ @@ -135,6 +137,8 @@ struct edgetpu_kci; struct edgetpu_telemetry_ctx; struct edgetpu_mempool; +typedef int(*edgetpu_debug_dump_handlers)(void *etdev, void *dump_setup); + #define EDGETPU_DEVICE_NAME_MAX 64 /* ioremapped resource */ @@ -168,8 +172,9 @@ struct edgetpu_dev { struct dentry *d_entry; /* debugfs dir for this device */ struct mutex state_lock; /* protects state of this device */ enum edgetpu_dev_state state; - struct mutex groups_lock; /* protects groups and lockout */ - struct edgetpu_device_group *groups[EDGETPU_NGROUPS]; + struct mutex groups_lock; /* protects groups, n_groups, and lockout */ + struct list_head groups; + uint n_groups; /* number of entries in @groups */ bool group_join_lockout; /* disable group join while reinit */ void *mmu_cookie; /* mmu driver private data */ void *dram_cookie; /* on-device DRAM private data */ @@ -178,6 +183,7 @@ struct edgetpu_dev { struct edgetpu_firmware *firmware; /* firmware management */ struct edgetpu_telemetry_ctx *telemetry; struct edgetpu_thermal *thermal; + struct edgetpu_usage_stats *usage_stats; /* usage stats private data */ struct edgetpu_pm *pm; /* Power management interface */ /* Memory pool in instruction remap region */ struct edgetpu_mempool *iremap_pool; @@ -185,10 +191,14 @@ struct edgetpu_dev { uint mcp_die_index; /* physical die index w/in multichip pkg */ u8 mcp_pkg_type; /* multichip pkg type */ struct edgetpu_sw_wdt *etdev_sw_wdt; /* software watchdog */ + bool reset_needed; /* error recovery requests full chip reset. */ /* version read from the firmware binary file */ struct edgetpu_fw_version fw_version; atomic_t job_count; /* times joined to a device group */ struct edgetpu_coherent_mem debug_dump_mem; /* debug dump memory */ + /* debug dump handlers */ + edgetpu_debug_dump_handlers *debug_dump_handlers; + struct work_struct debug_dump_work; }; extern const struct file_operations edgetpu_fops; diff --git a/drivers/edgetpu/edgetpu-kci.c b/drivers/edgetpu/edgetpu-kci.c index 0a42ce6..609d411 100644 --- a/drivers/edgetpu/edgetpu-kci.c +++ b/drivers/edgetpu/edgetpu-kci.c @@ -19,6 +19,7 @@ #include "edgetpu-iremap-pool.h" #include "edgetpu-mmu.h" #include "edgetpu-telemetry.h" +#include "edgetpu-usage-stats.h" /* the index of mailbox for kernel should always be zero */ #define KERNEL_MAILBOX_INDEX 0 @@ -31,7 +32,7 @@ /* Set extra ludicrously high to 60 seconds for (slow) Palladium emulation. */ #define KCI_TIMEOUT (60000) #else -/* 5 secs. TODO(134408592): Define a timeout for TPU CPU responses */ +/* 5 secs. */ #define KCI_TIMEOUT (5000) #endif @@ -730,6 +731,44 @@ enum edgetpu_fw_flavor edgetpu_kci_fw_info( return flavor; } +void edgetpu_kci_update_usage(struct edgetpu_dev *etdev) +{ +#define EDGETPU_USAGE_BUFFER_SIZE 4096 + struct edgetpu_command_element cmd = { + .code = KCI_CODE_GET_USAGE, + .dma = { + .address = 0, + .size = 0, + }, + }; + struct edgetpu_coherent_mem mem; + struct edgetpu_kci_response_element resp; + int ret; + + ret = edgetpu_iremap_alloc(etdev, EDGETPU_USAGE_BUFFER_SIZE, &mem, + EDGETPU_CONTEXT_KCI); + + if (ret) { + etdev_warn_once(etdev, "%s: failed to allocate usage buffer", + __func__); + return; + } + + cmd.dma.address = mem.tpu_addr; + cmd.dma.size = EDGETPU_USAGE_BUFFER_SIZE; + memset(mem.vaddr, 0, sizeof(struct usage_tracker_header)); + ret = edgetpu_kci_send_cmd_return_resp(etdev->kci, &cmd, &resp); + + if (ret == KCI_ERROR_UNIMPLEMENTED || ret == KCI_ERROR_UNAVAILABLE) + etdev_dbg(etdev, "firmware does not report usage\n"); + else if (ret == KCI_ERROR_OK) + edgetpu_usage_stats_process_buffer(etdev, mem.vaddr); + else + etdev_warn_once(etdev, "%s: error %d", __func__, ret); + + edgetpu_iremap_free(etdev, &mem, EDGETPU_CONTEXT_KCI); +} + /* debugfs mappings dump */ void edgetpu_kci_mappings_show(struct edgetpu_dev *etdev, struct seq_file *s) { diff --git a/drivers/edgetpu/edgetpu-kci.h b/drivers/edgetpu/edgetpu-kci.h index fe11a3b..aa77d9a 100644 --- a/drivers/edgetpu/edgetpu-kci.h +++ b/drivers/edgetpu/edgetpu-kci.h @@ -95,6 +95,7 @@ enum edgetpu_kci_code { KCI_CODE_OPEN_DEVICE = 9, KCI_CODE_CLOSE_DEVICE = 10, KCI_CODE_FIRMWARE_INFO = 11, + KCI_CODE_GET_USAGE = 12, }; /* @@ -244,6 +245,9 @@ int edgetpu_kci_ack(struct edgetpu_kci *kci); enum edgetpu_fw_flavor edgetpu_kci_fw_info( struct edgetpu_kci *kci, struct edgetpu_fw_info *fw_info); +/* Retrieve usage tracking data from firmware, update info on host. */ +void edgetpu_kci_update_usage(struct edgetpu_dev *etdev); + /* * Sends the "Map Log Buffer" command and waits for remote response. * diff --git a/drivers/edgetpu/edgetpu-mailbox.c b/drivers/edgetpu/edgetpu-mailbox.c index f2fb859..90b3ed8 100644 --- a/drivers/edgetpu/edgetpu-mailbox.c +++ b/drivers/edgetpu/edgetpu-mailbox.c @@ -710,14 +710,13 @@ void edgetpu_mailbox_reinit_vii(struct edgetpu_device_group *group) void edgetpu_mailbox_restore_active_vii_queues(struct edgetpu_dev *etdev) { - int i; + struct edgetpu_list_group *l; struct edgetpu_device_group *group; u32 mailbox_ids = 0; mutex_lock(&etdev->groups_lock); - for (i = 0; i < EDGETPU_NGROUPS; i++) { - group = etdev->groups[i]; - if (group && !edgetpu_group_mailbox_detached_locked(group)) { + etdev_for_each_group(etdev, l, group) { + if (!edgetpu_group_mailbox_detached_locked(group)) { edgetpu_mailbox_reinit_vii(group); if (edgetpu_device_group_is_finalized(group)) mailbox_ids |= diff --git a/drivers/edgetpu/edgetpu-pm.c b/drivers/edgetpu/edgetpu-pm.c index 8905cf9..b700c19 100644 --- a/drivers/edgetpu/edgetpu-pm.c +++ b/drivers/edgetpu/edgetpu-pm.c @@ -201,11 +201,11 @@ static int pchannel_state_change_request(struct edgetpu_dev *etdev, int state) if (val & PDENY) { edgetpu_dev_write_32(etdev, EDGETPU_REG_POWER_CONTROL, val & !state); - etdev_err(etdev, "p-channel state change request denied\n"); + etdev_dbg(etdev, "p-channel state change request denied\n"); deny = true; } if (ret) { - etdev_err(etdev, "p-channel state change request timeout\n"); + etdev_dbg(etdev, "p-channel state change request timeout\n"); return ret; } /* Phase 4. Drive PREQ to 0 */ @@ -226,7 +226,8 @@ int edgetpu_pchannel_power_down(struct edgetpu_dev *etdev, bool wait_on_pactive) edgetpu_sw_wdt_stop(etdev); ret = edgetpu_kci_shutdown(etdev->kci); if (ret) { - etdev_err(etdev, "request power down routing failed\n"); + etdev_err(etdev, "p-channel power down routing failed: %d", + ret); return ret; } if (wait_on_pactive) { @@ -241,6 +242,10 @@ int edgetpu_pchannel_power_down(struct edgetpu_dev *etdev, bool wait_on_pactive) tries--; } while (ret && tries); + if (ret) + etdev_err(etdev, "p-channel shutdown state change failed: %d", + ret); + return ret; } diff --git a/drivers/edgetpu/edgetpu-sw-watchdog.c b/drivers/edgetpu/edgetpu-sw-watchdog.c index 397c021..2db2b47 100644 --- a/drivers/edgetpu/edgetpu-sw-watchdog.c +++ b/drivers/edgetpu/edgetpu-sw-watchdog.c @@ -25,6 +25,23 @@ static void sw_wdt_handler_work(struct work_struct *work) et_action_work->edgetpu_sw_wdt_handler(et_action_work->data); } +void edgetpu_watchdog_bite(struct edgetpu_dev *etdev, bool reset) +{ + if (!etdev->etdev_sw_wdt) + return; + /* + * Stop sw wdog delayed worker, to reduce chance this explicit call + * races with a sw wdog timeout. May be in IRQ context, no sync, + * worker may already be active. If we race with a sw wdog restart + * and need a chip reset, hopefully the P-channel reset will fail + * and the bigger hammer chip reset will kick in at that point. + */ + cancel_delayed_work(&etdev->etdev_sw_wdt->dwork); + etdev_err(etdev, "watchdog %s", reset ? "reset" : "restart"); + etdev->reset_needed = reset; + schedule_work(&etdev->etdev_sw_wdt->et_action_work.work); +} + /* * Ping the f/w for a response. Reschedule the work for next beat * in case of response or schedule a worker for action callback in case of diff --git a/drivers/edgetpu/edgetpu-sw-watchdog.h b/drivers/edgetpu/edgetpu-sw-watchdog.h index c278912..7b214b2 100644 --- a/drivers/edgetpu/edgetpu-sw-watchdog.h +++ b/drivers/edgetpu/edgetpu-sw-watchdog.h @@ -48,4 +48,11 @@ void edgetpu_sw_wdt_set_handler(struct edgetpu_dev *etdev, void edgetpu_sw_wdt_modify_heartbeat(struct edgetpu_dev *etdev, unsigned long hrtbeat_ms); +/* + * Schedule sw watchdog action immediately. Called on fatal errors. + * @reset: true if error recovery requires a full chip reset, not just + * firmware restart. + */ +void edgetpu_watchdog_bite(struct edgetpu_dev *etdev, bool reset); + #endif /* __EDGETPU_SW_WDT_H__ */ diff --git a/drivers/edgetpu/edgetpu-thermal.h b/drivers/edgetpu/edgetpu-thermal.h index c9d38bc..4e97f07 100644 --- a/drivers/edgetpu/edgetpu-thermal.h +++ b/drivers/edgetpu/edgetpu-thermal.h @@ -1,10 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Edge TPU thermal driver header. + * EdgeTPU thermal driver header. * * Copyright (C) 2020 Google, Inc. */ - #ifndef __EDGETPU_THERMAL_H__ #define __EDGETPU_THERMAL_H__ diff --git a/drivers/edgetpu/edgetpu-usage-stats.c b/drivers/edgetpu/edgetpu-usage-stats.c new file mode 100644 index 0000000..b7b309c --- /dev/null +++ b/drivers/edgetpu/edgetpu-usage-stats.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * EdgeTPU usage stats + * + * Copyright (C) 2020 Google, Inc. + */ + +#include <linux/slab.h> +#include <linux/sysfs.h> + +#include "edgetpu-internal.h" +#include "edgetpu-usage-stats.h" + +#if IS_ENABLED(CONFIG_ABROLHOS) + +#include "abrolhos-pm.h" + +static enum tpu_pwr_state tpu_states_arr[] = { + TPU_ACTIVE_SUD, + TPU_ACTIVE_UD, + TPU_ACTIVE_NOM, + TPU_ACTIVE_OD, +}; + +#else /* !CONFIG_ABROLHOS */ + +/* All execution times will be added to the same state. */ +static uint32_t tpu_states_arr[] = { + 0, +}; + +#endif /* CONFIG_ABROLHOS */ + +#define NUM_TPU_STATES ARRAY_SIZE(tpu_states_arr) + +struct uid_entry { + int32_t uid; + uint64_t time_in_state[NUM_TPU_STATES]; + struct hlist_node node; +}; + +static int tpu_state_map(uint32_t state) +{ + int i; + + for (i = (NUM_TPU_STATES - 1); i >= 0; i--) { + if (state >= tpu_states_arr[i]) + return i; + } + + return 0; +} + +/* Caller must hold usage_stats lock */ +static struct uid_entry * +find_uid_entry_locked(int32_t uid, struct edgetpu_usage_stats *ustats) +{ + struct uid_entry *uid_entry; + + hash_for_each_possible(ustats->uid_hash_table, uid_entry, node, uid) { + if (uid_entry->uid == uid) + return uid_entry; + } + + return NULL; +} + +int edgetpu_usage_stats_add(int32_t uid, uint32_t state, uint32_t duration, + struct edgetpu_dev *etdev) +{ + struct edgetpu_usage_stats *ustats = etdev->usage_stats; + struct uid_entry *uid_entry; + + if (!ustats) + return 0; + + etdev_dbg(etdev, "%s: uid=%u state=%u dur=%u", __func__, uid, state, + duration); + mutex_lock(&ustats->usage_stats_lock); + + /* Find the uid in uid_hash_table first */ + uid_entry = find_uid_entry_locked(uid, ustats); + if (uid_entry) { + uid_entry->time_in_state[tpu_state_map(state)] += duration; + mutex_unlock(&ustats->usage_stats_lock); + return 0; + } + + /* Allocate memory for this uid */ + uid_entry = kzalloc(sizeof(*uid_entry), GFP_KERNEL); + if (!uid_entry) { + mutex_unlock(&ustats->usage_stats_lock); + return -ENOMEM; + } + + uid_entry->uid = uid; + uid_entry->time_in_state[tpu_state_map(state)] += duration; + + /* Add uid_entry to the uid_hash_table */ + hash_add(ustats->uid_hash_table, &uid_entry->node, uid); + + mutex_unlock(&ustats->usage_stats_lock); + + return 0; +} + +void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf) +{ + struct usage_tracker_header *header = buf; + struct usage_tracker_metric *metric = + (struct usage_tracker_metric *)(header + 1); + int i; + + etdev_dbg(etdev, "%s: n=%u sz=%u", __func__, + header->num_metrics, header->metric_size); + if (header->metric_size != sizeof(struct usage_tracker_metric)) { + etdev_dbg(etdev, "%s: expected sz=%zu, discard", __func__, + sizeof(struct usage_tracker_metric)); + return; + } + + for (i = 0; i < header->num_metrics; i++) { + switch (metric->type) { + case metric_type_tpu_usage: + { + struct tpu_usage *tpu_usage = + &metric->tpu_usage; + + edgetpu_usage_stats_add( + tpu_usage->uid, tpu_usage->power_state, + tpu_usage->duration_us, etdev); + } + break; + default: + etdev_dbg(etdev, "%s: %d: skip unknown type=%u", + __func__, i, metric->type); + break; + } + + metric++; + } +} + +static ssize_t usage_stats_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + struct edgetpu_usage_stats *ustats = etdev->usage_stats; + int i; + int ret = 0; + unsigned int bkt; + struct uid_entry *uid_entry; + + /* uid: TPU_ACTIVE_SUD TPU_ACTIVE_UD TPU_ACTIVE_NOM TPU_ACTIVE_OD */ + ret += scnprintf(buf, PAGE_SIZE, "uid:"); + + for (i = 0; i < NUM_TPU_STATES; i++) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, " %d", + tpu_states_arr[i]); + + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + + mutex_lock(&ustats->usage_stats_lock); + + hash_for_each(ustats->uid_hash_table, bkt, uid_entry, node) { + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%d:", + uid_entry->uid); + + for (i = 0; i < NUM_TPU_STATES; i++) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, " %lld", + uid_entry->time_in_state[i]); + + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + } + + mutex_unlock(&ustats->usage_stats_lock); + + return ret; +} + +static void usage_stats_remove_uids(struct edgetpu_usage_stats *ustats) +{ + unsigned int bkt; + struct uid_entry *uid_entry; + struct hlist_node *tmp; + + mutex_lock(&ustats->usage_stats_lock); + + hash_for_each_safe(ustats->uid_hash_table, bkt, tmp, uid_entry, node) { + hash_del(&uid_entry->node); + kfree(uid_entry); + } + + mutex_unlock(&ustats->usage_stats_lock); +} + +/* Write to clear all entries in uid_hash_table */ +static ssize_t usage_stats_clear(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + struct edgetpu_usage_stats *ustats = etdev->usage_stats; + + usage_stats_remove_uids(ustats); + + return count; +} + +static DEVICE_ATTR(usage_stats, 0644, usage_stats_show, usage_stats_clear); + +void edgetpu_usage_stats_init(struct edgetpu_dev *etdev) +{ + struct edgetpu_usage_stats *ustats; + int ret; + + ustats = devm_kzalloc(etdev->dev, sizeof(*etdev->usage_stats), + GFP_KERNEL); + if (!ustats) { + etdev_warn(etdev, + "failed to allocate memory for usage stats\n"); + return; + } + + hash_init(ustats->uid_hash_table); + mutex_init(&ustats->usage_stats_lock); + + etdev->usage_stats = ustats; + + ret = device_create_file(etdev->dev, &dev_attr_usage_stats); + if (ret) + etdev_warn(etdev, "failed to create the usage_stats file\n"); + + etdev_dbg(etdev, "%s init\n", __func__); +} + +void edgetpu_usage_stats_exit(struct edgetpu_dev *etdev) +{ + struct edgetpu_usage_stats *ustats = etdev->usage_stats; + + if (ustats) { + usage_stats_remove_uids(ustats); + device_remove_file(etdev->dev, &dev_attr_usage_stats); + } + + etdev_dbg(etdev, "%s exit\n", __func__); +} diff --git a/drivers/edgetpu/edgetpu-usage-stats.h b/drivers/edgetpu/edgetpu-usage-stats.h new file mode 100644 index 0000000..42d75df --- /dev/null +++ b/drivers/edgetpu/edgetpu-usage-stats.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * EdgeTPU usage stats header + * + * Copyright (C) 2020 Google, Inc. + */ +#ifndef __EDGETPU_USAGE_STATS_H__ +#define __EDGETPU_USAGE_STATS_H__ + +#include <linux/hashtable.h> +#include <linux/mutex.h> + +/* Header struct in the metric buffer. */ +/* Must be kept in sync with firmware struct UsageTrackerHeader */ +struct usage_tracker_header { + uint32_t num_metrics; /* Number of metrics being reported */ + uint32_t metric_size; /* Size of each metric struct */ +}; + +/* + * Encapsulate TPU core usage information of a specific application for a + * specific power state. + * Must be kept in sync with firmware struct TpuUsage. + */ +struct tpu_usage { + /* Unique identifier of the application. */ + int32_t uid; + /* The power state of the device (values are chip dependent) */ + uint32_t power_state; + /* Duration of usage in microseconds. */ + uint32_t duration_us; +}; + +/* Must be kept in sync with firmware enum class UsageTrackerMetric::Type */ +enum usage_tracker_metric_type { + metric_type_reserved = 0, + metric_type_tpu_usage = 1, +}; + +/* + * Encapsulates a single metric reported to the kernel. + * Must be kept in sync with firmware struct UsageTrackerMetric. + */ +struct usage_tracker_metric { + uint32_t type; + uint8_t reserved[4]; + union { + struct tpu_usage tpu_usage; + }; +}; + +#define UID_HASH_BITS 3 + +struct edgetpu_usage_stats { + DECLARE_HASHTABLE(uid_hash_table, UID_HASH_BITS); + struct mutex usage_stats_lock; +}; + +int edgetpu_usage_stats_add(int32_t uid, uint32_t state, uint32_t duration, + struct edgetpu_dev *etdev); +void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf); +void edgetpu_usage_stats_init(struct edgetpu_dev *etdev); +void edgetpu_usage_stats_exit(struct edgetpu_dev *etdev); + +#endif /* __EDGETPU_USAGE_STATS_H__ */ diff --git a/drivers/edgetpu/mm-backport.h b/drivers/edgetpu/mm-backport.h new file mode 100644 index 0000000..2e2f9a7 --- /dev/null +++ b/drivers/edgetpu/mm-backport.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Backport mm APIs. + * + * Copyright (C) 2021 Google, Inc. + */ +#ifndef __MM_BACKPORT_H__ +#define __MM_BACKPORT_H__ + +#include <linux/mm.h> + +/* + * Define pin_user_pages* which are introduced in Linux 5.6. + * + * We simply define pin_user_pages* as get_user_pages* here so our driver can + * prefer PIN over GET when possible. + */ +#ifndef FOLL_PIN + +/* define as zero to prevent older get_user_pages* returning EINVAL */ +#define FOLL_LONGTERM 0 + +#define pin_user_pages_fast get_user_pages_fast +#define unpin_user_page put_page + +#endif /* FOLL_PIN */ + +#endif /* __MM_BACKPORT_H__ */ |