From fc35ce43860e37450a734f05e8314b1665f052cf Mon Sep 17 00:00:00 2001 From: Whi copybara merger Date: Tue, 1 Feb 2022 20:49:26 -0800 Subject: [Copybara Auto Merge] Merge branch 'whitechapel' into android-gs-pixel-5.101 edgetpu: initialize telemetry buffer size Bug: 216696239 edgetpu: change err logging during fw dbg dump buffer init Bug: 216522017 edgetpu: Modify wdt ref count based on mailbox activation Bug: 213415021 edgetpu: fixup comment typos edgetpu: fix memory allocation in ext test Bug: 215071893 edgetpu: always reset CPU on power down Bug: 214251686 edgetpu: Send single kci for mboxes with the same VCID. Bug: 201503258 edgetpu: dump etdev info Bug: 214028088 edgetpu: dump client info Bug: 214028088 edgetpu: debug dump mappings with headers Bug: 214027183 edgetpu: dbg dump remove queues edgetpu: dbg dump groups info Bug: 214028088 edgetpu: dbg dump introduce sscd context Bug: 214028088 edgetpu: define kernel info structs for debug dump Bug: 214027183 Bug: 214028088 edgetpu: Set debug dump buffer in FW after handshake Bug: 176556808 edgetpu: move wakelock releasing to client put GitOrigin-RevId: f5d0f49ce2fdd8af1a59137eb6f2af9acf3529de Change-Id: I442991d28371f57b8da3dbfb96d25ef833db6935 --- drivers/edgetpu/abrolhos/config-mailbox.h | 2 +- drivers/edgetpu/edgetpu-core.c | 11 +- drivers/edgetpu/edgetpu-debug-dump.c | 26 +- drivers/edgetpu/edgetpu-debug-dump.h | 11 +- drivers/edgetpu/edgetpu-device-group.c | 41 ++- drivers/edgetpu/edgetpu-dump-info.h | 101 ++++++ drivers/edgetpu/edgetpu-firmware.c | 3 + drivers/edgetpu/edgetpu-fs.c | 4 +- drivers/edgetpu/edgetpu-kci.c | 15 +- drivers/edgetpu/edgetpu-kci.h | 24 +- drivers/edgetpu/edgetpu-mailbox.c | 70 ++-- drivers/edgetpu/edgetpu-mailbox.h | 17 + drivers/edgetpu/edgetpu-telemetry.c | 1 + drivers/edgetpu/edgetpu-telemetry.h | 3 +- drivers/edgetpu/mobile-debug-dump.c | 568 ++++++++++++++++++++---------- drivers/edgetpu/mobile-debug-dump.h | 10 +- drivers/edgetpu/mobile-pm.c | 4 + 17 files changed, 627 
insertions(+), 284 deletions(-) create mode 100644 drivers/edgetpu/edgetpu-dump-info.h diff --git a/drivers/edgetpu/abrolhos/config-mailbox.h b/drivers/edgetpu/abrolhos/config-mailbox.h index b5a6c88..6e7956c 100644 --- a/drivers/edgetpu/abrolhos/config-mailbox.h +++ b/drivers/edgetpu/abrolhos/config-mailbox.h @@ -17,7 +17,7 @@ /* * The TZ mailbox is not managed by the kernel, but we still need to tell firmware to enable it, - * so it's index is placed after the kernel managed mailboxes. + * so its index is placed after the kernel managed mailboxes. */ #define EDGETPU_TZ_MAILBOX_ID 8 diff --git a/drivers/edgetpu/edgetpu-core.c b/drivers/edgetpu/edgetpu-core.c index 9bb84ac..b84d626 100644 --- a/drivers/edgetpu/edgetpu-core.c +++ b/drivers/edgetpu/edgetpu-core.c @@ -558,8 +558,10 @@ void edgetpu_client_put(struct edgetpu_client *client) { if (!client) return; - if (refcount_dec_and_test(&client->count)) + if (refcount_dec_and_test(&client->count)) { + edgetpu_wakelock_free(client->wakelock); kfree(client); + } } void edgetpu_client_remove(struct edgetpu_client *client) @@ -594,13 +596,6 @@ void edgetpu_client_remove(struct edgetpu_client *client) edgetpu_device_group_leave(client); /* invoke chip-dependent removal handler before releasing resources */ edgetpu_chip_client_remove(client); - edgetpu_wakelock_free(client->wakelock); - /* - * It should be impossible to access client->wakelock after this cleanup - * procedure. Set to NULL to cause kernel panic if use-after-free does - * happen. 
- */ - client->wakelock = NULL; /* Clean up all the per die event fds registered by the client */ if (client->perdie_events & diff --git a/drivers/edgetpu/edgetpu-debug-dump.c b/drivers/edgetpu/edgetpu-debug-dump.c index ab5f9e3..86928a6 100644 --- a/drivers/edgetpu/edgetpu-debug-dump.c +++ b/drivers/edgetpu/edgetpu-debug-dump.c @@ -49,29 +49,33 @@ int edgetpu_get_debug_dump(struct edgetpu_dev *etdev, u64 type) { int ret; struct edgetpu_debug_dump_setup *dump_setup; + bool init_fw_dump_buffer = false; if (!etdev->debug_dump_mem.vaddr) { - etdev_err(etdev, "Debug dump not allocated"); + etdev_dbg(etdev, "Debug dump not allocated"); return -EINVAL; } - if (!edgetpu_pm_get_if_powered(etdev->pm)) { - etdev_warn(etdev, "Device not powered, skip debug dump"); - return -ENODEV; + if (type) { + dump_setup = + (struct edgetpu_debug_dump_setup *)etdev->debug_dump_mem.vaddr; + dump_setup->type = type; + } else { + init_fw_dump_buffer = true; } - dump_setup = - (struct edgetpu_debug_dump_setup *)etdev->debug_dump_mem.vaddr; - dump_setup->type = type; /* Signal the type of dump and buffer address to firmware */ ret = edgetpu_kci_get_debug_dump(etdev->kci, etdev->debug_dump_mem.tpu_addr, - etdev->debug_dump_mem.size); + etdev->debug_dump_mem.size, init_fw_dump_buffer); etdev_dbg(etdev, "Sent debug dump request, tpu addr: %llx", (u64)etdev->debug_dump_mem.tpu_addr); - if (ret) - etdev_err(etdev, "KCI dump info req failed: %d", ret); + if (ret) { + if (init_fw_dump_buffer) + etdev_err(etdev, "failed to init dump buffer in FW"); + + etdev_err(etdev, "Debug dump KCI req failed: %d", ret); + } - edgetpu_pm_put(etdev->pm); return ret; } diff --git a/drivers/edgetpu/edgetpu-debug-dump.h b/drivers/edgetpu/edgetpu-debug-dump.h index 6a30ef8..5291388 100644 --- a/drivers/edgetpu/edgetpu-debug-dump.h +++ b/drivers/edgetpu/edgetpu-debug-dump.h @@ -20,8 +20,12 @@ enum edgetpu_dump_type_bit_position { DUMP_TYPE_CPU_BIT = 4, DUMP_TYPE_CSRS_BIT = 5, - DUMP_TYPE_MAX_BIT = 63 + 
DUMP_TYPE_KERNEL_ETDEV_BIT = 32, + DUMP_TYPE_KERNEL_CLIENTS_BIT = 33, + DUMP_TYPE_KERNEL_GROUPS_BIT = 34, + DUMP_TYPE_KERNEL_MAPPINGS_BIT = 35, + DUMP_TYPE_MAX_BIT = 63 }; enum edgetpu_dump_reason { @@ -85,6 +89,11 @@ void edgetpu_debug_dump_exit(struct edgetpu_dev *etdev); /* * Send KCI request to get fw debug dump segments. + * + * This function can be called with @type set to 0 to simply set the dump buffer address and size + * in the FW without dumping any segments. + * + * The caller must ensure that the device is powered on. */ int edgetpu_get_debug_dump(struct edgetpu_dev *etdev, u64 type); diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c index c5d768c..64fbbfa 100644 --- a/drivers/edgetpu/edgetpu-device-group.c +++ b/drivers/edgetpu/edgetpu-device-group.c @@ -91,10 +91,14 @@ static int edgetpu_kci_join_group_worker(struct kci_worker_param *param) struct edgetpu_device_group *group = param->group; uint i = param->idx; struct edgetpu_dev *etdev = edgetpu_device_group_nth_etdev(group, i); + int ret; etdev_dbg(etdev, "%s: join group %u %u/%u", __func__, group->workload_id, i + 1, group->n_clients); - return edgetpu_kci_join_group(etdev->kci, group->n_clients, i); + ret = edgetpu_kci_join_group(etdev->kci, group->n_clients, i); + if (!ret) + edgetpu_sw_wdt_inc_active_ref(etdev); + return ret; } static int edgetpu_kci_leave_group_worker(struct kci_worker_param *param) @@ -104,6 +108,7 @@ static int edgetpu_kci_leave_group_worker(struct kci_worker_param *param) struct edgetpu_dev *etdev = edgetpu_device_group_nth_etdev(group, i); etdev_dbg(etdev, "%s: leave group %u", __func__, group->workload_id); + edgetpu_sw_wdt_dec_active_ref(etdev); edgetpu_kci_update_usage(etdev); edgetpu_kci_leave_group(etdev->kci); return 0; @@ -119,17 +124,24 @@ static int edgetpu_kci_leave_group_worker(struct kci_worker_param *param) static int edgetpu_group_activate(struct edgetpu_device_group *group) { u8 mailbox_id; - int ret; + int ret, i; 
+ struct edgetpu_dev *etdev; if (edgetpu_group_mailbox_detached_locked(group)) return 0; + mailbox_id = edgetpu_group_context_id_locked(group); ret = edgetpu_mailbox_activate(group->etdev, mailbox_id, group->vcid, !group->activated); - if (ret) + if (ret) { etdev_err(group->etdev, "activate mailbox for VCID %d failed with %d", group->vcid, ret); - else + } else { group->activated = true; + for (i = 0; i < group->n_clients; i++) { + etdev = edgetpu_device_group_nth_etdev(group, i); + edgetpu_sw_wdt_inc_active_ref(etdev); + } + } atomic_inc(&group->etdev->job_count); return ret; } @@ -142,9 +154,16 @@ static int edgetpu_group_activate(struct edgetpu_device_group *group) static void edgetpu_group_deactivate(struct edgetpu_device_group *group) { u8 mailbox_id; + int i; + struct edgetpu_dev *etdev; if (edgetpu_group_mailbox_detached_locked(group)) return; + + for (i = 0; i < group->n_clients; i++) { + etdev = edgetpu_device_group_nth_etdev(group, i); + edgetpu_sw_wdt_dec_active_ref(etdev); + } mailbox_id = edgetpu_group_context_id_locked(group); edgetpu_mailbox_deactivate(group->etdev, mailbox_id); } @@ -450,15 +469,8 @@ void edgetpu_group_notify(struct edgetpu_device_group *group, uint event_id) */ static void edgetpu_device_group_release(struct edgetpu_device_group *group) { - int i; - struct edgetpu_dev *etdev; - edgetpu_group_clear_events(group); if (is_finalized_or_errored(group)) { - for (i = 0; i < group->n_clients; i++) { - etdev = edgetpu_device_group_nth_etdev(group, i); - edgetpu_sw_wdt_dec_active_ref(etdev); - } edgetpu_device_group_kci_leave(group); /* * Mappings clear should be performed after had a handshake with @@ -793,8 +805,7 @@ bool edgetpu_device_group_is_leader(struct edgetpu_device_group *group, int edgetpu_device_group_finalize(struct edgetpu_device_group *group) { - int ret = 0, i; - struct edgetpu_dev *etdev; + int ret = 0; bool mailbox_attached = false; struct edgetpu_client *leader; @@ -870,10 +881,6 @@ int 
edgetpu_device_group_finalize(struct edgetpu_device_group *group) group->status = EDGETPU_DEVICE_GROUP_FINALIZED; - for (i = 0; i < group->n_clients; i++) { - etdev = edgetpu_device_group_nth_etdev(group, i); - edgetpu_sw_wdt_inc_active_ref(etdev); - } mutex_unlock(&group->lock); return 0; diff --git a/drivers/edgetpu/edgetpu-dump-info.h b/drivers/edgetpu/edgetpu-dump-info.h new file mode 100644 index 0000000..9a97c72 --- /dev/null +++ b/drivers/edgetpu/edgetpu-dump-info.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Structures used for debug dump segments. + * + * Copyright (C) 2022 Google LLC + */ + +#ifndef __EDGETPU_DUMP_INFO_H__ +#define __EDGETPU_DUMP_INFO_H__ + +/* + * Note: A copy of this file is maintained in the debug dump parser project, do not include other + * headers. + */ + +/* + * +------------+------------------+ + * | type ETDEV | edgetpu_dev_info | + * +------------+------------------+ + */ + +struct edgetpu_dev_info { + uint32_t state; + uint32_t vcid_pool; + uint32_t job_count; + uint32_t firmware_crash_count; + uint32_t watchdog_timeout_count; + uint32_t reserved[11]; +}; + +/* + * +--------------+----------------------------+------------------------------+ + * | type CLIENTS | edgetpu_client_info_header | array of edgetpu_client_info | + * +--------------+----------------------------+------------------------------+ + */ + +struct edgetpu_client_info { + uint32_t pid; + uint32_t tgid; + uint64_t perdie_events; + /* wakelock->req_count. ~0u if wakelock is unavailable. */ + uint32_t wakelock_req_count; + /* workload_id of the group this client belongs to. ~0u if client->group is NULL. 
*/ + uint32_t group_workload_id; + uint32_t reserved[10]; +}; + +struct edgetpu_client_info_header { + uint32_t n_clients; + struct edgetpu_client_info clients[]; +}; + +/* + * +-------------+---------------------------+-----------------------------+ + * | type GROUPS | edgetpu_group_info_header | array of edgetpu_group_info | + * +-------------+---------------------------+-----------------------------+ + */ + +struct edgetpu_group_info { + uint32_t workload_id; + uint16_t vcid; + uint8_t status; + uint8_t queues_attached; /* whether has VII queues attached */ + uint32_t context_id; + uint64_t size_host_mappings; /* total size of host mappings, in bytes */ + uint64_t size_dmabuf_mappings; /* total size of dmabuf mappings, in bytes */ + uint32_t reserved[9]; +}; + +struct edgetpu_group_info_header { + uint32_t n_groups; + struct edgetpu_group_info groups[]; +}; + +/* + * +---------------+-----------------------------+-------------------------------+ + * | type MAPPINGS | edgetpu_mapping_info_header | array of edgetpu_mapping_info | + * +---------------+-----------------------------+-------------------------------+ + */ + +#define MAPPING_TYPE_HOST 1 +#define MAPPING_TYPE_DMABUF 2 + +struct edgetpu_mapping_info { + uint64_t host_address; + uint64_t device_address; + uint64_t size; + uint32_t flags; + uint32_t dir; + uint32_t reserved[8]; +}; + +struct edgetpu_mapping_info_header { + uint32_t n_mappings; + uint32_t group_workload_id; + uint8_t mapping_type; + uint8_t padding[7]; + struct edgetpu_mapping_info mappings[]; +}; + +#endif /* __EDGETPU_DUMP_INFO_H__ */ diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c index 31f0f4c..1ef1354 100644 --- a/drivers/edgetpu/edgetpu-firmware.c +++ b/drivers/edgetpu/edgetpu-firmware.c @@ -16,6 +16,7 @@ #include #include "edgetpu.h" +#include "edgetpu-debug-dump.h" #include "edgetpu-device-group.h" #include "edgetpu-firmware.h" #include "edgetpu-firmware-util.h" @@ -171,6 +172,8 @@ static int 
edgetpu_firmware_handshake(struct edgetpu_firmware *et_fw) if (ret) etdev_warn(etdev, "telemetry KCI error: %d", ret); + /* Set debug dump buffer in FW */ + edgetpu_get_debug_dump(etdev, 0); } return 0; } diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c index 32b8fde..05bd9c9 100644 --- a/drivers/edgetpu/edgetpu-fs.c +++ b/drivers/edgetpu/edgetpu-fs.c @@ -704,14 +704,14 @@ static int edgetpu_ioctl_test_external(struct edgetpu_client *client, if (copy_from_user(&test_ext, argp, sizeof(test_ext))) return -EFAULT; - if (hweight32(test_ext.mbox_bmap) > 1) + if (hweight32(test_ext.mbox_bmap) != 1) return -EINVAL; client_info.attr = (struct edgetpu_mailbox_attr __user *)test_ext.attrs; client_info.tpu_fd = test_ext.fd; client_info.mbox_map = test_ext.mbox_bmap; - info = kmalloc(sizeof(*info) + sizeof(struct edgetpu_ext_mailbox_descriptor *), GFP_KERNEL); + info = kmalloc(sizeof(*info) + sizeof(struct edgetpu_ext_mailbox_descriptor), GFP_KERNEL); if (!info) return -ENOMEM; diff --git a/drivers/edgetpu/edgetpu-kci.c b/drivers/edgetpu/edgetpu-kci.c index 9339953..138242e 100644 --- a/drivers/edgetpu/edgetpu-kci.c +++ b/drivers/edgetpu/edgetpu-kci.c @@ -1005,13 +1005,14 @@ int edgetpu_kci_shutdown(struct edgetpu_kci *kci) } int edgetpu_kci_get_debug_dump(struct edgetpu_kci *kci, tpu_addr_t tpu_addr, - size_t size) + size_t size, bool init_buffer) { struct edgetpu_command_element cmd = { .code = KCI_CODE_GET_DEBUG_DUMP, .dma = { .address = tpu_addr, .size = size, + .flags = init_buffer, }, }; @@ -1020,17 +1021,17 @@ int edgetpu_kci_get_debug_dump(struct edgetpu_kci *kci, tpu_addr_t tpu_addr, return edgetpu_kci_send_cmd(kci, &cmd); } -int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_id, s16 vcid, bool first_open) +int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_map, s16 vcid, bool first_open) { const struct edgetpu_kci_open_device_detail detail = { - .mailbox_id = mailbox_id, + .mailbox_map = mailbox_map, .vcid = vcid, - 
.flags = first_open, + .flags = (mailbox_map << 1) | first_open, }; struct edgetpu_command_element cmd = { .code = KCI_CODE_OPEN_DEVICE, .dma = { - .flags = BIT(mailbox_id), + .flags = mailbox_map, }, }; @@ -1042,12 +1043,12 @@ int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_id, s16 vcid, b return edgetpu_kci_send_cmd_with_data(kci, &cmd, &detail, sizeof(detail)); } -int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_id) +int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_map) { struct edgetpu_command_element cmd = { .code = KCI_CODE_CLOSE_DEVICE, .dma = { - .flags = BIT(mailbox_id), + .flags = mailbox_map, }, }; diff --git a/drivers/edgetpu/edgetpu-kci.h b/drivers/edgetpu/edgetpu-kci.h index acdf31a..cc8bff3 100644 --- a/drivers/edgetpu/edgetpu-kci.h +++ b/drivers/edgetpu/edgetpu-kci.h @@ -207,8 +207,8 @@ struct edgetpu_kci_device_group_detail { }; struct edgetpu_kci_open_device_detail { - /* The ID of mailbox to be opened. */ - u16 mailbox_id; + /* The bit map of mailboxes to be opened. */ + u16 mailbox_map; /* * Virtual context ID @mailbox_id is associated to. * For device groups with @mailbox_detachable attribute the mailbox attached to the group @@ -363,25 +363,29 @@ void edgetpu_kci_mappings_show(struct edgetpu_dev *etdev, struct seq_file *s); /* Send shutdown request to firmware */ int edgetpu_kci_shutdown(struct edgetpu_kci *kci); -/* Request dump of inaccessible segments from firmware */ +/* Request dump of inaccessible segments from firmware. + * + * @init_buffer flag is used to indicate that the req is only sent to set the dump buffer address + * and size in FW. + */ int edgetpu_kci_get_debug_dump(struct edgetpu_kci *kci, tpu_addr_t tpu_addr, - size_t size); + size_t size, bool init_buffer); /* - * Inform the firmware to prepare to serve the VII of @mailbox_id. + * Inform the firmware to prepare to serve VII mailboxes included in @mailbox_map. 
* * You usually shouldn't call this directly - consider using - * edgetpu_mailbox_activate() instead. + * edgetpu_mailbox_activate() or edgetpu_mailbox_activate_bulk() instead. */ -int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_id, s16 vcid, bool first_open); +int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_map, s16 vcid, bool first_open); /* - * Inform the firmware the VII with @mailbox_id is closed. + * Inform the firmware that the VII mailboxes included in @mailbox_map are closed. * * You usually shouldn't call this directly - consider using - * edgetpu_mailbox_deactivate() instead. + * edgetpu_mailbox_deactivate() or edgetpu_mailbox_deactivate_bulk() instead. */ -int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_id); +int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_map); /* Cancel work queues or wait until they're done */ void edgetpu_kci_cancel_work_queues(struct edgetpu_kci *kci); diff --git a/drivers/edgetpu/edgetpu-mailbox.c b/drivers/edgetpu/edgetpu-mailbox.c index 03a958b..007a512 100644 --- a/drivers/edgetpu/edgetpu-mailbox.c +++ b/drivers/edgetpu/edgetpu-mailbox.c @@ -973,10 +973,11 @@ static void edgetpu_mailbox_external_free(struct edgetpu_device_group *group) static int edgetpu_mailbox_external_alloc_enable(struct edgetpu_client *client, struct edgetpu_external_mailbox_req *req) { - int ret = 0, i, id; + int ret = 0, i; struct edgetpu_external_mailbox *ext_mailbox = NULL; struct edgetpu_device_group *group; int vcid; + u32 mbox_map = 0; mutex_lock(&client->group_lock); if (!client->group || !edgetpu_device_group_is_leader(client->group, client)) { @@ -1011,21 +1012,13 @@ static int edgetpu_mailbox_external_alloc_enable(struct edgetpu_client *client, ext_mailbox = group->ext_mailbox; vcid = group->vcid; - for (i = 0; i < ext_mailbox->count; i++) { - id = ext_mailbox->descriptors[i].mailbox->mailbox_id; - etdev_dbg(group->etdev, "Enabling mailbox: %d\n", id); - ret = 
edgetpu_mailbox_activate(group->etdev, id, vcid, false); - if (ret) { - etdev_err(group->etdev, "Activate mailbox %d failed: %d", id, ret); - break; - } - } + for (i = 0; i < ext_mailbox->count; i++) + mbox_map |= BIT(ext_mailbox->descriptors[i].mailbox->mailbox_id); + + ret = edgetpu_mailbox_activate_bulk(group->etdev, mbox_map, vcid, false); if (ret) { - while (i--) { - id = ext_mailbox->descriptors[i].mailbox->mailbox_id; - edgetpu_mailbox_deactivate(group->etdev, id); - } + etdev_err(group->etdev, "Activate mailbox bulk failed: %d", ret); /* * Deactivate only fails if f/w is unresponsive which will put group * in errored state or mailbox physically disabled before requesting @@ -1072,18 +1065,18 @@ static int edgetpu_mailbox_external_disable_free(struct edgetpu_client *client) void edgetpu_mailbox_external_disable_free_locked(struct edgetpu_device_group *group) { - u32 i, id; + u32 i, mbox_map = 0; struct edgetpu_external_mailbox *ext_mailbox; ext_mailbox = group->ext_mailbox; if (!ext_mailbox) return; - for (i = 0; i < ext_mailbox->count; i++) { - id = ext_mailbox->descriptors[i].mailbox->mailbox_id; - etdev_dbg(group->etdev, "Disabling mailbox: %d\n", id); - edgetpu_mailbox_deactivate(group->etdev, id); - } + for (i = 0; i < ext_mailbox->count; i++) + mbox_map |= BIT(ext_mailbox->descriptors[i].mailbox->mailbox_id); + + etdev_dbg(group->etdev, "Disabling mailboxes in map: %x\n", mbox_map); + edgetpu_mailbox_deactivate_bulk(group->etdev, mbox_map); /* * Deactivate only fails if f/w is unresponsive which will put group * in errored state or mailbox physically disabled before requesting @@ -1156,18 +1149,19 @@ int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, int mailbox_id) return edgetpu_mailbox_external_disable_by_id(client, mailbox_id); } -int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid, bool first_open) +int edgetpu_mailbox_activate_bulk(struct edgetpu_dev *etdev, u32 mailbox_map, s16 vcid, + bool first_open) { 
struct edgetpu_handshake *eh = &etdev->mailbox_manager->open_devices; - const u32 bit = BIT(mailbox_id); int ret = 0; mutex_lock(&eh->lock); - if (bit & ~eh->fw_state) - ret = edgetpu_kci_open_device(etdev->kci, mailbox_id, vcid, first_open); + if (mailbox_map & ~eh->fw_state) + ret = edgetpu_kci_open_device(etdev->kci, mailbox_map & ~eh->fw_state, vcid, + first_open); if (!ret) { - eh->state |= bit; - eh->fw_state |= bit; + eh->state |= mailbox_map; + eh->fw_state |= mailbox_map; } mutex_unlock(&eh->lock); /* @@ -1178,28 +1172,38 @@ int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid if (ret == -ETIMEDOUT) edgetpu_watchdog_bite(etdev, false); return ret; + } -void edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id) +int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid, bool first_open) +{ + return edgetpu_mailbox_activate_bulk(etdev, BIT(mailbox_id), vcid, first_open); +} + +void edgetpu_mailbox_deactivate_bulk(struct edgetpu_dev *etdev, u32 mailbox_map) { struct edgetpu_handshake *eh = &etdev->mailbox_manager->open_devices; - const u32 bit = BIT(mailbox_id); int ret = 0; mutex_lock(&eh->lock); - if (bit & eh->fw_state) - ret = edgetpu_kci_close_device(etdev->kci, mailbox_id); + if (mailbox_map & eh->fw_state) + ret = edgetpu_kci_close_device(etdev->kci, mailbox_map & eh->fw_state); if (ret) - etdev_err(etdev, "Deactivate mailbox %d failed: %d", mailbox_id, ret); + etdev_err(etdev, "Deactivate mailbox for map %x failed: %d", mailbox_map, ret); /* * Always clears the states, FW should never reject CLOSE_DEVICE requests unless it's * unresponsive. 
*/ - eh->state &= ~bit; - eh->fw_state &= ~bit; + eh->state &= ~mailbox_map; + eh->fw_state &= ~mailbox_map; mutex_unlock(&eh->lock); } +void edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id) +{ + edgetpu_mailbox_deactivate_bulk(etdev, BIT(mailbox_id)); +} + void edgetpu_handshake_clear_fw_state(struct edgetpu_handshake *eh) { mutex_lock(&eh->lock); diff --git a/drivers/edgetpu/edgetpu-mailbox.h b/drivers/edgetpu/edgetpu-mailbox.h index 1d284e6..11c9768 100644 --- a/drivers/edgetpu/edgetpu-mailbox.h +++ b/drivers/edgetpu/edgetpu-mailbox.h @@ -356,6 +356,15 @@ int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, int mailbox_id, */ int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, int mailbox_id); +/* + * Activates all mailboxes included in @mailbox_map, OPEN_DEVICE KCI will be sent. + * + * Returns what edgetpu_kci_open_device() returned. + * Caller ensures device is powered on. + */ +int edgetpu_mailbox_activate_bulk(struct edgetpu_dev *etdev, u32 mailbox_map, s16 vcid, + bool first_open); + /* * Activates @mailbox_id, OPEN_DEVICE KCI will be sent. * @@ -366,10 +375,18 @@ int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, int mailbox_id); * Caller ensures device is powered on. */ int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid, bool first_open); + +/* + * Similar to edgetpu_mailbox_activate_bulk() but sends CLOSE_DEVICE KCI with the @mailbox_map + * instead. + */ +void edgetpu_mailbox_deactivate_bulk(struct edgetpu_dev *etdev, u32 mailbox_map); + /* * Similar to edgetpu_mailbox_activate() but sends CLOSE_DEVICE KCI instead. */ void edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id); + /* Sets @eh->fw_state to 0. 
*/ void edgetpu_handshake_clear_fw_state(struct edgetpu_handshake *eh); /* diff --git a/drivers/edgetpu/edgetpu-telemetry.c b/drivers/edgetpu/edgetpu-telemetry.c index f18cef8..87820e0 100644 --- a/drivers/edgetpu/edgetpu-telemetry.c +++ b/drivers/edgetpu/edgetpu-telemetry.c @@ -329,6 +329,7 @@ static int telemetry_init(struct edgetpu_dev *etdev, struct edgetpu_telemetry *t tel->header = (struct edgetpu_telemetry_header *)vaddr; tel->header->head = 0; + tel->header->size = 0; tel->header->tail = 0; tel->header->entries_dropped = 0; diff --git a/drivers/edgetpu/edgetpu-telemetry.h b/drivers/edgetpu/edgetpu-telemetry.h index 2c89aff..f849c8c 100644 --- a/drivers/edgetpu/edgetpu-telemetry.h +++ b/drivers/edgetpu/edgetpu-telemetry.h @@ -45,7 +45,8 @@ enum edgetpu_telemetry_type { struct edgetpu_telemetry_header { u32 head; - u32 reserved0[15]; /* Place head and tail into different cache lines */ + u32 size; + u32 reserved0[14]; /* Place head and tail into different cache lines */ u32 tail; u32 entries_dropped; /* Number of entries dropped due to buffer full */ u32 reserved1[14]; /* Pad to 128 bytes in total */ diff --git a/drivers/edgetpu/mobile-debug-dump.c b/drivers/edgetpu/mobile-debug-dump.c index 3bb7b3b..4ef02ac 100644 --- a/drivers/edgetpu/mobile-debug-dump.c +++ b/drivers/edgetpu/mobile-debug-dump.c @@ -3,9 +3,11 @@ * Implements methods common to the family of EdgeTPUs for mobile devices to retrieve host side * debug dump segments and report them to SSCD. * - * Copyright (C) 2021 Google, Inc. 
+ * Copyright (C) 2021-2022 Google LLC */ +#include +#include #include #include #include @@ -14,12 +16,110 @@ #include "edgetpu-config.h" #include "edgetpu-device-group.h" +#include "edgetpu-dump-info.h" +#include "edgetpu-internal.h" #include "edgetpu-mailbox.h" +#include "edgetpu-mapping.h" #include "edgetpu-mobile-platform.h" +#include "edgetpu-wakelock.h" #include "mobile-debug-dump.h" #include "edgetpu-debug-dump.c" +#define SET_FIELD(info, obj, __field) ((info)->__field = (obj)->__field) + +/* Helper structure to hold the segments to be reported to SSCD. */ +struct sscd_segments_context { + size_t n_segs; /* current number of recorded segments */ + size_t capacity; /* number of segments allocated */ + struct sscd_segment *segs; + /* + * Array with the same length as @segs, indicates whether segs[i].addr should be freed on + * context releasing. + */ + bool *free_on_release; + struct mobile_sscd_info *sscd_info; +}; + +static int sscd_ctx_init(struct sscd_segments_context *ctx, struct mobile_sscd_info *sscd_info) +{ + struct sscd_platform_data *pdata = sscd_info->pdata; + + if (!pdata->sscd_report) + return -ENOENT; + ctx->n_segs = 0; + ctx->capacity = 0; + ctx->segs = NULL; + ctx->free_on_release = NULL; + ctx->sscd_info = sscd_info; + return 0; +} + +static void sscd_ctx_release(struct sscd_segments_context *ctx) +{ + int i; + + for (i = 0; i < ctx->n_segs; i++) + if (ctx->free_on_release[i]) + kfree(ctx->segs[i].addr); + kfree(ctx->segs); + kfree(ctx->free_on_release); +} + +/* + * Pushes the segment. + * + * If @free_on_release is true, kfree(@seg->addr) is called when releasing @ctx. + * + * Returns 0 on success. 
+ */ +static int sscd_ctx_push_segment(struct sscd_segments_context *ctx, struct sscd_segment *seg, + bool free_on_release) +{ + void *ptr1, *ptr2; + size_t new_cap; + + if (ctx->n_segs >= ctx->capacity) { + new_cap = ctx->capacity << 1; + if (!new_cap) + new_cap = 1; + ptr1 = krealloc(ctx->segs, new_cap * sizeof(*ctx->segs), GFP_KERNEL); + if (!ptr1) + return -ENOMEM; + /* krealloc() may have freed the old array: publish ptr1 now so that + * ctx->segs stays valid for sscd_ctx_release() if ptr2 fails. */ + ctx->segs = ptr1; + ptr2 = krealloc(ctx->free_on_release, new_cap * sizeof(*ctx->free_on_release), + GFP_KERNEL); + if (!ptr2) + return -ENOMEM; + ctx->free_on_release = ptr2; + ctx->capacity = new_cap; + } + + ctx->segs[ctx->n_segs] = *seg; + ctx->free_on_release[ctx->n_segs] = free_on_release; + ctx->n_segs++; + return 0; +} + +/* + * Passes dump data to SSCD daemon and releases @ctx. + * + * Returns what sscd_report returned. Note that @ctx is always released no matter what is returned. + */ +static int sscd_ctx_report_and_release(struct sscd_segments_context *ctx, const char *crash_info) +{ + struct sscd_platform_data *pdata = ctx->sscd_info->pdata; + struct platform_device *sscd_dev = ctx->sscd_info->dev; + int ret; + + ret = pdata->sscd_report(sscd_dev, ctx->segs, ctx->n_segs, SSCD_FLAGS_ELFARM64HDR, + crash_info); + sscd_ctx_release(ctx); + return ret; +} + static void sscd_release(struct device *dev) { pr_debug(DRIVER_NAME " release\n"); @@ -28,130 +128,278 @@ static void sscd_release(struct device *dev) static struct sscd_platform_data sscd_pdata; static struct platform_device sscd_dev; -/* - * Collects the mapping information of all the host mapping and dmabuf mapping buffers of all - * @groups as an array of struct mobile_sscd_mappings_dump and populates the @sscd_seg. - * - * Returns the pointer to the first element of the mappings dump array. The allocated array should - * be freed by the caller after the sscd segment is reported. - * Returns a negative errno in case of failure. - * Returns NULL when there is no mapping allocated in groups. 
- */ -static struct mobile_sscd_mappings_dump * -mobile_sscd_collect_mappings_segment(struct edgetpu_device_group **groups, size_t num_groups, - struct sscd_segment *sscd_seg) +static int mobile_sscd_collect_mappings_info(struct edgetpu_mapping_root *root, u32 workload_id, + u8 type, struct sscd_segments_context *ctx) { - struct mobile_sscd_mappings_dump *mappings_dump; - struct edgetpu_mapping_root *mappings; + int ret = 0; + struct edgetpu_dump_segment *seg_hdr; + struct edgetpu_mapping_info_header *hdr; + struct edgetpu_mapping_info *info; + size_t seg_size; + void *buffer = NULL; struct rb_node *node; - void *resized_arr; - size_t idx = 0, mappings_num = 0, new_size = 0, count; - - mappings_dump = NULL; - for (idx = 0; idx < num_groups; idx++) { - mutex_lock(&groups[idx]->lock); - count = groups[idx]->host_mappings.count + groups[idx]->dmabuf_mappings.count; - if (count == 0) { - mutex_unlock(&groups[idx]->lock); - continue; - } - new_size += count * sizeof(*mappings_dump); - resized_arr = krealloc(mappings_dump, new_size, GFP_KERNEL); - if (!resized_arr) { - kfree(mappings_dump); - mutex_unlock(&groups[idx]->lock); - return ERR_PTR(-ENOMEM); - } - mappings_dump = resized_arr; - mappings = &groups[idx]->host_mappings; - for (node = rb_first(&mappings->rb); node; node = rb_next(node)) { - struct edgetpu_mapping *map = - container_of(node, struct edgetpu_mapping, node); + mutex_lock(&root->lock); - mappings_dump[mappings_num].host_address = map->host_address; - mappings_dump[mappings_num].device_address = map->device_address; - mappings_dump[mappings_num].size = (u64)map->map_size; - mappings_num++; - } - mappings = &groups[idx]->dmabuf_mappings; - for (node = rb_first(&mappings->rb); node; node = rb_next(node)) { - struct edgetpu_mapping *map = - container_of(node, struct edgetpu_mapping, node); - - mappings_dump[mappings_num].host_address = map->host_address; - mappings_dump[mappings_num].device_address = map->device_address; - mappings_dump[mappings_num].size 
= (u64)map->map_size; - mappings_num++; - } - mutex_unlock(&groups[idx]->lock); + if (!root->count) + goto out_unlock; + seg_size = sizeof(*seg_hdr) + sizeof(*hdr) + sizeof(*info) * root->count; + buffer = kzalloc(seg_size, GFP_KERNEL); + if (!buffer) { + ret = -ENOMEM; + goto out_unlock; } - sscd_seg->addr = mappings_dump; - sscd_seg->size = new_size; - sscd_seg->vaddr = mappings_dump; + seg_hdr = buffer; + seg_hdr->type = BIT_ULL(DUMP_TYPE_KERNEL_MAPPINGS_BIT); + seg_hdr->size = seg_size - sizeof(*seg_hdr); + hdr = (typeof(hdr))(seg_hdr + 1); + hdr->n_mappings = root->count; + hdr->group_workload_id = workload_id; + hdr->mapping_type = type; + info = hdr->mappings; + for (node = rb_first(&root->rb); node; node = rb_next(node)) { + struct edgetpu_mapping *map = container_of(node, struct edgetpu_mapping, node); + + SET_FIELD(info, map, host_address); + SET_FIELD(info, map, device_address); + SET_FIELD(info, map, flags); + SET_FIELD(info, map, dir); + info->size = (u64)map->map_size; + info++; + } - return mappings_dump; +out_unlock: + mutex_unlock(&root->lock); + if (buffer) { + struct sscd_segment seg = { + .addr = buffer, + .size = seg_size, + }; + + ret = sscd_ctx_push_segment(ctx, &seg, true); + if (ret) + kfree(buffer); + } + return ret; } /* - * Collects the VII cmd and resp queues of all @groups that @etdev belongs to and the KCI cmd and - * resp queues and populates them as @sscd_seg_arr elements. + * For each group, collects the mappings information, including host and dmabuf mapping buffers, + * and records it to @ctx. * - * Returns the total number of queues collected since some queues may have been released for groups - * with detached mailboxes. The return value is less than or equal to the total number of queues - * expected based on @num_groups i.e. (2 * @num_groups +2). + * Returns a negative errno in case of failure. 
*/ -static size_t mobile_sscd_collect_cmd_resp_queues(struct edgetpu_dev *etdev, - struct edgetpu_device_group **groups, - size_t num_groups, - struct sscd_segment *sscd_seg_arr) +static int mobile_sscd_collect_group_mappings_info(struct edgetpu_device_group **groups, + size_t num_groups, + struct sscd_segments_context *ctx) { - struct edgetpu_kci *kci; - size_t idx; - u16 num_queues = 0; - - /* Collect VII cmd and resp queues */ - for (idx = 0; idx < num_groups; idx++) { - mutex_lock(&groups[idx]->lock); - if (!edgetpu_group_mailbox_detached_locked(groups[idx])) { - sscd_seg_arr[num_queues].addr = - (void *)groups[idx]->vii.cmd_queue_mem.vaddr; - sscd_seg_arr[num_queues].size = groups[idx]->vii.cmd_queue_mem.size; - sscd_seg_arr[num_queues].paddr = - (void *)groups[idx]->vii.cmd_queue_mem.tpu_addr; - sscd_seg_arr[num_queues].vaddr = - (void *)groups[idx]->vii.cmd_queue_mem.vaddr; - num_queues++; - - sscd_seg_arr[num_queues].addr = - (void *)groups[idx]->vii.resp_queue_mem.vaddr; - sscd_seg_arr[num_queues].size = groups[idx]->vii.resp_queue_mem.size; - sscd_seg_arr[num_queues].paddr = - (void *)groups[idx]->vii.resp_queue_mem.tpu_addr; - sscd_seg_arr[num_queues].vaddr = - (void *)groups[idx]->vii.resp_queue_mem.vaddr; - num_queues++; - } - mutex_unlock(&groups[idx]->lock); + int i, ret; + struct edgetpu_device_group *group; + + for (i = 0; i < num_groups; i++) { + group = groups[i]; + ret = mobile_sscd_collect_mappings_info(&group->host_mappings, group->workload_id, + MAPPING_TYPE_HOST, ctx); + if (ret) + return ret; + ret = mobile_sscd_collect_mappings_info(&group->dmabuf_mappings, group->workload_id, + MAPPING_TYPE_DMABUF, ctx); + if (ret) + return ret; } + return 0; +} + +static int mobile_sscd_collect_etdev_info(struct edgetpu_dev *etdev, struct sscd_segments_context *ctx) +{ + struct edgetpu_dump_segment *seg_hdr; + struct edgetpu_dev_info *info; + const size_t seg_size = sizeof(*seg_hdr) + sizeof(*info); + void *buffer; + struct sscd_segment seg = { + .size = 
seg_size, + }; + + buffer = kzalloc(seg_size, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + seg.addr = buffer; + seg_hdr = buffer; + seg_hdr->type = BIT_ULL(DUMP_TYPE_KERNEL_ETDEV_BIT); + seg_hdr->size = seg_size - sizeof(*seg_hdr); + info = (typeof(info))(seg_hdr + 1); + SET_FIELD(info, etdev, state); + SET_FIELD(info, etdev, vcid_pool); + info->job_count = atomic_read(&etdev->job_count); + SET_FIELD(info, etdev, firmware_crash_count); + SET_FIELD(info, etdev, watchdog_timeout_count); + return sscd_ctx_push_segment(ctx, &seg, true); +} + +static int mobile_sscd_collect_clients_info(struct edgetpu_client **clients, size_t num_clients, + struct sscd_segments_context *ctx) +{ + int i; + struct edgetpu_dump_segment *seg_hdr; + struct edgetpu_client_info_header *hdr; + struct edgetpu_client_info *info; + struct edgetpu_client *client; + const size_t seg_size = sizeof(*seg_hdr) + sizeof(*hdr) + sizeof(*info) * num_clients; + void *buffer; + struct sscd_segment seg = { + .size = seg_size, + }; - /* Collect KCI cmd and resp queues */ - kci = etdev->kci; - sscd_seg_arr[num_queues].addr = (void *)kci->cmd_queue_mem.vaddr; - sscd_seg_arr[num_queues].size = MAX_QUEUE_SIZE * sizeof(struct edgetpu_command_element); - sscd_seg_arr[num_queues].paddr = (void *)kci->cmd_queue_mem.tpu_addr; - sscd_seg_arr[num_queues].vaddr = (void *)kci->cmd_queue_mem.vaddr; - num_queues++; - - sscd_seg_arr[num_queues].addr = (void *)kci->resp_queue_mem.vaddr; - sscd_seg_arr[num_queues].size = - MAX_QUEUE_SIZE * sizeof(struct edgetpu_kci_response_element); - sscd_seg_arr[num_queues].paddr = (void *)kci->resp_queue_mem.tpu_addr; - sscd_seg_arr[num_queues].vaddr = (void *)kci->resp_queue_mem.vaddr; - num_queues++; - - return num_queues; + if (!num_clients) + return 0; + buffer = kzalloc(seg_size, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + seg.addr = buffer; + seg_hdr = buffer; + seg_hdr->type = BIT_ULL(DUMP_TYPE_KERNEL_CLIENTS_BIT); + seg_hdr->size = seg_size - sizeof(*seg_hdr); + hdr = 
(typeof(hdr))(seg_hdr + 1); + info = hdr->clients; + for (i = 0; i < num_clients; i++) { + client = clients[i]; + SET_FIELD(info, client, pid); + SET_FIELD(info, client, tgid); + SET_FIELD(info, client, perdie_events); + info->wakelock_req_count = + NO_WAKELOCK(client->wakelock) ? ~0u : client->wakelock->req_count; + mutex_lock(&client->group_lock); + info->group_workload_id = client->group ? client->group->workload_id : ~0u; + mutex_unlock(&client->group_lock); + info++; + } + hdr->n_clients = num_clients; + return sscd_ctx_push_segment(ctx, &seg, true); +} + +static int mobile_sscd_collect_groups_info(struct edgetpu_device_group **groups, size_t num_groups, + struct sscd_segments_context *ctx) +{ + int i; + struct edgetpu_dump_segment *seg_hdr; + struct edgetpu_group_info_header *hdr; + struct edgetpu_group_info *info; + struct edgetpu_device_group *group; + const size_t seg_size = sizeof(*seg_hdr) + sizeof(*hdr) + sizeof(*info) * num_groups; + void *buffer; + struct sscd_segment seg = { + .size = seg_size, + }; + + if (!num_groups) + return 0; + buffer = kzalloc(seg_size, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + seg.addr = buffer; + seg_hdr = buffer; + seg_hdr->type = BIT_ULL(DUMP_TYPE_KERNEL_GROUPS_BIT); + seg_hdr->size = seg_size - sizeof(*seg_hdr); + hdr = (typeof(hdr))(seg_hdr + 1); + info = hdr->groups; + for (i = 0; i < num_groups; i++) { + group = groups[i]; + SET_FIELD(info, group, workload_id); + SET_FIELD(info, group, vcid); + SET_FIELD(info, group, status); + SET_FIELD(info, group, context_id); + info->size_host_mappings = edgetpu_mappings_total_size(&group->host_mappings); + info->size_dmabuf_mappings = edgetpu_mappings_total_size(&group->dmabuf_mappings); + mutex_lock(&group->lock); + info->queues_attached = edgetpu_group_finalized_and_attached(group); + mutex_unlock(&group->lock); + info++; + } + hdr->n_groups = num_groups; + return sscd_ctx_push_segment(ctx, &seg, true); +} + +static struct edgetpu_client **edgetpu_get_clients(struct 
edgetpu_dev *etdev, size_t *p_num_clients) +{ + struct edgetpu_client **clients; + struct edgetpu_list_device_client *lc; + size_t num_clients = 0, i = 0; + + mutex_lock(&etdev->clients_lock); + for_each_list_device_client(etdev, lc) + num_clients++; + clients = kmalloc_array(num_clients, sizeof(*clients), GFP_KERNEL); + if (!clients) { + mutex_unlock(&etdev->clients_lock); + return ERR_PTR(-ENOMEM); + } + + for_each_list_device_client(etdev, lc) + clients[i++] = edgetpu_client_get(lc->client); + mutex_unlock(&etdev->clients_lock); + *p_num_clients = num_clients; + return clients; +} + +static struct edgetpu_device_group **edgetpu_get_groups(struct edgetpu_dev *etdev, + size_t *p_num_groups) +{ + struct edgetpu_device_group **groups; + struct edgetpu_device_group *group; + struct edgetpu_list_group *g; + size_t num_groups = 0; + + mutex_lock(&etdev->groups_lock); + groups = kmalloc_array(etdev->n_groups, sizeof(*groups), GFP_KERNEL); + if (!groups) { + mutex_unlock(&etdev->groups_lock); + return ERR_PTR(-ENOMEM); + } + + etdev_for_each_group(etdev, g, group) + groups[num_groups++] = edgetpu_device_group_get(group); + mutex_unlock(&etdev->groups_lock); + *p_num_groups = num_groups; + return groups; +} + +static int mobile_collect_device_info(struct edgetpu_dev *etdev, struct sscd_segments_context *ctx) +{ + struct edgetpu_device_group **groups; + struct edgetpu_client **clients; + size_t num_groups = 0, num_clients = 0; + int i, ret; + + clients = edgetpu_get_clients(etdev, &num_clients); + if (IS_ERR(clients)) + return PTR_ERR(clients); + groups = edgetpu_get_groups(etdev, &num_groups); + if (IS_ERR(groups)) { + ret = PTR_ERR(groups); + goto out_put_clients; + } + + ret = mobile_sscd_collect_etdev_info(etdev, ctx); + if (ret) + goto out_put_groups; + ret = mobile_sscd_collect_clients_info(clients, num_clients, ctx); + if (ret) + goto out_put_groups; + ret = mobile_sscd_collect_groups_info(groups, num_groups, ctx); + if (ret) + goto out_put_groups; + ret = 
mobile_sscd_collect_group_mappings_info(groups, num_groups, ctx); + +out_put_groups: + for (i = 0; i < num_groups; i++) + edgetpu_device_group_put(groups[i]); + kfree(groups); +out_put_clients: + for (i = 0; i < num_clients; i++) + edgetpu_client_put(clients[i]); + kfree(clients); + return ret; } static int mobile_sscd_generate_coredump(void *p_etdev, void *p_dump_setup) @@ -159,20 +407,12 @@ static int mobile_sscd_generate_coredump(void *p_etdev, void *p_dump_setup) struct edgetpu_dev *etdev; struct edgetpu_debug_dump_setup *dump_setup; struct edgetpu_mobile_platform_dev *pdev; - struct sscd_platform_data *pdata; - struct platform_device *sscd_dev; - struct sscd_segment *segs; + struct sscd_segments_context sscd_ctx; struct edgetpu_debug_dump *debug_dump; struct edgetpu_crash_reason *crash_reason; struct edgetpu_dump_segment *dump_seg; - struct edgetpu_device_group *group; - struct edgetpu_device_group **groups; - struct edgetpu_list_group *g; - struct mobile_sscd_mappings_dump *mappings_dump = NULL; char crash_info[128]; - int sscd_dump_segments_num; int i, ret; - size_t num_groups = 0, num_queues = 0; u64 offset; if (!p_etdev || !p_dump_setup) @@ -181,12 +421,9 @@ static int mobile_sscd_generate_coredump(void *p_etdev, void *p_dump_setup) etdev = (struct edgetpu_dev *)p_etdev; dump_setup = (struct edgetpu_debug_dump_setup *)p_dump_setup; pdev = to_mobile_dev(etdev); - pdata = (struct sscd_platform_data *)pdev->sscd_info.pdata; - sscd_dev = (struct platform_device *)pdev->sscd_info.dev; - if (!pdata->sscd_report) { - etdev_err(etdev, "failed to generate coredump"); - return -ENOENT; - } + ret = sscd_ctx_init(&sscd_ctx, &pdev->sscd_info); + if (ret) + goto err; debug_dump = (struct edgetpu_debug_dump *)(dump_setup + 1); @@ -196,79 +433,40 @@ static int mobile_sscd_generate_coredump(void *p_etdev, void *p_dump_setup) scnprintf(crash_info, sizeof(crash_info), "[edgetpu_coredump] error code: %#llx", crash_reason->code); - mutex_lock(&etdev->groups_lock); - groups = 
kmalloc_array(etdev->n_groups, sizeof(*groups), GFP_KERNEL); - if (!groups) { - mutex_unlock(&etdev->groups_lock); - return -ENOMEM; - } - - etdev_for_each_group(etdev, g, group) { - if (edgetpu_device_group_is_disbanded(group)) - continue; - groups[num_groups++] = edgetpu_device_group_get(group); - } - mutex_unlock(&etdev->groups_lock); - - /* Allocate memory for dump segments */ - sscd_dump_segments_num = debug_dump->dump_segments_num; - sscd_dump_segments_num += 2 * num_groups; /* VII cmd and resp queues */ - sscd_dump_segments_num += num_groups ? 1 : 0; /* Mappings info */ - sscd_dump_segments_num += 2; /* KCI cmd and resp queues */ - - segs = kmalloc_array(sscd_dump_segments_num, sizeof(struct sscd_segment), GFP_KERNEL); - if (!segs) { - ret = -ENOMEM; - goto out_sscd_generate_coredump; - } - /* Populate sscd segments */ dump_seg = (struct edgetpu_dump_segment *)((u8 *)dump_setup + debug_dump->dump_segments_offset); offset = debug_dump->dump_segments_offset; for (i = 0; i < debug_dump->dump_segments_num; i++) { - segs[i].addr = dump_seg; - segs[i].size = sizeof(struct edgetpu_dump_segment) + dump_seg->size; - segs[i].paddr = (void *)(etdev->debug_dump_mem.tpu_addr + offset); - segs[i].vaddr = (void *)(etdev->debug_dump_mem.vaddr + offset); + struct sscd_segment seg = { + .addr = dump_seg, + .size = sizeof(struct edgetpu_dump_segment) + dump_seg->size, + .paddr = (void *)(etdev->debug_dump_mem.tpu_addr + offset), + .vaddr = (void *)(etdev->debug_dump_mem.vaddr + offset), + }; + + ret = sscd_ctx_push_segment(&sscd_ctx, &seg, false); + if (ret) + goto err_release; offset += sizeof(struct edgetpu_dump_segment) + dump_seg->size; dump_seg = (struct edgetpu_dump_segment *)((u8 *)dump_setup + ALIGN(offset, sizeof(uint64_t))); } - if (num_groups) { - mappings_dump = mobile_sscd_collect_mappings_segment(groups, num_groups, &segs[i]); - if (IS_ERR(mappings_dump)) { - ret = PTR_ERR(mappings_dump); - goto out_sscd_generate_coredump; - } - /* increase @i if mappings present 
*/ - if (mappings_dump) - i++; - else - sscd_dump_segments_num--; - } - - num_queues = mobile_sscd_collect_cmd_resp_queues(etdev, groups, num_groups, &segs[i]); + ret = mobile_collect_device_info(etdev, &sscd_ctx); + if (ret) + goto err_release; - /* - * Adjust num of segments as some groups may have a detached mailbox. - * Subtract number of VII and KCI queues according to num_groups. - */ - sscd_dump_segments_num -= (2 * num_groups + 2); - sscd_dump_segments_num += num_queues; /* Add actual number of valid VII and KCI queues */ + ret = sscd_ctx_report_and_release(&sscd_ctx, crash_info); + if (ret) + goto err; - /* Pass dump data to SSCD daemon */ - etdev_dbg(etdev, "report: %d segments", sscd_dump_segments_num); - ret = pdata->sscd_report(sscd_dev, segs, sscd_dump_segments_num, SSCD_FLAGS_ELFARM64HDR, - crash_info); -out_sscd_generate_coredump: - for (i = 0; i < num_groups; i++) - edgetpu_device_group_put(groups[i]); - kfree(mappings_dump); - kfree(segs); - kfree(groups); + return 0; +err_release: + sscd_ctx_release(&sscd_ctx); +err: + etdev_err(etdev, "failed to generate coredump: %d", ret); return ret; } diff --git a/drivers/edgetpu/mobile-debug-dump.h b/drivers/edgetpu/mobile-debug-dump.h index 0ebd5f2..00355d4 100644 --- a/drivers/edgetpu/mobile-debug-dump.h +++ b/drivers/edgetpu/mobile-debug-dump.h @@ -3,7 +3,7 @@ * Module that defines structure to retrieve debug dump segments * specific to the family of EdgeTPUs for mobile devices. * - * Copyright (C) 2021 Google, Inc. 
+ * Copyright (C) 2021-2022 Google LLC */ #ifndef __MOBILE_DEBUG_DUMP_H__ @@ -16,10 +16,4 @@ struct mobile_sscd_info { void *dev; /* SSCD platform device */ }; -struct mobile_sscd_mappings_dump { - u64 host_address; - u64 device_address; - u64 size; -}; - -#endif /* MOBILE_DEBUG_DUMP_H_ */ +#endif /* __MOBILE_DEBUG_DUMP_H__ */ diff --git a/drivers/edgetpu/mobile-pm.c b/drivers/edgetpu/mobile-pm.c index b753231..a21bed2 100644 --- a/drivers/edgetpu/mobile-pm.c +++ b/drivers/edgetpu/mobile-pm.c @@ -562,7 +562,11 @@ static int mobile_power_down(struct edgetpu_pm *etpm) etdev->state = ETDEV_STATE_NOFW; } edgetpu_kci_cancel_work_queues(etdev->kci); + } + + if (etdev->firmware) { res = edgetpu_mobile_firmware_reset_cpu(etdev, true); + /* TODO(b/198181290): remove -EIO once gsaproxy wakelock is implemented */ if (res == -EAGAIN || res == -EIO) return -EAGAIN; -- cgit v1.2.3