Merge branch 'whitechapel' into android-gs-pixel-mainline

* whitechapel: (48 commits) edgetpu: support PBHA bits for host DRAM mappings edgetpu: re-apply changes from bad merge edgetpu: abrolhos: set up SSMT registers edgetpu: abrolhos: load firmware through GSA edgetpu: Check device power state on wdt timeout edgetpu: add API to ask MMU using device DRAM edgetpu: notify runtime about fatal errors edgetpu: fix CSR map count format specifiers edgetpu: fix vm_pgoff dbg print format specifier edgetpu: stop watchdog on device power down edgetpu: add locking on edgetpu-pm edgetpu: add IOCTLs to control firmware power state edgetpu: call group_leave for in-group clients ... Signed-off-by: Nrithya Kanakasabapathy <nrithya@google.com> Signed-off-by: Erick Reyes <erickreyes@google.com> Change-Id: Ic892e61b4c86aff04ad259e061acdb2c4e40c1f5
author: Nrithya Kanakasabapathy <nrithya@google.com> 2020-10-07 23:08:08 -0700
committer: Erick Reyes <erickreyes@google.com> 2020-10-09 18:22:40 -0700
commit: b96c6d828313120c0dc07b4754c6642a139e876e (patch)
tree: 89ccf54339565c2966c7066ed5a910acd15325af
parent: 14f470f43738ff1aa682aed5394a4d5d86f557bd (diff)
download: abrolhos-b96c6d828313120c0dc07b4754c6642a139e876e.tar.gz
33 files changed, 1502 insertions, 1163 deletions
diff --git a/drivers/edgetpu/Kbuild b/drivers/edgetpu/Kbuild
index f07419b..e3e355a 100644
--- a/drivers/edgetpu/Kbuild
+++ b/drivers/edgetpu/Kbuild
@@ -11,7 +11,7 @@ else
 endif
 
 edgetpu-fw-objs := edgetpu-firmware.o edgetpu-firmware-util.o edgetpu-shared-fw.o
-edgetpu-objs	:= edgetpu-mailbox.o edgetpu-kci.o edgetpu-telemetry.o edgetpu-mapping.o edgetpu-dmabuf.o edgetpu-async.o edgetpu-pm.o edgetpu-iremap-pool.o $(edgetpu-fw-objs)
+edgetpu-objs	:= edgetpu-mailbox.o edgetpu-kci.o edgetpu-telemetry.o edgetpu-mapping.o edgetpu-dmabuf.o edgetpu-async.o edgetpu-pm.o edgetpu-iremap-pool.o edgetpu-sw-watchdog.o $(edgetpu-fw-objs)
 abrolhos-y	:= abrolhos-device.o abrolhos-device-group.o abrolhos-direct.o abrolhos-core.o abrolhos-platform.o abrolhos-iommu.o abrolhos-firmware.o abrolhos-thermal.o abrolhos-pm.o $(edgetpu-objs)
 CFLAGS_abrolhos-direct.o := -DCONFIG_ABROLHOS=1
 CFLAGS_abrolhos-core.o := -DCONFIG_ABROLHOS=1
diff --git a/drivers/edgetpu/Makefile b/drivers/edgetpu/Makefile
index 0712fac..cb0c6db 100644
--- a/drivers/edgetpu/Makefile
+++ b/drivers/edgetpu/Makefile
@@ -14,7 +14,7 @@ else
 endif
 
 edgetpu-fw-objs := edgetpu-firmware-util.o edgetpu-shared-fw.o edgetpu-firmware.o
-edgetpu-objs	:= edgetpu-core.o edgetpu-mailbox.o edgetpu-kci.o edgetpu-device-group.o edgetpu-telemetry.o edgetpu-mapping.o edgetpu-dmabuf.o edgetpu-async.o edgetpu-pm.o edgetpu-iremap-pool.o $(edgetpu-fw-objs)
+edgetpu-objs	:= edgetpu-core.o edgetpu-mailbox.o edgetpu-kci.o edgetpu-device-group.o edgetpu-telemetry.o edgetpu-mapping.o edgetpu-dmabuf.o edgetpu-async.o edgetpu-pm.o edgetpu-iremap-pool.o edgetpu-sw-watchdog.o $(edgetpu-fw-objs)
 
 abrolhos-objs	:= abrolhos-device.o abrolhos-firmware.o edgetpu-direct.o abrolhos-platform.o abrolhos-iommu.o abrolhos-thermal.o abrolhos-pm.o $(edgetpu-objs)
 
diff --git a/drivers/edgetpu/abrolhos-device.c b/drivers/edgetpu/abrolhos-device.c
index cdd1e4f..f5444eb 100644
--- a/drivers/edgetpu/abrolhos-device.c
+++ b/drivers/edgetpu/abrolhos-device.c
@@ -10,10 +10,19 @@
 #include "edgetpu-config.h"
 #include "edgetpu-internal.h"
 #include "edgetpu-mailbox.h"
+#include "abrolhos-platform.h"
 #include "edgetpu-telemetry.h"
 
 #define HOST_NONSECURE_INTRSRCMASKREG	0x000f0004
 
+#define SSMT_NS_READ_STREAM_VID_OFFSET(n) (0x1000u + (0x4u * (n)))
+#define SSMT_NS_WRITE_STREAM_VID_OFFSET(n) (0x1200u + (0x4u * (n)))
+
+#define SSMT_NS_READ_STREAM_VID_REG(base, n)                                   \
+	((base) + SSMT_NS_READ_STREAM_VID_OFFSET(n))
+#define SSMT_NS_WRITE_STREAM_VID_REG(base, n)                                  \
+	((base) + SSMT_NS_WRITE_STREAM_VID_OFFSET(n))
+
 /*
  * The interrupt handler for mailboxes.
  *
@@ -67,17 +76,25 @@ irqreturn_t edgetpu_chip_irq_handler(int irq, void *arg)
 
 void edgetpu_chip_init(struct edgetpu_dev *etdev)
 {
+	int i;
+	struct edgetpu_platform_dev *etpdev = container_of(
+			etdev, struct edgetpu_platform_dev, edgetpu_dev);
+
 	/* Disable the CustomBlock Interrupt. */
 	edgetpu_dev_write_32(etdev, HOST_NONSECURE_INTRSRCMASKREG, 0x1);
-}
 
-void edgetpu_chip_exit(struct edgetpu_dev *etdev)
-{
+	if (!etpdev->ssmt_base)
+		return;
+
+	/* Setup non-secure SCIDs, assume VID = SCID */
+	for (i = 0; i < EDGETPU_NCONTEXTS; i++) {
+		writel(i, SSMT_NS_READ_STREAM_VID_REG(etpdev->ssmt_base, i));
+		writel(i, SSMT_NS_WRITE_STREAM_VID_REG(etpdev->ssmt_base, i));
+	}
 }
 
-bool edgetpu_chip_bypassed(struct edgetpu_dev *etdev)
+void edgetpu_chip_exit(struct edgetpu_dev *etdev)
 {
-	return false;
 }
 
 void edgetpu_mark_probe_fail(struct edgetpu_dev *etdev)
diff --git a/drivers/edgetpu/abrolhos-firmware.c b/drivers/edgetpu/abrolhos-firmware.c
index ea157ab..c6f5e0b 100644
--- a/drivers/edgetpu/abrolhos-firmware.c
+++ b/drivers/edgetpu/abrolhos-firmware.c
@@ -6,6 +6,9 @@
  */
 
 #include <linux/dma-mapping.h>
+#include <linux/gsa/gsa_tpu.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
 
 #include "abrolhos-firmware.h"
 #include "abrolhos-platform.h"
@@ -15,25 +18,27 @@
 #include "edgetpu-kci.h"
 #include "edgetpu-mailbox.h"
 
-/*
- * Sets the reset state of the R52 core.
- * @val: 1 to put the core in reset state, 0 to release core from reset state.
- */
-static void r52_reset(struct edgetpu_dev *etdev, u64 val)
-{
-	edgetpu_dev_write_64(etdev, EDGETPU_REG_RESET_CONTROL, val);
-}
+#define ABROLHOS_FW_HEADER_SIZE		SZ_4K
 
 static int abrolhos_firmware_alloc_buffer(
 		struct edgetpu_firmware *et_fw,
 		struct edgetpu_firmware_buffer *fw_buf)
 {
 	struct edgetpu_dev *etdev = et_fw->etdev;
-	struct edgetpu_platform_dev *edgetpu_pdev = container_of(
-			etdev, struct edgetpu_platform_dev, edgetpu_dev);
-
-	fw_buf->vaddr = edgetpu_pdev->fw_region_vaddr;
-	fw_buf->alloc_size = edgetpu_pdev->fw_region_size;
+	struct edgetpu_platform_dev *edgetpu_pdev =
+		container_of(etdev, struct edgetpu_platform_dev, edgetpu_dev);
+	/* Allocate extra space for the image header */
+	size_t buffer_size =
+		edgetpu_pdev->fw_region_size + ABROLHOS_FW_HEADER_SIZE;
+
+	fw_buf->vaddr = kzalloc(buffer_size, GFP_KERNEL);
+	if (!fw_buf->vaddr) {
+		etdev_err(etdev, "%s: failed to allocate buffer (%zu bytes)\n",
+			  __func__, buffer_size);
+		return -ENOMEM;
+	}
+	fw_buf->dma_addr = 0;
+	fw_buf->alloc_size = buffer_size;
 	fw_buf->used_size_align = 16;
 	return 0;
 }
@@ -42,7 +47,9 @@ static void abrolhos_firmware_free_buffer(
 		struct edgetpu_firmware *et_fw,
 		struct edgetpu_firmware_buffer *fw_buf)
 {
+	kfree(fw_buf->vaddr);
 	fw_buf->vaddr = NULL;
+	fw_buf->dma_addr = 0;
 	fw_buf->alloc_size = 0;
 	fw_buf->used_size_align = 0;
 }
@@ -64,32 +71,89 @@ static int abrolhos_firmware_prepare_run(struct edgetpu_firmware *et_fw,
 					 struct edgetpu_firmware_buffer *fw_buf)
 {
 	struct edgetpu_dev *etdev = et_fw->etdev;
-
-	/* Clear Substream ID (aka SCID) for instruction remapped addresses */
-	u32 sec_reg = edgetpu_dev_read_32(
-		etdev, EDGETPU_REG_INSTRUCTION_REMAP_SECURITY);
-	sec_reg &= ~(0x0F << 10);
-	edgetpu_dev_write_32(etdev, EDGETPU_REG_INSTRUCTION_REMAP_SECURITY,
-			     sec_reg);
-
-	/* Clear Substream ID (aka SCID) for all other addresses */
-	sec_reg = edgetpu_dev_read_32(etdev, EDGETPU_REG_SECURITY);
-	sec_reg &= ~(0x0F << 10);
-	edgetpu_dev_write_32(etdev, EDGETPU_REG_SECURITY, sec_reg);
-
-	r52_reset(etdev, 1);
-
-	/* Reset KCI mailbox before start f/w, don't process anything old. */
+	struct edgetpu_platform_dev *edgetpu_pdev =
+		container_of(etdev, struct edgetpu_platform_dev, edgetpu_dev);
+	void *image_vaddr, *header_vaddr;
+	dma_addr_t header_dma_addr;
+	int ret, tpu_state;
+
+	if (fw_buf->used_size < ABROLHOS_FW_HEADER_SIZE) {
+		etdev_err(etdev, "Invalid buffer size: %zu < %d\n",
+			  fw_buf->used_size, ABROLHOS_FW_HEADER_SIZE);
+		return -EINVAL;
+	}
+
+	tpu_state = gsa_send_tpu_cmd(edgetpu_pdev->gsa_dev, GSA_TPU_GET_STATE);
+
+	if (tpu_state < GSA_TPU_STATE_INACTIVE) {
+		etdev_warn(etdev, "GSA failed to retrieve current status: %d\n",
+			   tpu_state);
+		etdev_warn(etdev, "Assuming device is inactive\n");
+		tpu_state = GSA_TPU_STATE_INACTIVE;
+	}
+
+	etdev_dbg(etdev, "GSA Reports TPU state: %d\n", tpu_state);
+
+	if (tpu_state > GSA_TPU_STATE_INACTIVE) {
+		ret = gsa_unload_tpu_fw_image(edgetpu_pdev->gsa_dev);
+		if (ret) {
+			etdev_warn(etdev, "GSA release failed: %d\n", ret);
+			return -EIO;
+		}
+	}
+
+	image_vaddr = memremap(edgetpu_pdev->fw_region_paddr,
+			       edgetpu_pdev->fw_region_size, MEMREMAP_WC);
+
+	if (!image_vaddr) {
+		etdev_err(etdev, "memremap failed\n");
+		return -ENOMEM;
+	}
+
+	/* Skip the header */
+	memcpy(image_vaddr, fw_buf->vaddr + ABROLHOS_FW_HEADER_SIZE,
+	       fw_buf->used_size - ABROLHOS_FW_HEADER_SIZE);
+
+	/* Allocate coherent memory for the image header */
+	header_vaddr = dma_alloc_coherent(edgetpu_pdev->gsa_dev,
+					  ABROLHOS_FW_HEADER_SIZE,
+					  &header_dma_addr, GFP_KERNEL);
+	if (!header_vaddr) {
+		etdev_err(etdev,
+			  "Failed to allocate coherent memory for header\n");
+		ret = -ENOMEM;
+		goto out_unmap;
+	}
+
+	memcpy(header_vaddr, fw_buf->vaddr, ABROLHOS_FW_HEADER_SIZE);
+	etdev_dbg(etdev,
+		  "Requesting GSA image load. meta = %llX payload = %llX",
+		  header_dma_addr, (u64)edgetpu_pdev->fw_region_paddr);
+
+	ret = gsa_load_tpu_fw_image(edgetpu_pdev->gsa_dev, header_dma_addr,
+				    edgetpu_pdev->fw_region_paddr);
+	if (ret) {
+		etdev_err(etdev, "GSA authentication failed: %d\n", ret);
+		ret = -EIO;
+		goto out_free_gsa;
+	}
+
+	/* Reset KCI mailbox before starting f/w, don't process anything old.*/
 	edgetpu_mailbox_reset(etdev->kci->mailbox);
 
-	/* Remap TPU CPU instructions to the carveout IOVA. */
-	edgetpu_dev_write_64(etdev, EDGETPU_REG_INSTRUCTION_REMAP_NEW_BASE,
-			     FW_IOVA);
-	edgetpu_dev_write_64(etdev, EDGETPU_REG_INSTRUCTION_REMAP_CONTROL, 1);
+	tpu_state = gsa_send_tpu_cmd(edgetpu_pdev->gsa_dev, GSA_TPU_START);
 
-	r52_reset(etdev, 0);
+	if (tpu_state < 0) {
+		etdev_err(etdev, "GSA start firmware failed: %d\n", tpu_state);
+		ret = -EIO;
+	}
 
-	return 0;
+out_free_gsa:
+	dma_free_coherent(edgetpu_pdev->gsa_dev, ABROLHOS_FW_HEADER_SIZE,
+			  header_vaddr, header_dma_addr);
+out_unmap:
+	memunmap(image_vaddr);
+	return ret;
 }
 
 static const struct edgetpu_firmware_handlers abrolhos_firmware_handlers = {
@@ -119,8 +183,9 @@ int edgetpu_chip_firmware_run(struct edgetpu_dev *etdev, const char *name,
 unsigned long edgetpu_chip_firmware_iova(struct edgetpu_dev *etdev)
 {
 	/*
-	 * TODO(b/129761817): Fetch the correct address, it won't be a constant
-	 * value after GSA gets involved.
+	 * There is no IOVA in Abrolhos, since the IOMMU is bypassed for
+	 * firmware and the only translation in effect is the one done
+	 * by the instruction remap registers.
 	 */
-	return FW_IOVA;
+	return EDGETPU_INSTRUCTION_REMAP_BASE;
 }
diff --git a/drivers/edgetpu/abrolhos-iommu.c b/drivers/edgetpu/abrolhos-iommu.c
index 5134f6d..39b1e91 100644
--- a/drivers/edgetpu/abrolhos-iommu.c
+++ b/drivers/edgetpu/abrolhos-iommu.c
@@ -21,8 +21,6 @@
 #include <linux/iommu-ext.h>
 #endif
 
-/* 1 context per VII/group plus 1 for KCI */
-#define NCONTEXTS			(EDGETPU_NGROUPS + 1)
 
 struct edgetpu_iommu_domain {
 	struct iommu_domain *iommu_domain;
@@ -31,7 +29,7 @@ struct edgetpu_iommu_domain {
 
 struct edgetpu_iommu {
 	struct iommu_group *iommu_group;
-	struct edgetpu_iommu_domain domain[NCONTEXTS];
+	struct edgetpu_iommu_domain domain[EDGETPU_NCONTEXTS];
 	bool context_0_default;		/* is context 0 domain the default? */
 };
 
@@ -48,7 +46,7 @@ static int edgetpu_iommu_fault_handler(struct iommu_domain *domain,
 	struct edgetpu_iommu_domain *etdomain =
 		(struct edgetpu_iommu_domain *)token;
 
-	dev_err(dev, "IOMMU fault on address %08lX. PASID = %d flags = %08X",
+	dev_err(dev, "IOMMU fault on address %08lX. PASID = %u flags = %08X",
 		iova, etdomain->pasid, flags);
 	// Tell the IOMMU driver we are OK with this fault
 	return 0;
@@ -104,27 +102,30 @@ int edgetpu_mmu_attach(struct edgetpu_dev *etdev, void *mmu_info)
 	if (!iommu_dev_feature_enabled(etdev->dev, IOMMU_DEV_FEAT_AUX))
 		return i ? 0 : -EINVAL;
 
-	for (; i < NCONTEXTS; i++) {
-		unsigned int pasid;
+	for (; i < EDGETPU_NCONTEXTS; i++) {
+		int pasid, ret;
 
 		domain = iommu_domain_alloc(etdev->dev->bus);
 
 		if (!domain) {
-			dev_warn(etdev->dev, "iommu domain %d alloc failed\n",
-				 i);
+			etdev_warn(etdev, "iommu domain %d alloc failed\n", i);
 			break;
 		}
-		iommu_aux_attach_device(domain, etdev->dev);
+		ret = iommu_aux_attach_device(domain, etdev->dev);
+		if (ret) {
+			etdev_warn(etdev, "Attach IOMMU aux failed: %d", ret);
+			iommu_domain_free(domain);
+			continue;
+		}
 		pasid = iommu_aux_get_pasid(domain, etdev->dev);
-		if (!pasid || pasid >= NCONTEXTS) {
-			dev_warn(etdev->dev,
-				 "Invalid PASID %d returned from iommu\n",
-				 pasid);
+		if (pasid <= 0 || pasid >= EDGETPU_NCONTEXTS) {
+			etdev_warn(etdev,
+				   "Invalid PASID %d returned from iommu\n",
+				   pasid);
 			iommu_aux_detach_device(domain, etdev->dev);
 			iommu_domain_free(domain);
 		} else if (etiommu->domain[pasid].iommu_domain) {
-			dev_warn(etdev->dev, "PASID %d already in use\n",
-				 pasid);
+			etdev_warn(etdev, "PASID %d already in use\n", pasid);
 			iommu_aux_detach_device(domain, etdev->dev);
 			iommu_domain_free(domain);
 		} else {
@@ -150,7 +151,8 @@ void edgetpu_mmu_detach(struct edgetpu_dev *etdev)
 
 	edgetpu_mmu_reset(etdev);
 
-	for (i = etiommu->context_0_default ? 1 : 0; i < NCONTEXTS; i++) {
+	for (i = etiommu->context_0_default ? 1 : 0; i < EDGETPU_NCONTEXTS;
+	     i++) {
 		if (etiommu->domain[i].iommu_domain) {
 			if (i) {
 				iommu_aux_detach_device(
@@ -193,7 +195,7 @@ static int get_iommu_map_params(struct edgetpu_dev *etdev,
 	int i;
 	struct scatterlist *sg;
 
-	if (pasid >= NCONTEXTS) {
+	if (pasid >= EDGETPU_NCONTEXTS) {
 		dev_err(etdev->dev, "Invalid context_id %d\n", context_id);
 		return -EINVAL;
 	}
@@ -213,6 +215,7 @@ static int get_iommu_map_params(struct edgetpu_dev *etdev,
 	for_each_sg(map->sgt.sgl, sg, map->sgt.orig_nents, i)
 		size += sg->length;
 
+	prot |= IOMMU_PBHA_PROT(EDGEPTU_MAP_PBHA_VALUE(map->flags));
 	params->prot = prot;
 	params->size = size;
 	params->domain = domain;
@@ -238,8 +241,8 @@ int edgetpu_mmu_map(struct edgetpu_dev *etdev, struct edgetpu_mapping *map,
 			      "%s: 64-bit addressing is not supported",
 			      __func__);
 
-	ret = dma_map_sg(etdev->dev, map->sgt.sgl, map->sgt.nents,
-			 edgetpu_host_dma_dir(map->dir));
+	ret = dma_map_sg_attrs(etdev->dev, map->sgt.sgl, map->sgt.nents,
+			       edgetpu_host_dma_dir(map->dir), map->dma_attrs);
 	if (!ret)
 		return -EINVAL;
 	map->sgt.nents = ret;
@@ -354,7 +357,7 @@ int edgetpu_mmu_add_translation(struct edgetpu_dev *etdev, unsigned long iova,
 	struct edgetpu_iommu *etiommu = etdev->mmu_cookie;
 	uint pasid = context_id_to_pasid(context_id);
 
-	if (pasid >= NCONTEXTS)
+	if (pasid >= EDGETPU_NCONTEXTS)
 		return -EINVAL;
 
 	domain = etiommu->domain[pasid].iommu_domain;
@@ -376,7 +379,7 @@ void edgetpu_mmu_remove_translation(struct edgetpu_dev *etdev,
 	struct edgetpu_iommu *etiommu = etdev->mmu_cookie;
 	uint pasid = context_id_to_pasid(context_id);
 
-	if (pasid >= NCONTEXTS)
+	if (pasid >= EDGETPU_NCONTEXTS)
 		return;
 
 	domain = etiommu->domain[pasid].iommu_domain;
@@ -401,7 +404,7 @@ tpu_addr_t edgetpu_mmu_tpu_map(struct edgetpu_dev *etdev, dma_addr_t down_addr,
 	int prot = __dma_dir_to_iommu_prot(dir);
 	uint pasid = context_id_to_pasid(context_id);
 
-	if (pasid >= NCONTEXTS)
+	if (pasid >= EDGETPU_NCONTEXTS)
 		return 0;
 	domain = etiommu->domain[pasid].iommu_domain;
 
@@ -432,7 +435,7 @@ void edgetpu_mmu_tpu_unmap(struct edgetpu_dev *etdev, tpu_addr_t tpu_addr,
 		iommu_get_domain_for_dev(etdev->dev);
 	uint pasid = context_id_to_pasid(context_id);
 
-	if (pasid >= NCONTEXTS)
+	if (pasid >= EDGETPU_NCONTEXTS)
 		return;
 	domain = etiommu->domain[pasid].iommu_domain;
 
@@ -446,3 +449,7 @@ void edgetpu_mmu_tpu_unmap(struct edgetpu_dev *etdev, tpu_addr_t tpu_addr,
 	/* Unmap the address from the context-specific domain */
 	iommu_unmap(domain, tpu_addr, size);
 }
+
+void edgetpu_mmu_use_dev_dram(struct edgetpu_dev *etdev)
+{
+}
diff --git a/drivers/edgetpu/abrolhos-platform.c b/drivers/edgetpu/abrolhos-platform.c
index a1c9a59..5f36018 100644
--- a/drivers/edgetpu/abrolhos-platform.c
+++ b/drivers/edgetpu/abrolhos-platform.c
@@ -5,13 +5,17 @@
  * Copyright (C) 2019 Google, Inc.
  */
 
+#include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/gsa/gsa_tpu.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/platform_data/sscoredump.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -26,12 +30,29 @@
 #include "edgetpu-mmu.h"
 #include "edgetpu-telemetry.h"
 
+#define MAX_SEGS     1
+
 static const struct of_device_id edgetpu_of_match[] = {
 	{ .compatible = "google,darwinn", },
 	{ /* end of list */ },
 };
 MODULE_DEVICE_TABLE(of, edgetpu_of_match);
 
+static void edgetpu_sscd_release(struct device *dev)
+{
+	pr_debug(DRIVER_NAME " release\n");
+}
+static struct sscd_platform_data edgetpu_sscd_pdata;
+static struct platform_device edgetpu_sscd_dev = {
+	.name            = DRIVER_NAME,
+	.driver_override = SSCD_NAME,
+	.id              = -1,
+	.dev             = {
+		.platform_data = &edgetpu_sscd_pdata,
+		.release       = edgetpu_sscd_release,
+	},
+};
+
 /*
  * Log and trace buffers at the beginning of the remapped region,
  * pool memory afterwards.
@@ -57,6 +78,7 @@ static void abrolhos_get_telemetry_mem(struct edgetpu_platform_dev *etpdev,
 static int edgetpu_platform_setup_fw_region(struct edgetpu_platform_dev *etpdev)
 {
 	struct edgetpu_dev *etdev = &etpdev->edgetpu_dev;
+	struct platform_device *gsa_pdev;
 	struct device *dev = etdev->dev;
 	struct resource r;
 	struct device_node *np;
@@ -74,8 +96,7 @@ static int edgetpu_platform_setup_fw_region(struct edgetpu_platform_dev *etpdev)
 	err = of_address_to_resource(np, 0, &r);
 	of_node_put(np);
 	if (err) {
-		dev_err(dev,
-			"No memory address assigned to firmware region\n");
+		dev_err(dev, "No memory address assigned to firmware region\n");
 		return err;
 	}
 
@@ -86,30 +107,32 @@ static int edgetpu_platform_setup_fw_region(struct edgetpu_platform_dev *etpdev)
 		return -ENOSPC;
 	}
 
-	etpdev->fw_region_vaddr =
-		memremap(r.start, region_map_size, MEMREMAP_WC);
-	if (!etpdev->fw_region_vaddr) {
-		dev_err(dev, "Firmware memory remap failed\n");
-		return -EINVAL;
+	/* Get GSA device from device tree */
+	np = of_parse_phandle(dev->of_node, "gsa-device", 0);
+	if (!np) {
+		dev_err(dev, "No gsa-dev in device tree\n");
+		return -ENODEV;
 	}
+	gsa_pdev = of_find_device_by_node(np);
+	if (!gsa_pdev) {
+		dev_err(dev, "GSA device not found\n");
+		of_node_put(np);
+		return -ENODEV;
+	}
+	etpdev->gsa_dev = &gsa_pdev->dev;
+	of_node_put(np);
 
-	etpdev->shared_mem_vaddr =
-		etpdev->fw_region_vaddr + EDGETPU_REMAPPED_DATA_OFFSET;
-	etpdev->shared_mem_paddr = r.start + EDGETPU_REMAPPED_DATA_OFFSET;
-
+	etpdev->fw_region_paddr = r.start;
 	etpdev->fw_region_size = EDGETPU_FW_SIZE_MAX;
 
-	/* Add an IOMMU translation to the physical address of the region. */
-	err = edgetpu_mmu_add_translation(etdev, FW_IOVA, r.start,
-					  region_map_size,
-					  IOMMU_READ | IOMMU_WRITE |
-					  IOMMU_PRIV, EDGETPU_CONTEXT_KCI);
-	if (err) {
-		dev_err(dev, "Unable to map firmware memory into IOMMU\n");
-		memunmap(etpdev->fw_region_vaddr);
-		etpdev->fw_region_vaddr = NULL;
-		return err;
+	etpdev->shared_mem_vaddr =
+		memremap(r.start + EDGETPU_REMAPPED_DATA_OFFSET,
+			 EDGETPU_REMAPPED_DATA_SIZE, MEMREMAP_WC);
+	if (!etpdev->shared_mem_vaddr) {
+		dev_err(dev, "Shared memory remap failed\n");
+		return -EINVAL;
 	}
+	etpdev->shared_mem_paddr = r.start + EDGETPU_REMAPPED_DATA_OFFSET;
 
 	err = of_property_read_u32(dev->of_node, "csr-iova", &csr_iova);
 	/* Device did not define a CSR region */
@@ -119,8 +142,7 @@ static int edgetpu_platform_setup_fw_region(struct edgetpu_platform_dev *etpdev)
 	/* If an IOVA was found, we must also have physical address and size */
 	err = of_property_read_u32(dev->of_node, "csr-phys", &csr_phys);
 	if (err) {
-		dev_err(dev,
-			"Device tree: invalid CSR physical address\n");
+		dev_err(dev, "Device tree: invalid CSR physical address\n");
 		goto out_unmap;
 	}
 
@@ -131,7 +153,7 @@ static int edgetpu_platform_setup_fw_region(struct edgetpu_platform_dev *etpdev)
 	}
 
 	dev_dbg(dev, "Mapping device CSRs: %X -> %X (%d bytes)\n", csr_iova,
-		 csr_phys, csr_size);
+		csr_phys, csr_size);
 	/* Add an IOMMU translation for the Mailbox CSRs */
 	err = edgetpu_mmu_add_translation(etdev, csr_iova, csr_phys, csr_size,
 					  IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV,
@@ -144,31 +166,28 @@ static int edgetpu_platform_setup_fw_region(struct edgetpu_platform_dev *etpdev)
 	etpdev->csr_size = csr_size;
 	return 0;
 out_unmap:
-	memunmap(etpdev->fw_region_vaddr);
-	etpdev->fw_region_vaddr = NULL;
-	edgetpu_mmu_remove_translation(etdev, FW_IOVA, region_map_size,
-				       EDGETPU_CONTEXT_KCI);
+	memunmap(etpdev->shared_mem_vaddr);
+	etpdev->shared_mem_vaddr = NULL;
 	return err;
 }
 
 static void edgetpu_platform_cleanup_fw_region(
 	struct edgetpu_platform_dev *etpdev)
 {
-	if (!etpdev->fw_region_vaddr)
-		return;
+	gsa_unload_tpu_fw_image(etpdev->gsa_dev);
 
-	edgetpu_mmu_remove_translation(&etpdev->edgetpu_dev, FW_IOVA,
-				       EDGETPU_FW_SIZE_MAX +
-					       EDGETPU_REMAPPED_DATA_SIZE,
-				       EDGETPU_CONTEXT_KCI);
 	if (etpdev->csr_iova) {
 		edgetpu_mmu_remove_translation(&etpdev->edgetpu_dev,
 					       etpdev->csr_iova,
 					       etpdev->csr_size,
 					       EDGETPU_CONTEXT_KCI);
 	}
-	memunmap(etpdev->fw_region_vaddr);
-	etpdev->fw_region_vaddr = NULL;
+	etpdev->csr_iova = 0;
+
+	if (!etpdev->shared_mem_vaddr)
+		return;
+	memunmap(etpdev->shared_mem_vaddr);
+	etpdev->shared_mem_vaddr = NULL;
 }
 
 void edgetpu_setup_mmu(struct edgetpu_dev *etdev)
@@ -181,6 +200,90 @@ void edgetpu_setup_mmu(struct edgetpu_dev *etdev)
 		dev_warn(etdev->dev, "failed to attach IOMMU: %d\n", ret);
 }
 
+static int edgetpu_sscd_generate_coredump(void)
+{
+	struct sscd_platform_data *pdata = &edgetpu_sscd_pdata;
+	static struct sscd_segment segs[MAX_SEGS];
+	char msg[128];
+	int cnt;
+
+	if (!pdata->sscd_report) {
+		pr_err(DRIVER_NAME " failed to generate coredump\n");
+		return -1;
+	}
+
+	/*
+	 * TODO (b/156049774):
+	 * Replace with dump information when it's available
+	 */
+	cnt = scnprintf(msg, sizeof(msg), "HELLO TPU!");
+	segs[0].addr = (void *)&msg;
+	segs[0].size = cnt;
+
+	pr_debug(DRIVER_NAME " report: %d segments", MAX_SEGS);
+	return pdata->sscd_report(&edgetpu_sscd_dev, segs, MAX_SEGS,
+				  0, "edgetpu_coredump");
+}
+
+static ssize_t edgetpu_coredump_store(struct file *filep,
+	const char __user *ubuf, size_t size, loff_t *offp)
+{
+	int generate_coredump, ret;
+
+	ret = kstrtoint_from_user(ubuf, size, 0, &generate_coredump);
+	if (ret)
+		return ret;
+	if (generate_coredump) {
+		ret = edgetpu_sscd_generate_coredump();
+		if (ret) {
+			pr_err(DRIVER_NAME " failed to generate coredump: %d\n",
+			       ret);
+			return ret;
+		}
+	}
+
+	return size;
+};
+
+static const struct file_operations coredump_ops = {
+	.owner = THIS_MODULE,
+	.write = edgetpu_coredump_store,
+};
+
+static void edgetpu_sscd_init(struct edgetpu_dev *etdev)
+{
+	/*
+	 * TODO (b/156049774):
+	 * Remove debugfs file after dump information is available and
+	 * edgetpu_sscd_generate_coredump is triggered by a crash
+	 */
+	debugfs_create_file("coredump", 0220, etdev->d_entry, etdev,
+			    &coredump_ops);
+}
+
+static int abrolhos_parse_ssmt(struct edgetpu_platform_dev *etpdev)
+{
+	struct edgetpu_dev *etdev = &etpdev->edgetpu_dev;
+	struct platform_device *pdev = to_platform_device(etdev->dev);
+	struct resource *res;
+	int rc;
+	void __iomem *ssmt_base;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ssmt");
+	if (!res) {
+		etdev_warn(etdev, "Failed to find SSMT register base");
+		return -EINVAL;
+	}
+	ssmt_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(ssmt_base)) {
+		rc = PTR_ERR(ssmt_base);
+		etdev_warn(etdev, "Failed to map SSMT register base: %d\n", rc);
+		return rc;
+	}
+	etpdev->ssmt_base = ssmt_base;
+	return 0;
+}
+
 static int edgetpu_platform_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -247,6 +350,13 @@ static int edgetpu_platform_probe(struct platform_device *pdev)
 		goto out;
 	}
 
+	ret = abrolhos_parse_ssmt(edgetpu_pdev);
+	if (ret)
+		dev_warn(
+			dev,
+			"SSMT setup failed (%d). Context isolation not enforced\n",
+			ret);
+
 	abrolhos_get_telemetry_mem(edgetpu_pdev, EDGETPU_TELEMETRY_LOG,
 				   &log_mem);
 	abrolhos_get_telemetry_mem(edgetpu_pdev, EDGETPU_TELEMETRY_TRACE,
@@ -289,6 +399,8 @@ static int edgetpu_platform_probe(struct platform_device *pdev)
 	dev_dbg(dev, "Creating thermal device\n");
 	edgetpu_pdev->edgetpu_dev.thermal = devm_tpu_thermal_create(dev);
 
+	edgetpu_sscd_init(&edgetpu_pdev->edgetpu_dev);
+
 out:
 	dev_dbg(dev, "Probe finished, powering down\n");
 	/* Turn the device off until a client request is received */
@@ -344,12 +456,19 @@ static int __init edgetpu_platform_init(void)
 	ret = edgetpu_init();
 	if (ret)
 		return ret;
+
+	/* Register SSCD platform device */
+	ret = platform_device_register(&edgetpu_sscd_dev);
+	if (ret)
+		pr_err(DRIVER_NAME " SSCD platform device registration failed: %d\n",
+		       ret);
 	return platform_driver_register(&edgetpu_platform_driver);
 }
 
 static void __exit edgetpu_platform_exit(void)
 {
 	platform_driver_unregister(&edgetpu_platform_driver);
+	platform_device_unregister(&edgetpu_sscd_dev);
 	edgetpu_exit();
 }
 
diff --git a/drivers/edgetpu/abrolhos-platform.h b/drivers/edgetpu/abrolhos-platform.h
index a69ec4e..35451d4 100644
--- a/drivers/edgetpu/abrolhos-platform.h
+++ b/drivers/edgetpu/abrolhos-platform.h
@@ -7,6 +7,10 @@
 #ifndef __EDGETPU_PLATFORM_H__
 #define __EDGETPU_PLATFORM_H__
 
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/types.h>
+
 #include "edgetpu-internal.h"
 #include "abrolhos-pm.h"
 
@@ -19,13 +23,15 @@ struct edgetpu_platform_dev {
 	struct edgetpu_dev edgetpu_dev;
 	struct edgetpu_platform_pwr platform_pwr;
 	int irq;
-	void *fw_region_vaddr;
+	phys_addr_t fw_region_paddr;
 	size_t fw_region_size;
 	void *shared_mem_vaddr;
 	phys_addr_t shared_mem_paddr;
 	size_t shared_mem_size;
 	dma_addr_t csr_iova;
 	size_t csr_size;
+	struct device *gsa_dev;
+	void __iomem *ssmt_base;
 };
 
 #endif /* __EDGETPU_PLATFORM_H__ */
diff --git a/drivers/edgetpu/abrolhos-pm.c b/drivers/edgetpu/abrolhos-pm.c
index a895060..b5ac72f 100644
--- a/drivers/edgetpu/abrolhos-pm.c
+++ b/drivers/edgetpu/abrolhos-pm.c
@@ -6,6 +6,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/gsa/gsa_tpu.h>
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
 
@@ -364,6 +365,8 @@ static int abrolhos_power_up(struct edgetpu_pm *etpm)
 	struct device *dev = etdev->dev;
 	int ret = abrolhos_pwr_state_set(dev,
 					 abrolhos_get_initial_pwr_state(dev));
+	enum edgetpu_firmware_status firmware_status;
+
 	if (ret)
 		return ret;
 
@@ -381,7 +384,18 @@ static int abrolhos_power_up(struct edgetpu_pm *etpm)
 	if (!etdev->firmware)
 		return 0;
 
-	switch (edgetpu_firmware_status_locked(etdev)) {
+	firmware_status = edgetpu_firmware_status_locked(etdev);
+	if (firmware_status == FW_LOADING)
+		goto out;
+	/* attempt firmware run */
+	mutex_lock(&etdev->state_lock);
+	if (etdev->state == ETDEV_STATE_FWLOADING) {
+		mutex_unlock(&etdev->state_lock);
+		return -EAGAIN;
+	}
+	etdev->state = ETDEV_STATE_FWLOADING;
+	mutex_unlock(&etdev->state_lock);
+	switch (firmware_status) {
 	case FW_VALID:
 		ret = edgetpu_firmware_restart_locked(etdev);
 		break;
@@ -390,11 +404,18 @@ static int abrolhos_power_up(struct edgetpu_pm *etpm)
 						  EDGETPU_DEFAULT_FIRMWARE_NAME,
 						  FW_DEFAULT);
 		break;
-	case FW_LOADING:
 	default:
 		break;
 	}
-
+	mutex_lock(&etdev->state_lock);
+	if (ret == -EIO)
+		etdev->state = ETDEV_STATE_BAD; /* f/w handshake error */
+	else if (ret)
+		etdev->state = ETDEV_STATE_NOFW; /* other errors */
+	else
+		etdev->state = ETDEV_STATE_GOOD; /* f/w handshake success */
+	mutex_unlock(&etdev->state_lock);
+out:
 	if (ret)
 		abrolhos_power_down(etpm);
 
@@ -403,9 +424,9 @@ static int abrolhos_power_up(struct edgetpu_pm *etpm)
 
 static void abrolhos_power_down(struct edgetpu_pm *etpm)
 {
-
 	struct edgetpu_platform_dev *edgetpu_pdev = container_of(
-			etpm->etdev, struct edgetpu_platform_dev, edgetpu_dev);
+		etpm->etdev, struct edgetpu_platform_dev, edgetpu_dev);
+	u64 val;
 	int res;
 	int curr_state;
 
@@ -415,6 +436,15 @@ static void abrolhos_power_down(struct edgetpu_pm *etpm)
 	if (curr_state == TPU_OFF)
 		return;
 
+	if (abrolhos_pwr_state_get(etpm->etdev->dev, &val)) {
+		etdev_warn(etpm->etdev, "Failed to read current power state\n");
+		val = TPU_ACTIVE_NOM;
+	}
+	if (val == TPU_OFF) {
+		etdev_dbg(etpm->etdev,
+			  "Device already off, skipping shutdown\n");
+		return;
+	}
 	if (etpm->etdev->kci &&
 	    edgetpu_firmware_status_locked(etpm->etdev) == FW_VALID) {
 		res = edgetpu_kci_shutdown(etpm->etdev->kci);
@@ -435,6 +465,10 @@ static void abrolhos_power_down(struct edgetpu_pm *etpm)
 			abrolhos_pwr_policy_set(edgetpu_pdev, TPU_ACTIVE_OD);
 		}
 	}
+	res = gsa_send_tpu_cmd(edgetpu_pdev->gsa_dev, GSA_TPU_SHUTDOWN);
+	if (res < 0)
+		etdev_warn(etpm->etdev, "GSA shutdown request failed (%d)\n",
+			   res);
 	abrolhos_pwr_state_set(etpm->etdev->dev, TPU_OFF);
 }
 
diff --git a/drivers/edgetpu/abrolhos-pwr.h b/drivers/edgetpu/abrolhos-pwr.h
deleted file mode 100644
index e59858e..0000000
--- a/drivers/edgetpu/abrolhos-pwr.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Power management header for Abrolhos.
- *
- * Copyright (C) 2020 Google, Inc.
- */
-#ifndef __EDGETPU_PWR_H__
-#define __EDGETPU_PWR_H__
-
-/* Can't build out of tree with acpm_dvfs unless kernel supports ACPM */
-#if IS_ENABLED(CONFIG_ACPM_DVFS)
-
-#include <linux/acpm_dvfs.h>
-
-#else
-
-static int exynos_acpm_set_rate(unsigned int id, unsigned long rate)
-{
-	return 0;
-}
-static int exynos_acpm_set_init_freq(unsigned int dfs_id, unsigned long freq)
-{
-	return 0;
-}
-static unsigned long exynos_acpm_get_rate(unsigned int id,
-					  unsigned long dbg_val)
-{
-	return 0;
-}
-static int exynos_acpm_set_policy(unsigned int id, unsigned long policy)
-{
-	return 0;
-}
-
-#endif /* IS_ENABLED(CONFIG_ACPM_DVFS) */
-
-/*
- * TPU Power States:
- * 0:			Off
- * 1:			Deep Sleep Clocks Off
- * 2:			Deep Sleep Clocks Slow
- * 3:			Deep Sleep Clocks Fast
- * 4:			Sleep Clocks Off
- * 5:			Sleep Clocks Slow
- * 6:			Retention Clocks Slow
- * 500000000:	Super Underdrive @500MHz
- * 800000000:	Underdrive @800MHz
- * 1066000000:	Nominal @1066MHz
- * 1230000000:	Overdrive @1230MHz
- */
-enum tpu_pwr_state {
-	TPU_OFF = 0,
-	TPU_DEEP_SLEEP_CLOCKS_OFF  = 1,
-	TPU_DEEP_SLEEP_CLOCKS_SLOW = 2,
-	TPU_DEEP_SLEEP_CLOCKS_FAST = 3,
-	TPU_SLEEP_CLOCKS_OFF       = 4,
-	TPU_SLEEP_CLOCKS_SLOW      = 5,
-	TPU_RETENTION_CLOCKS_SLOW  = 6,
-	TPU_ACTIVE_SUD = 500000000,
-	TPU_ACTIVE_UD  = 800000000,
-	TPU_ACTIVE_NOM = 1066000000,
-	TPU_ACTIVE_OD  = 1230000000,
-};
-
-#define TPU_POLICY_MAX	TPU_ACTIVE_OD
-
-#endif /* __EDGETPU_PWR_H__ */
diff --git a/drivers/edgetpu/abrolhos-thermal.c b/drivers/edgetpu/abrolhos-thermal.c
index f984a62..2aa0f43 100644
--- a/drivers/edgetpu/abrolhos-thermal.c
+++ b/drivers/edgetpu/abrolhos-thermal.c
@@ -82,23 +82,22 @@ static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev,
 	struct device *dev = cooling->dev;
 	unsigned long pwr_state;
 
-	if (state_original >= ARRAY_SIZE(state_mapping)) {
+	if (WARN_ON(state_original >= ARRAY_SIZE(state_mapping))) {
 		dev_err(dev, "%s: invalid cooling state %lu\n", __func__,
 			state_original);
-		WARN_ON(1);
 		return -EINVAL;
 	}
 
-	pwr_state = state_mapping[state_original];
-	cooling->pwr_state = pwr_state;
-
 	mutex_lock(&cooling->lock);
-
-	ret = exynos_acpm_set_policy(TPU_ACPM_DOMAIN, pwr_state);
-	if (ret) {
-		dev_err(dev, "error setting tpu policy: %d\n", ret);
-		mutex_unlock(&cooling->lock);
-		return ret;
+	pwr_state = state_mapping[state_original];
+	if (state_original != cooling->cooling_state) {
+		ret = exynos_acpm_set_policy(TPU_ACPM_DOMAIN, pwr_state);
+		if (ret) {
+			dev_err(dev, "error setting tpu policy: %d\n", ret);
+			mutex_unlock(&cooling->lock);
+			return ret;
+		}
+		cooling->cooling_state = state_original;
 	}
 
 	mutex_unlock(&cooling->lock);
@@ -107,18 +106,28 @@ static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev,
 
 static int edgetpu_get_cur_state(struct thermal_cooling_device *cdev,
 				 unsigned long *state)
-{	unsigned long state_original;
+{
+	int ret = 0;
 	struct edgetpu_thermal *cooling = cdev->devdata;
-	int i = 0;
 
-	state_original = exynos_acpm_get_rate(TPU_ACPM_DOMAIN, 0);
-	find_state_pwr(i, state_original, state_mapping[i], state_mapping,
-		       *state, i);
-	dev_err(cooling->dev, "Unknown get state req for: %lu\n",
-		state_original);
-	*state = 0;
-	WARN_ON(1);
-	return -EINVAL;
+	/* Hold the lock so the range check and the reset below are atomic. */
+	mutex_lock(&cooling->lock);
+	*state = cooling->cooling_state;
+	if (*state >= ARRAY_SIZE(state_mapping)) {
+		dev_warn(cooling->dev, "Unknown cooling state: %lu, resetting\n", *state);
+		ret = exynos_acpm_set_policy(TPU_ACPM_DOMAIN, TPU_ACTIVE_OD);
+		if (ret) {
+			dev_err(cooling->dev, "error setting tpu policy: %d\n", ret);
+			goto out_unlock;
+		}
+
+		/* Back to "no cooling"; report the reset value to the caller. */
+		cooling->cooling_state = 0;
+		*state = 0;
+	}
+out_unlock:
+	mutex_unlock(&cooling->lock);
+	return ret;
 }
 
 static int edgetpu_state2power_internal(unsigned long state, u32 *power,
@@ -211,12 +220,18 @@ static void devm_tpu_thermal_release(struct device *dev, void *res)
 static int
 tpu_thermal_cooling_register(struct edgetpu_thermal *thermal, char *type)
 {
+	struct device_node *cooling_node = NULL;
+
 	thermal->op_data = NULL;
 
 	mutex_init(&thermal->lock);
-	thermal->pwr_state = TPU_OFF;
+	cooling_node = of_find_node_by_name(NULL, "tpu-cooling");
+	if (!cooling_node)
+		dev_warn(thermal->dev, "failed to find cooling node\n");
+	// Initialize the cooling state to 0, which means "no cooling"
+	thermal->cooling_state = 0;
 	thermal->cdev = thermal_of_cooling_device_register(
-		NULL, type, thermal, &edgetpu_cooling_ops);
+		cooling_node, type, thermal, &edgetpu_cooling_ops);
 	if (IS_ERR(thermal->cdev))
 		return PTR_ERR(thermal->cdev);
 	return 0;
diff --git a/drivers/edgetpu/edgetpu-async.h b/drivers/edgetpu/edgetpu-async.h
index 4bcce0b..6278208 100644
--- a/drivers/edgetpu/edgetpu-async.h
+++ b/drivers/edgetpu/edgetpu-async.h
@@ -11,7 +11,7 @@
 #include <linux/list.h>
 #include <linux/mutex.h>
 
-typedef void *(*edgetpu_async_job_t)(void *);
+typedef int (*edgetpu_async_job_t)(void *);
 
 struct edgetpu_async_ctx {
 	/* constant fields after initialized */
@@ -29,7 +29,7 @@ struct edgetpu_async_ctx {
 	 * Return values of jobs. This field is available after
 	 * edgetpu_async_wait() is called.
 	 */
-	void **ret;
+	int *ret;
 };
 
 /*
@@ -45,7 +45,7 @@ struct edgetpu_async_entry {
 	struct list_head list;
 	edgetpu_async_job_t job;
 	void *data;
-	void *ret;
+	int ret;
 };
 
 /*
diff --git a/drivers/edgetpu/edgetpu-core.c b/drivers/edgetpu/edgetpu-core.c
index bc8b896..8c6fb4d 100644
--- a/drivers/edgetpu/edgetpu-core.c
+++ b/drivers/edgetpu/edgetpu-core.c
@@ -49,13 +49,55 @@ static int edgetpu_mmap_compat(struct edgetpu_client *client,
 	if (ret)
 		etdev_dbg(client->etdev,
 			  "Error remapping PFN range: %d\n", ret);
-
 	return ret;
 }
 
+/* vm_ops .open handler: bumps csr_map_count for compat/CSR mappings. */
+static void edgetpu_vma_open(struct vm_area_struct *vma)
+{
+	struct edgetpu_client *client = vma->vm_private_data;
+	const unsigned long pgoff = vma->vm_pgoff;
+
+	/* Only the compat (offset 0) and CSR mappings are counted. */
+	if (pgoff != 0 && pgoff != (EDGETPU_MMAP_CSR_OFFSET >> PAGE_SHIFT))
+		return;
+	mutex_lock(&client->wakelock.lock);
+	client->wakelock.csr_map_count++;
+	mutex_unlock(&client->wakelock.lock);
+}
+
+/* vm_ops .close handler: drops csr_map_count for compat/CSR mappings. */
+static void edgetpu_vma_close(struct vm_area_struct *vma)
+{
+	struct edgetpu_client *client = vma->vm_private_data;
+	const unsigned long pgoff = vma->vm_pgoff;
+
+	/* Only the compat (offset 0) and CSR mappings are counted. */
+	if (pgoff != 0 && pgoff != (EDGETPU_MMAP_CSR_OFFSET >> PAGE_SHIFT))
+		return;
+
+	mutex_lock(&client->wakelock.lock);
+	if (client->wakelock.csr_map_count)
+		client->wakelock.csr_map_count--;
+	else
+		etdev_warn(client->etdev,
+			   "unbalanced vma_close on CSR mapping\n");
+	etdev_dbg(client->etdev, "%s: unmap CSRS. pgoff = %lX count = %u\n",
+		  __func__, pgoff, client->wakelock.csr_map_count);
+	mutex_unlock(&client->wakelock.lock);
+}
+
+static const struct vm_operations_struct edgetpu_vma_ops = {
+	.open = edgetpu_vma_open,
+	.close = edgetpu_vma_close,
+};
+
+
 /* Map exported device CSRs or queue into user space. */
 int edgetpu_mmap(struct edgetpu_client *client, struct vm_area_struct *vma)
 {
+	int ret;
+
 	if (vma->vm_start & ~PAGE_MASK) {
 		etdev_dbg(client->etdev,
 			  "Base address not page-aligned: 0x%lx\n",
@@ -63,14 +105,27 @@ int edgetpu_mmap(struct edgetpu_client *client, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
-	vma->vm_private_data = client->etdev;
+	etdev_dbg(client->etdev, "%s: mmap pgoff = %lX\n", __func__,
+		  vma->vm_pgoff);
+
+	vma->vm_private_data = client;
+	vma->vm_ops = &edgetpu_vma_ops;
 
 	/* Mark the VMA's pages as uncacheable. */
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	/* If backward compat map all CSRs */
-	if (!vma->vm_pgoff)
-		return edgetpu_mmap_compat(client, vma);
+	if (!vma->vm_pgoff) {
+		mutex_lock(&client->wakelock.lock);
+		if (!client->wakelock.req_count)
+			ret = -EAGAIN;
+		else
+			ret = edgetpu_mmap_compat(client, vma);
+		if (!ret)
+			client->wakelock.csr_map_count++;
+		mutex_unlock(&client->wakelock.lock);
+		return ret;
+	}
 
 	/* Allow mapping log and telemetry buffers without creating a group */
 	if (vma->vm_pgoff == EDGETPU_MMAP_LOG_BUFFER_OFFSET >> PAGE_SHIFT)
@@ -80,18 +135,38 @@ int edgetpu_mmap(struct edgetpu_client *client, struct vm_area_struct *vma)
 		return edgetpu_mmap_telemetry_buffer(
 			client->etdev, EDGETPU_TELEMETRY_TRACE, vma);
 
-	if (!client->group)
+	mutex_lock(&client->group_lock);
+	if (!client->group) {
+		mutex_unlock(&client->group_lock);
 		return -EINVAL;
+	}
 
-	if (vma->vm_pgoff == EDGETPU_MMAP_CSR_OFFSET >> PAGE_SHIFT)
-		return edgetpu_mmap_csr(client->group, vma);
-	if (vma->vm_pgoff == EDGETPU_MMAP_CMD_QUEUE_OFFSET >> PAGE_SHIFT)
-		return edgetpu_mmap_queue(client->group, MAILBOX_CMD_QUEUE,
-					  vma);
-	if (vma->vm_pgoff == EDGETPU_MMAP_RESP_QUEUE_OFFSET >> PAGE_SHIFT)
-		return edgetpu_mmap_queue(client->group, MAILBOX_RESP_QUEUE,
-					  vma);
-	return -EINVAL;
+	switch (vma->vm_pgoff) {
+	case EDGETPU_MMAP_CSR_OFFSET >> PAGE_SHIFT:
+		mutex_lock(&client->wakelock.lock);
+		if (!client->wakelock.req_count)
+			ret = -EAGAIN;
+		else
+			ret = edgetpu_mmap_csr(client->group, vma);
+		if (!ret)
+			client->wakelock.csr_map_count++;
+		etdev_dbg(client->etdev, "%s: mmap CSRS. count = %u ret = %d\n",
+			  __func__, client->wakelock.csr_map_count, ret);
+		mutex_unlock(&client->wakelock.lock);
+		break;
+	case EDGETPU_MMAP_CMD_QUEUE_OFFSET >> PAGE_SHIFT:
+		ret = edgetpu_mmap_queue(client->group, MAILBOX_CMD_QUEUE, vma);
+		break;
+	case EDGETPU_MMAP_RESP_QUEUE_OFFSET >> PAGE_SHIFT:
+		ret = edgetpu_mmap_queue(client->group, MAILBOX_RESP_QUEUE,
+					 vma);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	mutex_unlock(&client->group_lock);
+	return ret;
 }
 
 static struct edgetpu_mailbox_manager_desc mailbox_manager_desc = {
@@ -103,6 +178,21 @@ static struct edgetpu_mailbox_manager_desc mailbox_manager_desc = {
 	.get_resp_queue_csr_base = edgetpu_mailbox_get_resp_queue_csr_base,
 };
 
+/* Translates @etdev->state into an errno; states not listed map to 0. */
+int edgetpu_get_state_errno_locked(struct edgetpu_dev *etdev)
+{
+	const int cur = etdev->state;
+
+	if (cur == ETDEV_STATE_BAD)
+		return -ENODEV;
+	if (cur == ETDEV_STATE_FWLOADING)
+		return -EAGAIN;
+	if (cur == ETDEV_STATE_NOFW)
+		return -EINVAL;
+
+	return 0;
+}
+
 int edgetpu_device_add(struct edgetpu_dev *etdev,
 		       const struct edgetpu_mapped_resource *regs)
 {
@@ -129,6 +219,8 @@ int edgetpu_device_add(struct edgetpu_dev *etdev,
 	mutex_init(&etdev->open.lock);
 	mutex_init(&etdev->groups_lock);
 	etdev->group_join_lockout = false;
+	mutex_init(&etdev->state_lock);
+	etdev->state = ETDEV_STATE_NOFW;
 
 	ret = edgetpu_dev_add(etdev);
 	if (ret) {
@@ -161,6 +253,10 @@ int edgetpu_device_add(struct edgetpu_dev *etdev,
 		goto detach_mmu;
 	}
 
+	ret = edgetpu_mcp_verify_membership(etdev);
+	if (ret)
+		etdev_warn(etdev, "edgetpu MCP info invalid");
+
 	ret = edgetpu_kci_init(etdev->mailbox_manager, etdev->kci);
 	if (ret) {
 		etdev_err(etdev, "edgetpu_kci_init returns %d\n", ret);
@@ -211,6 +307,10 @@ struct edgetpu_client *edgetpu_client_add(struct edgetpu_dev *etdev)
 	client->pid = current->pid;
 	client->tgid = current->tgid;
 	client->etdev = etdev;
+	mutex_init(&client->group_lock);
+	mutex_init(&client->wakelock.lock);
+	/* Initialize client wakelock state to "acquired" */
+	client->wakelock.req_count = 1;
 	refcount_set(&client->count, 1);
 	return client;
 }
@@ -233,7 +333,16 @@ void edgetpu_client_remove(struct edgetpu_client *client)
 {
 	if (IS_ERR_OR_NULL(client))
 		return;
-	edgetpu_device_group_leave(client);
+	/*
+	 * A quick check without holding client->group_lock.
+	 *
+	 * If client doesn't belong to a group then we are fine to not proceed.
+	 * If there is a race where the client belongs to a group but is being
+	 * removed by another process, it is detected by the later check made
+	 * while holding client->group_lock.
+	 */
+	if (client->group)
+		edgetpu_device_group_leave(client);
 }
 
 int edgetpu_register_irq(struct edgetpu_dev *etdev, int irq)
diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c
index eac37e6..aa5c965 100644
--- a/drivers/edgetpu/edgetpu-device-group.c
+++ b/drivers/edgetpu/edgetpu-device-group.c
@@ -27,6 +27,7 @@
 #include "edgetpu-mapping.h"
 #include "edgetpu-mcp.h"
 #include "edgetpu-mmu.h"
+#include "edgetpu-sw-watchdog.h"
 #include "edgetpu-usr.h"
 #include "edgetpu.h"
 
@@ -73,7 +74,7 @@ static int edgetpu_kci_join_group_worker(struct kci_worker_param *param)
 	return edgetpu_kci_join_group(etdev->kci, etdev, group->n_clients, i);
 }
 
-static void edgetpu_kci_leave_group_worker(struct kci_worker_param *param)
+static int edgetpu_kci_leave_group_worker(struct kci_worker_param *param)
 {
 	struct edgetpu_device_group *group = param->group;
 	uint i = param->idx;
@@ -81,6 +82,7 @@ static void edgetpu_kci_leave_group_worker(struct kci_worker_param *param)
 
 	etdev_dbg(etdev, "%s: leave group %u", __func__, group->workload_id);
 	edgetpu_kci_leave_group(etdev->kci);
+	return 0;
 }
 
 #endif /* CONFIG_ABROLHOS */
@@ -143,6 +145,7 @@ edgetpu_device_group_kci_finalized(struct edgetpu_device_group *group)
 	struct edgetpu_async_ctx *ctx_for_leave = edgetpu_async_alloc_ctx();
 	uint i;
 	int ret, val;
+	struct edgetpu_dev *etdev;
 
 	if (!params || !ctx || !ctx_for_leave) {
 		ret = -ENOMEM;
@@ -151,6 +154,17 @@ edgetpu_device_group_kci_finalized(struct edgetpu_device_group *group)
 	for (i = 0; i < group->n_clients; i++) {
 		params[i].group = group;
 		params[i].idx = i;
+		etdev = edgetpu_device_group_nth_etdev(group, i);
+		/*
+		 * fast fail.
+		 * It is safe to access @state field here without holding the
+		 * lock as any unresponsive state will lead us to KCI timeout
+		 * anyway.
+		 */
+		if (etdev->state != ETDEV_STATE_GOOD) {
+			ret = edgetpu_get_state_errno_locked(etdev);
+			goto out_free;
+		}
 		ret = edgetpu_async_add_job(
 			ctx, &params[i],
 			(edgetpu_async_job_t)edgetpu_kci_join_group_worker);
@@ -338,7 +352,7 @@ static bool edgetpu_clients_groupable(const struct edgetpu_client *client1,
 static bool edgetpu_group_check_contiguity(struct edgetpu_device_group *group)
 {
 	struct edgetpu_mcp *mcp = edgetpu_mcp_of_etdev(group->etdev);
-	uint i, j;
+	uint i;
 	uint fr, to;
 	uint mcp_n = 0;
 
@@ -349,23 +363,8 @@ static bool edgetpu_group_check_contiguity(struct edgetpu_device_group *group)
 		to = edgetpu_device_group_nth_etdev(group, i)->mcp_die_index;
 		if (fr + 1 == to)
 			continue;
-		/* no bypassed dies' info */
-		if (!mcp)
+		if (!mcp_n || (fr + 1) % mcp_n != to)
 			return false;
-		mutex_lock(&mcp->lock);
-		/* check if all dies between (fr, to) are bypassed */
-		for (j = (fr + 1) % mcp_n; j != to; j = (j + 1) % mcp_n) {
-			if (IS_ERR_OR_NULL(mcp->etdevs[j]) ||
-			    !edgetpu_chip_bypassed(mcp->etdevs[j])) {
-				mutex_unlock(&mcp->lock);
-				etdev_dbg(
-					group->etdev,
-					"contiguity check failed: die %u is not probed or not bypassed",
-					j);
-				return false;
-			}
-		}
-		mutex_unlock(&mcp->lock);
 	}
 
 	return true;
@@ -410,68 +409,112 @@ void edgetpu_device_group_put(struct edgetpu_device_group *group)
 		kfree(group);
 }
 
-struct edgetpu_device_group *edgetpu_device_group_alloc(
-		struct edgetpu_client *client,
-		const struct edgetpu_mailbox_attr *attr)
+/* caller must hold @etdev->groups_lock. */
+static bool edgetpu_in_any_group_locked(struct edgetpu_dev *etdev)
 {
-	static uint cur_workload_id;
-	int ret;
-	struct edgetpu_device_group *group;
+	int i;
 
-	/* the client already belongs to a group */
-	if (client->group)
-		return ERR_PTR(-EINVAL);
-	if (edgetpu_chip_bypassed(client->etdev))
-		return ERR_PTR(-EINVAL);
+	for (i = 0; i < EDGETPU_NGROUPS; i++) {
+		if (etdev->groups[i])
+			return true;
+	}
 
-	group = kzalloc(sizeof(*group), GFP_KERNEL);
-	if (!group)
-		return ERR_PTR(-ENOMEM);
+	return false;
+}
 
-	refcount_set(&group->ref_count, 1);
-	group->workload_id = cur_workload_id++;
-	INIT_LIST_HEAD(&group->clients);
-	group->n_clients = 0;
-	group->status = EDGETPU_DEVICE_GROUP_WAITING;
-	group->etdev = client->etdev;
-	mutex_init(&group->lock);
-	rwlock_init(&group->events.lock);
-	edgetpu_mapping_init(&group->host_mappings);
-	edgetpu_mapping_init(&group->dmabuf_mappings);
-	/* adds @client as the first entry */
-	ret = edgetpu_device_group_add(group, client);
-	if (ret) {
-		etdev_dbg(group->etdev, "%s: group %u add failed ret=%d",
-			  __func__, group->workload_id, ret);
-		goto error_put_group;
+/* caller must hold the client's etdev state_lock. */
+void edgetpu_device_group_leave_locked(struct edgetpu_client *client)
+{
+	struct edgetpu_device_group *group;
+	struct edgetpu_list_client *cur, *nxt;
+	bool will_disband = false;
+	int i;
+
+	mutex_lock(&client->group_lock);
+	group = client->group;
+	if (!group) {
+		mutex_unlock(&client->group_lock);
+		return;
 	}
 
-	ret = edgetpu_mailbox_init_vii(&group->vii, group, attr);
-	if (ret) {
-		etdev_dbg(group->etdev, "%s: group %u init vii failed ret=%d",
-			  __func__, group->workload_id, ret);
-		edgetpu_device_group_leave(client);
-		goto error_put_group;
+	mutex_lock(&group->lock);
+	/*
+	 * Disband the group if the leader leaves, or it's finalized and any
+	 * member leaves.
+	 */
+	if (edgetpu_device_group_is_waiting(group)) {
+		if (edgetpu_device_group_leader(group) == client)
+			will_disband = true;
+	} else if (edgetpu_device_group_is_finalized(group)) {
+		will_disband = true;
 	}
 
-	return group;
+	if (will_disband)
+		/* release the group before removing any members */
+		edgetpu_device_group_release(group);
 
-error_put_group:
-	edgetpu_device_group_put(group);
-	return ERR_PTR(ret);
+	/* removes the client from the list */
+	for_each_list_client_safe(cur, nxt, group) {
+		if (cur->client == client) {
+			list_del(&cur->list);
+			kfree(cur);
+			edgetpu_client_put(client);
+			group->n_clients--;
+		} else {
+			/*
+			 * Don't modify wdt heartbeat if state is not GOOD.
+			 * Safe to access etdev->state without state_lock as
+			 * racing state change may again restart wdt.
+			 */
+			if (will_disband &&
+			    cur->client->etdev->state == ETDEV_STATE_GOOD) {
+				/*
+				 * Switch the sw wdt of every other client in
+				 * the group to the DORMANT heartbeat interval.
+				 */
+				edgetpu_sw_wdt_modify_heartbeat(
+						cur->client->etdev,
+						EDGETPU_DORMANT_DEV_BEAT_MS);
+			}
+		}
+	}
+	edgetpu_device_group_put(client->group);
+	client->group = NULL;
+	mutex_unlock(&group->lock);
+	mutex_unlock(&client->group_lock);
+	/* remove the group from the client device */
+	mutex_lock(&client->etdev->groups_lock);
+	for (i = 0; i < EDGETPU_NGROUPS; i++) {
+		if (client->etdev->groups[i] == group) {
+			edgetpu_device_group_put(client->etdev->groups[i]);
+			client->etdev->groups[i] = NULL;
+			break;
+		}
+	}
+	/*
+	 * if etdev is not in any group and state is still good, set time
+	 * interval of wdt to DORMANT state.
+	 */
+	if (!edgetpu_in_any_group_locked(client->etdev) &&
+	    client->etdev->state == ETDEV_STATE_GOOD)
+		edgetpu_sw_wdt_modify_heartbeat(client->etdev,
+						EDGETPU_DORMANT_DEV_BEAT_MS);
+	mutex_unlock(&client->etdev->groups_lock);
 }
 
-int edgetpu_device_group_add(struct edgetpu_device_group *group,
-			     struct edgetpu_client *client)
+/* caller should hold client's etdev state lock. */
+static int edgetpu_device_group_add_locked(struct edgetpu_device_group *group,
+					   struct edgetpu_client *client)
 {
 	struct edgetpu_list_client *c;
 	int i;
 	int ret = 0;
 
-	if (client->group != NULL)
+	mutex_lock(&client->group_lock);
+	if (client->group) {
+		mutex_unlock(&client->group_lock);
 		return -EINVAL;
-	if (edgetpu_chip_bypassed(client->etdev))
-		return 0;
+	}
 
 	mutex_lock(&group->lock);
 	if (!edgetpu_device_group_is_waiting(group)) {
@@ -509,58 +552,100 @@ int edgetpu_device_group_add(struct edgetpu_device_group *group,
 
 out:
 	mutex_unlock(&group->lock);
+	mutex_unlock(&client->group_lock);
 	return ret;
 }
 
 void edgetpu_device_group_leave(struct edgetpu_client *client)
 {
-	struct edgetpu_device_group *group = client->group;
-	struct edgetpu_list_client *cur, *nxt;
-	bool will_disband = false;
-	int i;
+	mutex_lock(&client->etdev->state_lock);
+	WARN_ON_ONCE(client->etdev->state != ETDEV_STATE_GOOD &&
+		     client->etdev->state != ETDEV_STATE_FWLOADING);
+	edgetpu_device_group_leave_locked(client);
+	mutex_unlock(&client->etdev->state_lock);
+}
 
-	if (!group)
-		return;
+struct edgetpu_device_group *edgetpu_device_group_alloc(
+		struct edgetpu_client *client,
+		const struct edgetpu_mailbox_attr *attr)
+{
+	static uint cur_workload_id;
+	int ret;
+	struct edgetpu_device_group *group;
 
-	mutex_lock(&group->lock);
+	mutex_lock(&client->etdev->state_lock);
+	if (client->etdev->state != ETDEV_STATE_GOOD) {
+		ret = edgetpu_get_state_errno_locked(client->etdev);
+		goto state_unlock;
+	}
 	/*
-	 * Disband the group if the leader leaves, or it's finalized and any
-	 * member leaves.
+	 * The client already belongs to a group.
+	 * It's safe not to take client->group_lock as
+	 * edgetpu_device_group_add_locked() will fail if there is race.
 	 */
-	if (edgetpu_device_group_is_waiting(group)) {
-		if (edgetpu_device_group_leader(group) == client)
-			will_disband = true;
-	} else if (edgetpu_device_group_is_finalized(group)) {
-		will_disband = true;
+	if (client->group) {
+		ret = -EINVAL;
+		goto state_unlock;
 	}
 
-	if (will_disband)
-		/* release the group before removing any members */
-		edgetpu_device_group_release(group);
+	group = kzalloc(sizeof(*group), GFP_KERNEL);
+	if (!group) {
+		ret = -ENOMEM;
+		goto state_unlock;
+	}
 
-	/* removes the client from the list */
-	for_each_list_client_safe(cur, nxt, group) {
-		if (cur->client == client) {
-			list_del(&cur->list);
-			kfree(cur);
-			edgetpu_client_put(client);
-			group->n_clients--;
-			break;
-		}
+	refcount_set(&group->ref_count, 1);
+	group->workload_id = cur_workload_id++;
+	INIT_LIST_HEAD(&group->clients);
+	group->n_clients = 0;
+	group->status = EDGETPU_DEVICE_GROUP_WAITING;
+	group->etdev = client->etdev;
+	mutex_init(&group->lock);
+	rwlock_init(&group->events.lock);
+	edgetpu_mapping_init(&group->host_mappings);
+	edgetpu_mapping_init(&group->dmabuf_mappings);
+	/* adds @client as the first entry */
+	ret = edgetpu_device_group_add_locked(group, client);
+	if (ret) {
+		etdev_dbg(group->etdev, "%s: group %u add failed ret=%d",
+			  __func__, group->workload_id, ret);
+		goto error_put_group;
 	}
-	edgetpu_device_group_put(client->group);
-	client->group = NULL;
-	mutex_unlock(&group->lock);
-	/* remove the group from the client device */
-	mutex_lock(&client->etdev->groups_lock);
-	for (i = 0; i < EDGETPU_NGROUPS; i++) {
-		if (client->etdev->groups[i] == group) {
-			edgetpu_device_group_put(client->etdev->groups[i]);
-			client->etdev->groups[i] = NULL;
-			break;
-		}
+
+	ret = edgetpu_mailbox_init_vii(&group->vii, group, attr);
+	if (ret) {
+		etdev_dbg(group->etdev, "%s: group %u init vii failed ret=%d",
+			  __func__, group->workload_id, ret);
+		edgetpu_device_group_leave_locked(client);
+		goto error_put_group;
 	}
-	mutex_unlock(&client->etdev->groups_lock);
+
+	group->mbox_attr = *attr;
+
+	mutex_unlock(&client->etdev->state_lock);
+	return group;
+
+error_put_group:
+	edgetpu_device_group_put(group);
+state_unlock:
+	mutex_unlock(&client->etdev->state_lock);
+	return ERR_PTR(ret);
+}
+
+/* Locked wrapper: adds @client to @group only in ETDEV_STATE_GOOD. */
+int edgetpu_device_group_add(struct edgetpu_device_group *group,
+			     struct edgetpu_client *client)
+{
+	struct edgetpu_dev *etdev = client->etdev;
+	int ret;
+
+	mutex_lock(&etdev->state_lock);
+	if (etdev->state == ETDEV_STATE_GOOD)
+		ret = edgetpu_device_group_add_locked(group, client);
+	else
+		ret = edgetpu_get_state_errno_locked(etdev);
+	mutex_unlock(&etdev->state_lock);
+	return ret;
+}
 
 bool edgetpu_device_group_is_leader(struct edgetpu_device_group *group,
@@ -576,7 +661,8 @@ bool edgetpu_device_group_is_leader(struct edgetpu_device_group *group,
 
 int edgetpu_device_group_finalize(struct edgetpu_device_group *group)
 {
-	int ret = 0;
+	int ret = 0, i;
+	struct edgetpu_dev *etdev;
 
 	mutex_lock(&group->lock);
 	/* do nothing if the group is finalized */
@@ -617,6 +703,12 @@ int edgetpu_device_group_finalize(struct edgetpu_device_group *group)
 		goto err_remove_remote_dram;
 
 	group->status = EDGETPU_DEVICE_GROUP_FINALIZED;
+
+	for (i = 0; i < group->n_clients; i++) {
+		etdev = edgetpu_device_group_nth_etdev(group, i);
+		edgetpu_sw_wdt_modify_heartbeat(etdev,
+						EDGETPU_ACTIVE_DEV_BEAT_MS);
+	}
 	mutex_unlock(&group->lock);
 	return 0;
 
@@ -633,18 +725,6 @@ err_unlock:
 	return ret;
 }
 
-static bool edgetpu_in_any_group_locked(struct edgetpu_dev *etdev)
-{
-	int i;
-
-	for (i = 0; i < EDGETPU_NGROUPS; i++) {
-		if (etdev->groups[i])
-			return true;
-	}
-
-	return false;
-}
-
 bool edgetpu_in_any_group(struct edgetpu_dev *etdev)
 {
 	bool ret;
@@ -901,7 +981,7 @@ alloc_mapping_from_useraddr(struct edgetpu_device_group *group, u64 host_addr,
 	hmap->map.release = edgetpu_unmap_node;
 	hmap->map.show = edgetpu_host_map_show;
 	hmap->map.flags = flags;
-	hmap->map.dma_attrs = 0;
+	hmap->map.dma_attrs = map_to_dma_attr(flags, true);
 
 	if (IS_MIRRORED(flags)) {
 		hmap->sg_tables = kcalloc(group->n_clients,
@@ -1127,7 +1207,7 @@ int edgetpu_device_group_unmap(struct edgetpu_device_group *group,
 	}
 
 	edgetpu_mapping_unlink(&group->host_mappings, map);
-	map->dma_attrs = map_to_dma_attr(flags);
+	map->dma_attrs = map_to_dma_attr(flags, false);
 	edgetpu_unmap_node(map);
 	edgetpu_mapping_unlock(&group->host_mappings);
 unlock_group:
@@ -1283,3 +1363,16 @@ out:
 	mutex_unlock(&group->lock);
 	return ret;
 }
+
+/* Sends EDGETPU_EVENT_FATAL_ERROR to every group slot set on @etdev. */
+void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev)
+{
+	int idx;
+
+	mutex_lock(&etdev->groups_lock);
+	for (idx = 0; idx < EDGETPU_NGROUPS; idx++)
+		if (etdev->groups[idx])
+			edgetpu_group_notify(etdev->groups[idx],
+					     EDGETPU_EVENT_FATAL_ERROR);
+	mutex_unlock(&etdev->groups_lock);
+}
diff --git a/drivers/edgetpu/edgetpu-device-group.h b/drivers/edgetpu/edgetpu-device-group.h
index 567b63d..2b5538e 100644
--- a/drivers/edgetpu/edgetpu-device-group.h
+++ b/drivers/edgetpu/edgetpu-device-group.h
@@ -37,7 +37,7 @@ enum edgetpu_device_group_status {
 	EDGETPU_DEVICE_GROUP_DISBANDED,
 };
 
-#define EDGETPU_EVENT_COUNT 1
+#define EDGETPU_EVENT_COUNT 2
 
 /* eventfds registered for event notifications from kernel for a device group */
 struct edgetpu_events {
@@ -79,6 +79,8 @@ struct edgetpu_device_group {
 	/* TPU IOVA mapped to buffers backed by dma-buf */
 	struct edgetpu_mapping_root dmabuf_mappings;
 	struct edgetpu_events events;
+	/* Mailbox attributes used to create this group */
+	struct edgetpu_mailbox_attr mbox_attr;
 };
 
 /*
@@ -178,7 +180,8 @@ static inline struct edgetpu_dev *edgetpu_device_group_nth_etdev(
 }
 
 /*
- * Let @client leave the group it belongs to.
+ * Let @client leave the group it belongs to. Caller should hold the client's
+ * etdev state_lock.
  *
  * If @client is the leader of a group, the group will be marked as "disbanded".
  *
@@ -192,6 +195,9 @@ static inline struct edgetpu_dev *edgetpu_device_group_nth_etdev(
  * @client->group will be removed from @client->etdev->groups.
  * @client->group will be set as NULL.
  */
+void edgetpu_device_group_leave_locked(struct edgetpu_client *client);
+
+/* Let @client leave the group. Device should be in good state, warn if not. */
 void edgetpu_device_group_leave(struct edgetpu_client *client);
 
 /* Returns whether @client is the leader of @group. */
@@ -278,4 +284,7 @@ bool edgetpu_in_any_group(struct edgetpu_dev *etdev);
  */
 bool edgetpu_set_group_join_lockout(struct edgetpu_dev *etdev, bool lockout);
 
+/* Notify all device groups of @etdev about a failure on the die */
+void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev);
+
 #endif /* __EDGETPU_DEVICE_GROUP_H__ */
diff --git a/drivers/edgetpu/edgetpu-direct.c b/drivers/edgetpu/edgetpu-direct.c
index 6736b02..cdd42b8 100644
--- a/drivers/edgetpu/edgetpu-direct.c
+++ b/drivers/edgetpu/edgetpu-direct.c
@@ -32,7 +32,9 @@
 #include "edgetpu-dram.h"
 #include "edgetpu-firmware.h"
 #include "edgetpu-internal.h"
+#include "edgetpu-kci.h"
 #include "edgetpu-mapping.h"
+#include "edgetpu-pm.h"
 #include "edgetpu-telemetry.h"
 #include "edgetpu.h"
 
@@ -63,7 +65,7 @@ static int edgetpu_open(struct inode *inode, struct file *file)
 	/* Set client pointer to NULL if error creating client. */
 	file->private_data = NULL;
 	mutex_lock(&etdev->open.lock);
-	if (etdev->pm && !etdev->open.count) {
+	if (etdev->pm) {
 		res = edgetpu_pm_get(etdev->pm);
 		if (res) {
 			dev_err(etdev->dev,
@@ -88,23 +90,40 @@ static int etdirect_release(struct inode *inode, struct file *file)
 {
 	struct edgetpu_client *client = file->private_data;
 	struct edgetpu_dev *etdev;
+	uint wakelock_count;
 
 	if (!client)
 		return 0;
 	etdev = client->etdev;
 
+	mutex_lock(&client->wakelock.lock);
+
+	wakelock_count = client->wakelock.req_count;
+	/* Set wakelock state to "released" */
+	client->wakelock.req_count = 0;
+
+	/* HACK: Can't disband a group if the device is off, turn it on */
+	if (client->group && !wakelock_count) {
+		wakelock_count = 1;
+		edgetpu_pm_get(etdev->pm);
+	}
+
+	mutex_unlock(&client->wakelock.lock);
+
 	edgetpu_client_remove(client);
 
 	mutex_lock(&etdev->open.lock);
 	if (etdev->open.count)
 		--etdev->open.count;
-	if (!etdev->open.count)
+
+	/* count was zero if client previously released its wake lock */
+	if (wakelock_count)
 		edgetpu_pm_put(etdev->pm);
 	mutex_unlock(&etdev->open.lock);
 	return 0;
 }
 
-static int etdirect_set_eventfd(struct edgetpu_client *client,
+static int etdirect_set_eventfd(struct edgetpu_device_group *group,
 				struct edgetpu_event_register __user *argp)
 {
 	struct edgetpu_event_register eventreg;
@@ -112,7 +131,7 @@ static int etdirect_set_eventfd(struct edgetpu_client *client,
 	if (copy_from_user(&eventreg, argp, sizeof(eventreg)))
 		return -EFAULT;
 
-	return edgetpu_group_set_eventfd(client->group, eventreg.event_id,
+	return edgetpu_group_set_eventfd(group, eventreg.event_id,
 					 eventreg.eventfd);
 }
 
@@ -160,6 +179,7 @@ static int etdirect_join_group(struct edgetpu_client *client, u64 leader_fd)
 	struct file *file = f.file;
 	struct edgetpu_client *leader;
 	int ret;
+	struct edgetpu_device_group *group;
 
 	if (!file) {
 		ret = -EBADF;
@@ -171,21 +191,29 @@ static int etdirect_join_group(struct edgetpu_client *client, u64 leader_fd)
 	}
 
 	leader = file->private_data;
-	if (!leader || !leader->group ||
+	if (!leader) {
+		ret = -EINVAL;
+		goto out;
+	}
+	mutex_lock(&leader->group_lock);
+	if (!leader->group ||
 	    !edgetpu_device_group_is_leader(leader->group, leader)) {
 		ret = -EINVAL;
+		mutex_unlock(&leader->group_lock);
 		goto out;
 	}
+	group = edgetpu_device_group_get(leader->group);
+	mutex_unlock(&leader->group_lock);
 
-	ret = edgetpu_device_group_add(leader->group, client);
-
+	ret = edgetpu_device_group_add(group, client);
+	edgetpu_device_group_put(group);
 out:
 	fdput(f);
 	return ret;
 }
 
-static int etdirect_create_group(struct edgetpu_client *client,
-				 struct edgetpu_mailbox_attr __user *argp)
+static int edgetpu_ioctl_create_group(struct edgetpu_client *client,
+				      struct edgetpu_mailbox_attr __user *argp)
 {
 	struct edgetpu_mailbox_attr attr;
 	struct edgetpu_device_group *group;
@@ -201,6 +229,32 @@ static int etdirect_create_group(struct edgetpu_client *client,
 	return 0;
 }
 
+/* TODO(b/167151866): remove me */
+static int
+etdirect_create_group(struct edgetpu_client *client,
+		      struct edgetpu_mailbox_attr_compat __user *argp)
+{
+	struct edgetpu_mailbox_attr_compat attr_c;
+	/* Zeroed so nothing indeterminate is copied into group->mbox_attr. */
+	struct edgetpu_mailbox_attr attr = { 0 };
+	struct edgetpu_device_group *group;
+
+	if (copy_from_user(&attr_c, argp, sizeof(attr_c)))
+		return -EFAULT;
+	attr.cmd_queue_size = attr_c.cmd_queue_size;
+	attr.resp_queue_size = attr_c.resp_queue_size;
+	attr.sizeof_cmd = EDGETPU_SIZEOF_VII_CMD_ELEMENT;
+	attr.sizeof_resp = EDGETPU_SIZEOF_VII_RESP_ELEMENT;
+	attr.priority = attr_c.priority;
+	attr.cmdq_tail_doorbell = attr_c.cmdq_tail_doorbell;
+	group = edgetpu_device_group_alloc(client, &attr);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	edgetpu_device_group_put(group);
+	return 0;
+}
+
 static int etdirect_map_buffer(struct edgetpu_device_group *group,
 			       struct edgetpu_map_ioctl __user *argp)
 {
@@ -404,11 +458,13 @@ static bool etdirect_ioctl_check_permissions(struct file *file, uint cmd)
 
 /*
  * Checks if the state of @client is valid to execute ioctl command @cmd.
+ * Caller holds @client->group_lock.
  */
 static bool etdirect_ioctl_check_group(struct edgetpu_client *client, uint cmd)
 {
 	/* @client must not belong to any group */
-	if (cmd == EDGETPU_CREATE_GROUP || cmd == EDGETPU_JOIN_GROUP)
+	if (cmd == EDGETPU_CREATE_GROUP_COMPAT || cmd == EDGETPU_CREATE_GROUP ||
+	    cmd == EDGETPU_JOIN_GROUP)
 		return !client->group;
 
 	/* Valid for any @client */
@@ -417,7 +473,9 @@ static bool etdirect_ioctl_check_group(struct edgetpu_client *client, uint cmd)
 	    cmd == EDGETPU_ALLOCATE_DEVICE_BUFFER ||
 	    cmd == EDGETPU_CREATE_SYNC_FENCE ||
 	    cmd == EDGETPU_SIGNAL_SYNC_FENCE ||
-	    cmd == EDGETPU_SYNC_FENCE_STATUS)
+	    cmd == EDGETPU_SYNC_FENCE_STATUS ||
+	    cmd == EDGETPU_RELEASE_WAKE_LOCK ||
+	    cmd == EDGETPU_ACQUIRE_WAKE_LOCK)
 		return true;
 
 	if (!client->group)
@@ -432,6 +490,82 @@ static bool etdirect_ioctl_check_group(struct edgetpu_client *client, uint cmd)
 	return edgetpu_device_group_is_leader(client->group, client);
 }
 
+/*
+ * Handles EDGETPU_RELEASE_WAKE_LOCK: drops one wakelock request of @client.
+ *
+ * Returns 0 on success, -ENODEV if the device has no pm object, -EAGAIN if
+ * @client still has CSR mappings, or -EINVAL if @client has no outstanding
+ * wakelock request.
+ */
+static int etdirect_ioctl_release_wakelock(struct edgetpu_client *client)
+{
+	struct edgetpu_dev *etdev = client->etdev;
+	int ret = 0;
+
+	if (!etdev->pm)
+		return -ENODEV;
+
+	mutex_lock(&client->wakelock.lock);
+
+	/* Cannot release wakelock if client has active CSR mappings */
+	if (client->wakelock.csr_map_count) {
+		etdev_warn(
+			etdev,
+			"%s: refusing wakelock release with %u CSR mappings\n",
+			__func__, client->wakelock.csr_map_count);
+		ret = -EAGAIN;
+		goto out_unlock;
+	}
+
+	/* Cannot release wakelock if it wasn't acquired */
+	if (!client->wakelock.req_count) {
+		etdev_warn(etdev, "%s: invalid wakelock release\n", __func__);
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	edgetpu_pm_put(etdev->pm);
+	client->wakelock.req_count--;
+	etdev_dbg(etdev,
+		  "%s: wakelock req count = %u CSR map count = %u\n", __func__,
+		  client->wakelock.req_count, client->wakelock.csr_map_count);
+out_unlock:
+	mutex_unlock(&client->wakelock.lock);
+	return ret;
+}
+
+/*
+ * Handles EDGETPU_ACQUIRE_WAKE_LOCK: calls edgetpu_pm_get() on behalf of
+ * @client and, on success, counts the request in client->wakelock.req_count.
+ *
+ * Returns 0 on success, -ENODEV if the device has no pm object, or the error
+ * returned by edgetpu_pm_get().
+ */
+static int etdirect_ioctl_acquire_wakelock(struct edgetpu_client *client)
+{
+	struct edgetpu_dev *etdev = client->etdev;
+	int ret;
+
+	if (!etdev->pm)
+		return -ENODEV;
+
+	mutex_lock(&client->wakelock.lock);
+
+	ret = edgetpu_pm_get(etdev->pm);
+	if (ret) {
+		etdev_warn(etdev, "%s: pm_get failed (%d)\n", __func__, ret);
+		goto out_unlock;
+	}
+
+	client->wakelock.req_count++;
+	etdev_dbg(etdev,
+		  "%s: wakelock req count = %u CSR map count = %u\n", __func__,
+		  client->wakelock.req_count, client->wakelock.csr_map_count);
+out_unlock:
+	mutex_unlock(&client->wakelock.lock);
+	return ret;
+}
+
 static long etdirect_ioctl(struct file *file, uint cmd, ulong arg)
 {
 	struct edgetpu_client *client = file->private_data;
@@ -444,9 +562,12 @@ static long etdirect_ioctl(struct file *file, uint cmd, ulong arg)
 	if (!etdirect_ioctl_check_permissions(file, cmd))
 		return -EPERM;
 
-	if (!etdirect_ioctl_check_group(client, cmd))
+	mutex_lock(&client->group_lock);
+	if (!etdirect_ioctl_check_group(client, cmd)) {
+		mutex_unlock(&client->group_lock);
 		return -EINVAL;
-
+	}
+	/* ioctl commands operating on device group */
 	switch (cmd) {
 	case EDGETPU_MAP_BUFFER:
 		ret = etdirect_map_buffer(client->group, argp);
@@ -454,35 +575,17 @@ static long etdirect_ioctl(struct file *file, uint cmd, ulong arg)
 	case EDGETPU_UNMAP_BUFFER:
 		ret = etdirect_unmap_buffer(client->group, argp);
 		break;
-	case EDGETPU_SET_EVENTFD:
-		ret = etdirect_set_eventfd(client, argp);
-		break;
 	case EDGETPU_UNSET_EVENT:
 		edgetpu_group_unset_eventfd(client->group, arg);
 		ret = 0;
 		break;
-	case EDGETPU_CREATE_GROUP:
-		ret = etdirect_create_group(client, argp);
-		break;
-	case EDGETPU_JOIN_GROUP:
-		ret = etdirect_join_group(client, (u64)argp);
-		break;
 	case EDGETPU_FINALIZE_GROUP:
 		ret = edgetpu_device_group_finalize(client->group);
 		break;
-	case EDGETPU_SET_PERDIE_EVENTFD:
-		ret = etdirect_set_perdie_eventfd(client->etdev, argp);
-		break;
-	case EDGETPU_UNSET_PERDIE_EVENT:
-		ret = etdirect_unset_perdie_eventfd(client->etdev, arg);
-		break;
 	case EDGETPU_ALLOCATE_DEVICE_BUFFER_COMPAT:
 		ret = etdirect_allocate_device_buffer_compat(client->group,
 							     argp);
 		break;
-	case EDGETPU_ALLOCATE_DEVICE_BUFFER:
-		ret = etdirect_allocate_device_buffer(client, (u64)argp);
-		break;
 	case EDGETPU_SYNC_BUFFER:
 		ret = etdirect_sync_buffer(client->group, argp);
 		break;
@@ -492,21 +595,56 @@ static long etdirect_ioctl(struct file *file, uint cmd, ulong arg)
 	case EDGETPU_UNMAP_DMABUF:
 		ret = etdirect_unmap_dmabuf(client->group, argp);
 		break;
-	case EDGETPU_CREATE_SYNC_FENCE:
-		ret = edgetpu_ioctl_sync_fence_create(argp);
-		break;
-	case EDGETPU_SIGNAL_SYNC_FENCE:
-		ret = edgetpu_ioctl_sync_fence_signal(argp);
-		break;
 	case EDGETPU_MAP_BULK_DMABUF:
 		ret = edgetpu_ioctl_map_bulk_dmabuf(client->group, argp);
 		break;
 	case EDGETPU_UNMAP_BULK_DMABUF:
 		ret = edgetpu_ioctl_unmap_bulk_dmabuf(client->group, argp);
 		break;
+	case EDGETPU_SET_EVENTFD:
+		ret = etdirect_set_eventfd(client->group, argp);
+		break;
+	default:
+		ret = -ENOTTY; /* unknown command */
+	}
+	mutex_unlock(&client->group_lock);
+	if (ret != -ENOTTY)
+		return ret;
+
+	switch (cmd) {
+	case EDGETPU_CREATE_GROUP:
+		ret = edgetpu_ioctl_create_group(client, argp);
+		break;
+	case EDGETPU_CREATE_GROUP_COMPAT:
+		ret = etdirect_create_group(client, argp);
+		break;
+	case EDGETPU_JOIN_GROUP:
+		ret = etdirect_join_group(client, (u64)argp);
+		break;
+	case EDGETPU_SET_PERDIE_EVENTFD:
+		ret = etdirect_set_perdie_eventfd(client->etdev, argp);
+		break;
+	case EDGETPU_UNSET_PERDIE_EVENT:
+		ret = etdirect_unset_perdie_eventfd(client->etdev, arg);
+		break;
+	case EDGETPU_ALLOCATE_DEVICE_BUFFER:
+		ret = etdirect_allocate_device_buffer(client, (u64)argp);
+		break;
+	case EDGETPU_CREATE_SYNC_FENCE:
+		ret = edgetpu_ioctl_sync_fence_create(argp);
+		break;
+	case EDGETPU_SIGNAL_SYNC_FENCE:
+		ret = edgetpu_ioctl_sync_fence_signal(argp);
+		break;
 	case EDGETPU_SYNC_FENCE_STATUS:
 		ret = edgetpu_ioctl_sync_fence_status(argp);
 		break;
+	case EDGETPU_RELEASE_WAKE_LOCK:
+		ret = etdirect_ioctl_release_wakelock(client);
+		break;
+	case EDGETPU_ACQUIRE_WAKE_LOCK:
+		ret = etdirect_ioctl_acquire_wakelock(client);
+		break;
 	default:
 		return -ENOTTY; /* unknown command */
 	}
diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c
index 99ccb84..7e03aac 100644
--- a/drivers/edgetpu/edgetpu-firmware.c
+++ b/drivers/edgetpu/edgetpu-firmware.c
@@ -21,6 +21,7 @@
 #include "edgetpu-kci.h"
 #include "edgetpu-pm.h"
 #include "edgetpu-shared-fw.h"
+#include "edgetpu-sw-watchdog.h"
 #include "edgetpu-telemetry.h"
 
 /*
@@ -252,14 +253,14 @@ static int edgetpu_firmware_handshake(struct edgetpu_firmware *et_fw)
 {
 	enum edgetpu_fw_flavor fw_flavor;
 	struct edgetpu_firmware_buffer *fw_buf;
+	struct edgetpu_dev *etdev = et_fw->etdev;
 
 	/* Give the firmware some time to initialize */
 	msleep(100);
-	etdev_dbg(et_fw->etdev, "Detecting firmware flavor...");
-	fw_flavor = edgetpu_kci_fw_flavor(et_fw->etdev->kci);
+	etdev_dbg(etdev, "Detecting firmware flavor...");
+	fw_flavor = edgetpu_kci_fw_flavor(etdev->kci);
 	if (fw_flavor < 0) {
-		etdev_err(et_fw->etdev, "firmware handshake failed: %d",
-			  fw_flavor);
+		etdev_err(etdev, "firmware handshake failed: %d", fw_flavor);
 		et_fw->p->status = FW_INVALID;
 		et_fw->p->fw_flavor = FW_FLAVOR_UNKNOWN;
 		return fw_flavor;
@@ -267,17 +268,21 @@ static int edgetpu_firmware_handshake(struct edgetpu_firmware *et_fw)
 
 	if (fw_flavor != FW_FLAVOR_BL1) {
 		fw_buf = &et_fw->p->fw_desc.buf;
-		etdev_info(et_fw->etdev, "loaded %s firmware%s",
+		etdev_info(etdev, "loaded %s firmware%s",
 			   fw_flavor_str(fw_flavor),
 			   fw_buf->flags & FW_ONDEV ? " on device" : "");
 	} else {
-		etdev_dbg(et_fw->etdev, "loaded stage 2 bootloader");
+		etdev_dbg(etdev, "loaded stage 2 bootloader");
 	}
 	et_fw->p->status = FW_VALID;
 	et_fw->p->fw_flavor = fw_flavor;
 	/* Hermosa second-stage bootloader doesn't implement log/trace */
-	if (fw_flavor != FW_FLAVOR_BL1)
-		edgetpu_telemetry_kci(et_fw->etdev);
+	if (fw_flavor != FW_FLAVOR_BL1) {
+		int ret = edgetpu_telemetry_kci(etdev);
+
+		if (ret)
+			etdev_warn(etdev, "telemetry KCI error: %d", ret);
+	}
 	return 0;
 }
 
@@ -288,8 +293,11 @@ int edgetpu_firmware_run_locked(struct edgetpu_firmware *et_fw,
 	const struct edgetpu_firmware_handlers *handlers = et_fw->p->handlers;
 	struct edgetpu_firmware_desc new_fw_desc;
 	int ret;
+	bool wdt_en = !(flags & FW_BL1); /* not BL1 */
 
 	et_fw->p->status = FW_LOADING;
+	if (wdt_en)
+		edgetpu_sw_wdt_stop(et_fw->etdev);
 
 	memset(&new_fw_desc, 0, sizeof(new_fw_desc));
 	ret = edgetpu_firmware_load_locked(et_fw, &new_fw_desc, name, flags);
@@ -312,7 +320,12 @@ int edgetpu_firmware_run_locked(struct edgetpu_firmware *et_fw,
 	edgetpu_firmware_unload_locked(et_fw, &et_fw->p->fw_desc);
 	et_fw->p->fw_desc = new_fw_desc;
 
-	return edgetpu_firmware_handshake(et_fw);
+	ret = edgetpu_firmware_handshake(et_fw);
+
+	/* Don't start wdt if loaded firmware is second stage bootloader. */
+	if (!ret && wdt_en && et_fw->p->fw_flavor != FW_FLAVOR_BL1)
+		edgetpu_sw_wdt_start(et_fw->etdev);
+	return ret;
 
 out_unload_new_fw:
 	edgetpu_firmware_unload_locked(et_fw, &new_fw_desc);
@@ -324,12 +337,28 @@ int edgetpu_firmware_run(struct edgetpu_dev *etdev, const char *name,
 {
 	struct edgetpu_firmware *et_fw = etdev->firmware;
 	int ret;
+	enum edgetpu_dev_state prev_state;
 
 	if (!et_fw)
 		return -ENODEV;
+	/*
+	 * All other operations on device will first check for device state
+	 * and then proceed.
+	 */
+	mutex_lock(&etdev->state_lock);
+	if (etdev->state == ETDEV_STATE_FWLOADING) {
+		mutex_unlock(&etdev->state_lock);
+		return -EAGAIN;
+	}
+	prev_state = etdev->state;
+	etdev->state = ETDEV_STATE_FWLOADING;
+	mutex_unlock(&etdev->state_lock);
 	ret = edgetpu_firmware_lock(etdev);
 	if (ret) {
 		etdev_err(etdev, "%s: lock failed (%d)\n", __func__, ret);
+		mutex_lock(&etdev->state_lock);
+		etdev->state = prev_state; /* restore etdev state */
+		mutex_unlock(&etdev->state_lock);
 		return ret;
 	}
 	/*
@@ -344,6 +373,16 @@ int edgetpu_firmware_run(struct edgetpu_dev *etdev, const char *name,
 	etdev->firmware = et_fw;
 	edgetpu_pm_put(etdev->pm);
 	edgetpu_firmware_unlock(etdev);
+
+	mutex_lock(&etdev->state_lock);
+	if (ret == -EIO)
+		etdev->state = ETDEV_STATE_BAD; /* f/w handshake error */
+	else if (ret)
+		etdev->state = ETDEV_STATE_NOFW; /* other errors */
+	else
+		etdev->state = ETDEV_STATE_GOOD; /* f/w handshake success */
+	mutex_unlock(&etdev->state_lock);
+
 	return ret;
 }
 
@@ -400,12 +439,16 @@ int edgetpu_firmware_restart_locked(struct edgetpu_dev *etdev)
 	int ret;
 
 	et_fw->p->status = FW_LOADING;
+	edgetpu_sw_wdt_stop(etdev);
 	if (handlers && handlers->prepare_run) {
 		ret = handlers->prepare_run(et_fw, &et_fw->p->fw_desc.buf);
 		if (ret)
 			return ret;
 	}
-	return edgetpu_firmware_handshake(et_fw);
+	ret = edgetpu_firmware_handshake(et_fw);
+	if (!ret)
+		edgetpu_sw_wdt_start(etdev);
+	return ret;
 }
 
 static ssize_t load_firmware_show(
@@ -484,6 +527,101 @@ static const struct attribute_group edgetpu_firmware_attr_group = {
 	.attrs = dev_attrs,
 };
 
+/*
+ * Software watchdog timeout handler, installed by edgetpu_firmware_create()
+ * with the struct edgetpu_dev as @data.
+ *
+ * Marks the device as ETDEV_STATE_FWLOADING, makes one client of the device
+ * per device group leave its group, restarts the firmware and records the
+ * result in etdev->state. Returns early if the device is not powered or a
+ * firmware load is already in progress.
+ */
+static void edgetpu_firmware_wdt_timeout_action(void *data)
+{
+	int ret, i, num_clients = 0;
+	struct edgetpu_dev *etdev = data;
+	struct edgetpu_device_group *group;
+	struct edgetpu_client *clients[EDGETPU_NGROUPS];
+	struct edgetpu_list_client *c;
+	struct edgetpu_firmware *et_fw = etdev->firmware;
+	enum edgetpu_dev_state prev_state;
+
+	/* Don't attempt f/w restart if device is off. */
+	if (!edgetpu_is_powered(etdev))
+		return;
+
+	mutex_lock(&etdev->state_lock);
+	if (etdev->state == ETDEV_STATE_FWLOADING) {
+		mutex_unlock(&etdev->state_lock);
+		return;
+	}
+	prev_state = etdev->state;
+	etdev->state = ETDEV_STATE_FWLOADING;
+	mutex_unlock(&etdev->state_lock);
+
+	/*
+	 * Collect at most one client per group (note the break), so clients[]
+	 * cannot overflow its EDGETPU_NGROUPS slots.
+	 *
+	 * NOTE(review): etdev->groups[] is read without etdev->groups_lock
+	 * here - confirm that is safe against concurrent group changes.
+	 */
+	for (i = 0; i < EDGETPU_NGROUPS; i++) {
+		group = etdev->groups[i];
+		if (!group)
+			continue;
+		mutex_lock(&group->lock);
+		list_for_each_entry(c, &group->clients, list) {
+			if (etdev == c->client->etdev) {
+				clients[num_clients++] =
+						edgetpu_client_get(c->client);
+				break;
+			}
+		}
+		mutex_unlock(&group->lock);
+	}
+	// TODO(b/154626503): Notify runtime to abort current tasks
+	for (i = 0; i < num_clients; i++) {
+		/*
+		 * No need to hold state lock here since all group operations on
+		 * client are protected by state being GOOD.
+		 */
+		edgetpu_device_group_leave_locked(clients[i]);
+		edgetpu_client_put(clients[i]);
+	}
+
+	ret = edgetpu_firmware_lock(etdev);
+	/*
+	 * edgetpu_firmware_lock() should always return success here as etdev
+	 * is already removed from all groups and fw loader exists.
+	 */
+	if (ret) {
+		etdev_err(etdev, "%s: lock failed (%d)\n", __func__, ret);
+		/* Don't leave the device stuck in FWLOADING. */
+		mutex_lock(&etdev->state_lock);
+		etdev->state = prev_state;
+		mutex_unlock(&etdev->state_lock);
+		return;
+	}
+	et_fw->p->status = FW_LOADING;
+	ret = edgetpu_pm_get(etdev->pm);
+	if (!ret) {
+		ret = edgetpu_firmware_restart_locked(etdev);
+		/* Only balance an edgetpu_pm_get() that succeeded. */
+		edgetpu_pm_put(etdev->pm);
+	}
+	edgetpu_firmware_unlock(etdev);
+
+	mutex_lock(&etdev->state_lock);
+	if (ret == -EIO)
+		etdev->state = ETDEV_STATE_BAD;
+	else if (ret)
+		etdev->state = ETDEV_STATE_NOFW;
+	else
+		etdev->state = ETDEV_STATE_GOOD;
+	mutex_unlock(&etdev->state_lock);
+}
+
 int edgetpu_firmware_create(struct edgetpu_dev *etdev,
 			    const struct edgetpu_firmware_handlers *handlers)
 {
@@ -522,6 +636,12 @@ int edgetpu_firmware_create(struct edgetpu_dev *etdev,
 	}
 
 	etdev->firmware = et_fw;
+	ret = edgetpu_sw_wdt_create(etdev, EDGETPU_DORMANT_DEV_BEAT_MS);
+	if (ret)
+		etdev_err(etdev, "Failed to create sw wdt instance\n");
+	else
+		edgetpu_sw_wdt_set_handler(
+			etdev, edgetpu_firmware_wdt_timeout_action, etdev);
 	return 0;
 
 out_device_remove_group:
@@ -540,6 +660,7 @@ void edgetpu_firmware_destroy(struct edgetpu_dev *etdev)
 
 	if (!et_fw)
 		return;
+	edgetpu_sw_wdt_destroy(etdev);
 
 	if (et_fw->p) {
 		handlers = et_fw->p->handlers;
diff --git a/drivers/edgetpu/edgetpu-internal.h b/drivers/edgetpu/edgetpu-internal.h
index 4668eb4..26000b1 100644
--- a/drivers/edgetpu/edgetpu-internal.h
+++ b/drivers/edgetpu/edgetpu-internal.h
@@ -51,6 +51,9 @@
 /* Up to 7 concurrent device groups / workloads per device. */
 #define EDGETPU_NGROUPS		7
 
+/* 1 context per VII/group plus 1 for KCI */
+#define EDGETPU_NCONTEXTS	(EDGETPU_NGROUPS + 1)
+
 /*
  * Common-layer context IDs for non-secure TPU access, translated to chip-
  * specific values in the mmu driver.
@@ -85,6 +88,8 @@ struct edgetpu_client {
 	pid_t tgid;
 	/* Reference count */
 	refcount_t count;
+	/* protects group. */
+	struct mutex group_lock;
 	/*
 	 * The virtual device group this client belongs to. Can be NULL if the
 	 * client doesn't belong to any group.
@@ -103,6 +108,12 @@ struct edgetpu_client {
 	dma_addr_t *remote_drams_dma_addrs;
 	/* range of device CSRs mmap()'able */
 	struct edgetpu_reg_window reg_window;
+	/* Per-client request to keep device active */
+	struct {
+		struct mutex lock;
+		uint req_count;
+		uint csr_map_count;
+	} wakelock;
 };
 
 struct edgetpu_mapping;
@@ -120,6 +131,13 @@ struct edgetpu_mapped_resource {
 	resource_size_t size;	/* size in bytes */
 };
 
+enum edgetpu_dev_state {
+	ETDEV_STATE_NOFW = 0,	/* no firmware running on device. */
+	ETDEV_STATE_GOOD = 1,	/* healthy firmware running. */
+	ETDEV_STATE_FWLOADING = 2, /* firmware is getting loaded on device. */
+	ETDEV_STATE_BAD = 3,	/* firmware/device is in unusable state. */
+};
+
 struct edgetpu_dev {
 	struct device *dev;	   /* platform/pci bus device */
 	struct device *etcdev;	   /* edgetpu class char device */
@@ -132,6 +150,8 @@ struct edgetpu_dev {
 	} open;
 	struct edgetpu_mapped_resource regs; /* ioremapped CSRs */
 	struct dentry *d_entry;    /* debugfs dir for this device */
+	struct mutex state_lock;   /* protects state of this device */
+	enum edgetpu_dev_state state;
 	struct mutex groups_lock;  /* protects groups and lockout */
 	struct edgetpu_device_group *groups[EDGETPU_NGROUPS];
 	bool group_join_lockout;   /* disable group join while reinit */
@@ -148,6 +168,8 @@ struct edgetpu_dev {
 	int mcp_id;		/* multichip pkg id, or -1 for none */
 	uint mcp_die_index;	/* physical die index w/in multichip pkg */
 	u8 mcp_pkg_type;	/* multichip pkg type */
+	struct edgetpu_sw_wdt *etdev_sw_wdt;	/* software watchdog */
+	u64 mcp_serial_num;	/* multichip serial number */
 };
 
 /* Status regs dump. */
@@ -238,9 +260,6 @@ void edgetpu_chip_exit(struct edgetpu_dev *etdev);
 /* IRQ handler */
 irqreturn_t edgetpu_chip_irq_handler(int irq, void *arg);
 
-/* Return true if the device is marked as bypassed. */
-bool edgetpu_chip_bypassed(struct edgetpu_dev *etdev);
-
 /* Device -> Core API */
 
 /* Add current thread as new TPU client */
@@ -261,4 +280,10 @@ void edgetpu_client_put(struct edgetpu_client *client);
 /* Mark die that fails probe to allow bypassing */
 void edgetpu_mark_probe_fail(struct edgetpu_dev *etdev);
 
+/*
+ * Get error code corresponding to @etdev state. Caller holds
+ * etdev->state_lock.
+ */
+int edgetpu_get_state_errno_locked(struct edgetpu_dev *etdev);
+
 #endif /* __EDGETPU_INTERNAL_H__ */
diff --git a/drivers/edgetpu/edgetpu-iremap-pool.c b/drivers/edgetpu/edgetpu-iremap-pool.c
index fa2800a..52094b1 100644
--- a/drivers/edgetpu/edgetpu-iremap-pool.c
+++ b/drivers/edgetpu/edgetpu-iremap-pool.c
@@ -9,6 +9,7 @@
 #include <linux/printk.h>
 
 #ifdef CONFIG_X86
+#include <asm/pgtable_types.h>
 #include <asm/set_memory.h>
 #endif
 
@@ -176,6 +177,8 @@ int edgetpu_iremap_mmap(struct edgetpu_dev *etdev, struct vm_area_struct *vma,
 	struct edgetpu_mempool *etmempool = etdev->iremap_pool;
 	size_t offset;
 	phys_addr_t phys;
+	int ret;
+	unsigned long orig_pgoff = vma->vm_pgoff;
 
 #ifdef CONFIG_ARM64
 	/*
@@ -188,13 +191,19 @@ int edgetpu_iremap_mmap(struct edgetpu_dev *etdev, struct vm_area_struct *vma,
 #endif
 
 	vma->vm_pgoff = 0;
-	if (!etmempool)
-		return dma_mmap_coherent(etdev->dev, vma, mem->vaddr,
-					 mem->dma_addr, mem->size);
+	if (!etmempool) {
+		ret = dma_mmap_coherent(etdev->dev, vma, mem->vaddr,
+					mem->dma_addr, mem->size);
+		vma->vm_pgoff = orig_pgoff;
+		return ret;
+	}
+
 	offset = mem->vaddr - etmempool->base_vaddr;
 	phys = etmempool->base_phys_addr + offset;
 	etdev_dbg(etdev, "iremap_mmap: virt = %llx phys = %llx\n",
 		  (u64)mem->vaddr, phys);
-	return remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
-			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
+	ret = remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
+			      vma->vm_end - vma->vm_start, vma->vm_page_prot);
+	vma->vm_pgoff = orig_pgoff;
+	return ret;
 }
diff --git a/drivers/edgetpu/edgetpu-kci.h b/drivers/edgetpu/edgetpu-kci.h
index fed17b1..37609a3 100644
--- a/drivers/edgetpu/edgetpu-kci.h
+++ b/drivers/edgetpu/edgetpu-kci.h
@@ -75,6 +75,7 @@ struct edgetpu_vii_response_element {
 	u64 retval;
 } __packed;
 
+/* TODO(b/167151866): remove these two constants */
 /* KCI and VII command elements are the same size */
 #define EDGETPU_SIZEOF_VII_CMD_ELEMENT sizeof(struct edgetpu_command_element)
 #define EDGETPU_SIZEOF_VII_RESP_ELEMENT sizeof(struct edgetpu_vii_response_element)
diff --git a/drivers/edgetpu/edgetpu-mailbox.c b/drivers/edgetpu/edgetpu-mailbox.c
index 36422d3..e9b8d01 100644
--- a/drivers/edgetpu/edgetpu-mailbox.c
+++ b/drivers/edgetpu/edgetpu-mailbox.c
@@ -7,6 +7,7 @@
 
 #ifdef CONFIG_X86
 #include <linux/printk.h>
+#include <asm/pgtable_types.h>
 #include <asm/set_memory.h>
 #endif
 #include <linux/dma-mapping.h>
@@ -393,14 +394,27 @@ int edgetpu_mailbox_remove(struct edgetpu_mailbox_manager *mgr,
 static int convert_runtime_queue_size_to_fw(u32 queue_size, u32 element_size)
 {
 	const u32 runtime_unit = 1024;
+	u32 ret;
 
 	/* zero size is not allowed */
-	if (queue_size == 0)
+	if (queue_size == 0 || element_size == 0)
 		return -EINVAL;
-	/* prevent integer overflow */
-	if (queue_size > SIZE_MAX / runtime_unit)
+	/*
+	 * Prevent integer overflow. The product below is evaluated in u32, so
+	 * bound it by U32_MAX; SIZE_MAX is wider than u32 on 64-bit kernels
+	 * and would let the multiplication wrap unnoticed.
+	 */
+	if (queue_size > U32_MAX / runtime_unit)
 		return -ENOMEM;
-	return queue_size * runtime_unit / element_size;
+	/*
+	 * Kernel doesn't care whether queue_size * runtime_unit is a multiple
+	 * of element_size.
+	 */
+	ret = queue_size * runtime_unit / element_size;
+	/* hardware limitation */
+	if (ret == 0 || ret > MAX_QUEUE_SIZE)
+		return -EINVAL;
+	return ret;
 }
 
 /*
@@ -423,14 +433,14 @@ int edgetpu_mailbox_init_vii(struct edgetpu_vii *vii,
 	if (IS_ERR(mailbox))
 		return PTR_ERR(mailbox);
 
-	cmd_queue_size = convert_runtime_queue_size_to_fw(
-			attr->cmd_queue_size, EDGETPU_SIZEOF_VII_CMD_ELEMENT);
+	cmd_queue_size = convert_runtime_queue_size_to_fw(attr->cmd_queue_size,
+							  attr->sizeof_cmd);
 	if (cmd_queue_size < 0) {
 		edgetpu_mailbox_remove(mgr, mailbox);
 		return cmd_queue_size;
 	}
 	resp_queue_size = convert_runtime_queue_size_to_fw(
-			attr->resp_queue_size, EDGETPU_SIZEOF_VII_RESP_ELEMENT);
+		attr->resp_queue_size, attr->sizeof_resp);
 	if (resp_queue_size < 0) {
 		edgetpu_mailbox_remove(mgr, mailbox);
 		return resp_queue_size;
@@ -441,10 +451,8 @@ int edgetpu_mailbox_init_vii(struct edgetpu_vii *vii,
 				      cmd_queue_tail_doorbell_enable,
 				      attr->cmdq_tail_doorbell);
 
-	ret = edgetpu_mailbox_alloc_queue(group->etdev, mailbox,
-					  cmd_queue_size,
-					  EDGETPU_SIZEOF_VII_CMD_ELEMENT,
-					  MAILBOX_CMD_QUEUE,
+	ret = edgetpu_mailbox_alloc_queue(group->etdev, mailbox, cmd_queue_size,
+					  attr->sizeof_cmd, MAILBOX_CMD_QUEUE,
 					  &vii->cmd_queue_mem);
 	if (ret) {
 		edgetpu_mailbox_remove(mgr, mailbox);
@@ -456,8 +464,7 @@ int edgetpu_mailbox_init_vii(struct edgetpu_vii *vii,
 		  __func__, mailbox->mailbox_id, vii->cmd_queue_mem.tpu_addr,
 		  &vii->cmd_queue_mem.dma_addr);
 	ret = edgetpu_mailbox_alloc_queue(group->etdev, mailbox,
-					  resp_queue_size,
-					  EDGETPU_SIZEOF_VII_RESP_ELEMENT,
+					  resp_queue_size, attr->sizeof_resp,
 					  MAILBOX_RESP_QUEUE,
 					  &vii->resp_queue_mem);
 
@@ -678,14 +685,98 @@ void edgetpu_mailbox_reset_vii(struct edgetpu_mailbox_manager *mgr)
 
 	write_lock_irqsave(&mgr->mailboxes_lock, flags);
 	for (i = mgr->vii_index_from; i < mgr->vii_index_to; i++) {
-		struct edgetpu_mailbox mbox = {
-			.etdev = mgr->etdev,
-			.context_csr_base = mgr->get_context_csr_base(i),
-		};
-
-		edgetpu_mailbox_reset(&mbox);
-		edgetpu_mailbox_disable(&mbox);
-		edgetpu_mailbox_init_doorbells(&mbox);
+		struct edgetpu_mailbox *mbox = mgr->mailboxes[i];
+
+		if (!mbox)
+			continue;
+		edgetpu_mailbox_reset(mbox);
+		edgetpu_mailbox_disable(mbox);
+		edgetpu_mailbox_init_doorbells(mbox);
 	}
 	write_unlock_irqrestore(&mgr->mailboxes_lock, flags);
 }
+
+/*
+ * Re-programs the VII mailbox of @group from the attributes saved in
+ * group->mbox_attr, re-using the queue addresses stored in group->vii.
+ *
+ * Returns 0 on success, or a negative errno from the queue size conversion
+ * or from edgetpu_mailbox_set_queue().
+ */
+static int edgetpu_mailbox_reinit_vii(struct edgetpu_device_group *group)
+{
+	int cmd_queue_size, resp_queue_size;
+	struct edgetpu_mailbox *mailbox = group->vii.mailbox;
+	struct edgetpu_mailbox_attr *attr = &group->mbox_attr;
+	int ret;
+
+	cmd_queue_size = convert_runtime_queue_size_to_fw(attr->cmd_queue_size,
+							  attr->sizeof_cmd);
+	if (cmd_queue_size < 0)
+		return cmd_queue_size;
+
+	resp_queue_size = convert_runtime_queue_size_to_fw(
+		attr->resp_queue_size, attr->sizeof_resp);
+	if (resp_queue_size < 0)
+		return resp_queue_size;
+
+	etdev_dbg(group->etdev, "Restoring vii. workload_id=%u mbox_id=%u\n",
+		  group->workload_id, mailbox->mailbox_id);
+
+	etdev_dbg(group->etdev, "Priority: %d\n", attr->priority);
+	etdev_dbg(group->etdev, "Tail doorbell %s\n",
+		  attr->cmdq_tail_doorbell ? "enabled" : "disabled");
+	etdev_dbg(group->etdev, "cmd queue: addr=%llX size=%d\n",
+		  group->vii.cmd_queue_mem.tpu_addr, cmd_queue_size);
+	etdev_dbg(group->etdev, "resp queue: addr=%llX size=%d\n",
+		  group->vii.resp_queue_mem.tpu_addr, resp_queue_size);
+
+	edgetpu_mailbox_set_priority(mailbox, attr->priority);
+	EDGETPU_MAILBOX_CONTEXT_WRITE(mailbox, cmd_queue_tail_doorbell_enable,
+				      attr->cmdq_tail_doorbell);
+
+	ret = edgetpu_mailbox_set_queue(mailbox, MAILBOX_CMD_QUEUE,
+					group->vii.cmd_queue_mem.tpu_addr,
+					cmd_queue_size);
+	if (ret) {
+		etdev_warn(group->etdev,
+			   "%s: Restoring command queue failed: %d\n", __func__,
+			   ret);
+		return ret;
+	}
+
+	ret = edgetpu_mailbox_set_queue(mailbox, MAILBOX_RESP_QUEUE,
+					group->vii.resp_queue_mem.tpu_addr,
+					resp_queue_size);
+	if (ret) {
+		etdev_warn(group->etdev,
+			   "%s: Restoring response queue failed: %d\n",
+			   __func__, ret);
+		return ret;
+	}
+
+	/* The context is enabled last, after both queues have been set. */
+	EDGETPU_MAILBOX_CONTEXT_WRITE(mailbox, context_enable, 1);
+	return 0;
+}
+
+/*
+ * Calls edgetpu_mailbox_reinit_vii() for every non-NULL entry of
+ * etdev->groups[] while holding etdev->groups_lock.
+ *
+ * Best effort: a failure for one group is not propagated to the caller and
+ * does not stop the remaining groups from being restored.
+ */
+void edgetpu_mailbox_restore_active_vii_queues(struct edgetpu_dev *etdev)
+{
+	int i;
+	struct edgetpu_device_group *group;
+
+	mutex_lock(&etdev->groups_lock);
+	for (i = 0; i < EDGETPU_NGROUPS; i++) {
+		group = etdev->groups[i];
+		if (group)
+			edgetpu_mailbox_reinit_vii(group);
+	}
+	mutex_unlock(&etdev->groups_lock);
+}
diff --git a/drivers/edgetpu/edgetpu-mailbox.h b/drivers/edgetpu/edgetpu-mailbox.h
index 50e38df..b0c07b4 100644
--- a/drivers/edgetpu/edgetpu-mailbox.h
+++ b/drivers/edgetpu/edgetpu-mailbox.h
@@ -226,6 +226,12 @@ void edgetpu_mailbox_free_queue(struct edgetpu_dev *etdev,
 				struct edgetpu_mailbox *mailbox,
 				edgetpu_queue_mem *mem);
 
+/*
+ * Re-configure VII mailbox queues which have an active client, re-using
+ * existing buffers
+ */
+void edgetpu_mailbox_restore_active_vii_queues(struct edgetpu_dev *etdev);
+
 /* Return context ID for mailbox. */
 static inline enum edgetpu_context_id
 edgetpu_mailbox_context_id(struct edgetpu_mailbox *mailbox)
diff --git a/drivers/edgetpu/edgetpu-map-dmabuf.c b/drivers/edgetpu/edgetpu-map-dmabuf.c
deleted file mode 100644
index d8b0712..0000000
--- a/drivers/edgetpu/edgetpu-map-dmabuf.c
+++ /dev/null
@@ -1,675 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Provides functions for mapping buffers backed by dma-buf onto EdgeTPU
- * devices.
- *
- * Copyright (C) 2020 Google, Inc.
- */
-
-#include <linux/dma-buf.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-
-#include "edgetpu-device-group.h"
-#include "edgetpu-dram.h"
-#include "edgetpu-internal.h"
-#include "edgetpu-map-dmabuf.h"
-#include "edgetpu-mapping.h"
-#include "edgetpu-mmu.h"
-#include "edgetpu.h"
-
-#if IS_ENABLED(CONFIG_DMA_SHARED_BUFFER)
-
-#if IS_ENABLED(CONFIG_ION_EXYNOS) && IS_ENABLED(CONFIG_EXYNOS_IOVMM)
-#define WORKAROUND_EXYNOS_IOVMM
-#endif
-
-/*
- * Records objects for mapping a dma-buf to an edgetpu_dev.
- */
-struct dmabuf_map_entry {
-	struct dma_buf_attachment *attachment;
-	/* SG table returned by dma_buf_map_attachment() */
-	struct sg_table *sgt;
-#ifdef WORKAROUND_EXYNOS_IOVMM
-	/* modified @sgt for the workaround */
-	struct sg_table *mapped_sgt;
-#endif
-	/* the DMA addresses mapped to */
-	struct {
-		dma_addr_t addr;
-		size_t len;
-	} *dma_addrs;
-	uint n; /* length of @dma_addrs */
-};
-
-/*
- * Records the mapping and other fields needed for mapping a dma-buf to a device
- * group.
- */
-struct edgetpu_dmabuf_map {
-	struct edgetpu_mapping map;
-	u64 offset;
-	u64 size; /* size of this mapping in bytes */
-	u32 mmu_flags;
-	struct dma_buf *dmabuf;
-	/*
-	 * The length of array @entries will be
-	 * - 1, for a non-mirrored mapping request
-	 * - number of dies in @group, otherwise
-	 */
-	struct dmabuf_map_entry *entries;
-	uint num_entries;
-};
-
-static int etdev_add_translations(struct edgetpu_dev *etdev,
-				  tpu_addr_t tpu_addr,
-				  struct dmabuf_map_entry *entry,
-				  enum dma_data_direction dir,
-				  enum edgetpu_context_id ctx_id)
-{
-	const int prot = __dma_dir_to_iommu_prot(dir);
-	uint i;
-	u64 offset = 0;
-	int ret;
-
-	for (i = 0; i < entry->n; i++) {
-		ret = edgetpu_mmu_add_translation(etdev, tpu_addr + offset,
-						  entry->dma_addrs[i].addr,
-						  entry->dma_addrs[i].len, prot,
-						  ctx_id);
-		if (ret)
-			goto rollback;
-		offset += entry->dma_addrs[i].len;
-	}
-	return 0;
-
-rollback:
-	edgetpu_mmu_remove_translation(etdev, tpu_addr, offset, ctx_id);
-	return ret;
-}
-
-/* Maps to the first entry in @dmap. */
-static int etdev_map_dmabuf(struct edgetpu_dev *etdev,
-			    struct edgetpu_dmabuf_map *dmap,
-			    enum dma_data_direction dir, tpu_addr_t *tpu_addr_p)
-{
-	struct edgetpu_device_group *group = dmap->map.priv;
-	const enum edgetpu_context_id ctx_id = edgetpu_group_context_id(group);
-	struct dmabuf_map_entry *entry = &dmap->entries[0];
-	tpu_addr_t tpu_addr;
-	int ret;
-
-	if (entry->n == 1) {
-		/*
-		 * Easy case - only one DMA address, we can use chip-dependent
-		 * tpu_map to map and acquire the TPU VA.
-		 */
-		tpu_addr = edgetpu_mmu_tpu_map(etdev, entry->dma_addrs[0].addr,
-					       dmap->size, dir, ctx_id,
-					       dmap->mmu_flags);
-		if (!tpu_addr)
-			return -ENOSPC;
-	} else {
-		/*
-		 * Maps multiple DMA addresses, only chips with an internal MMU
-		 * can handle this.
-		 */
-		tpu_addr =
-			edgetpu_mmu_alloc(etdev, dmap->size, dmap->mmu_flags);
-		if (!tpu_addr)
-			return -ENOSPC;
-		ret = etdev_add_translations(etdev, tpu_addr, entry, dir,
-					     ctx_id);
-		if (ret) {
-			edgetpu_mmu_free(etdev, tpu_addr, dmap->size);
-			return ret;
-		}
-	}
-
-	*tpu_addr_p = tpu_addr;
-	return 0;
-}
-
-/* reverts etdev_map_dmabuf() */
-static void etdev_unmap_dmabuf(struct edgetpu_dev *etdev,
-			       struct edgetpu_dmabuf_map *dmap,
-			       tpu_addr_t tpu_addr)
-{
-	struct edgetpu_device_group *group = dmap->map.priv;
-	const enum edgetpu_context_id ctx_id = edgetpu_group_context_id(group);
-	struct dmabuf_map_entry *entry = &dmap->entries[0];
-
-	if (entry->n == 1) {
-		edgetpu_mmu_tpu_unmap(etdev, tpu_addr, dmap->size, ctx_id);
-	} else {
-		edgetpu_mmu_remove_translation(etdev, tpu_addr, dmap->size,
-					       ctx_id);
-		edgetpu_mmu_free(etdev, tpu_addr, dmap->size);
-	}
-}
-
-/* handles mirrored mapping request */
-static int group_map_dmabuf(struct edgetpu_device_group *group,
-			    struct edgetpu_dmabuf_map *dmap,
-			    enum dma_data_direction dir, tpu_addr_t *tpu_addr_p)
-{
-	const enum edgetpu_context_id ctx_id = edgetpu_group_context_id(group);
-	struct edgetpu_dev *etdev = group->etdev;
-	tpu_addr_t tpu_addr;
-	uint i;
-	int ret;
-
-	ret = etdev_map_dmabuf(etdev, dmap, dir, &tpu_addr);
-	if (ret)
-		return ret;
-	for (i = 1; i < group->n_clients; i++) {
-		etdev = edgetpu_device_group_nth_etdev(group, i);
-		ret = etdev_add_translations(etdev, tpu_addr, &dmap->entries[i],
-					     dir, ctx_id);
-		if (ret)
-			goto err_remove;
-	}
-	*tpu_addr_p = tpu_addr;
-	return 0;
-
-err_remove:
-	while (i > 1) {
-		i--;
-		etdev = edgetpu_device_group_nth_etdev(group, i);
-		edgetpu_mmu_remove_translation(etdev, tpu_addr, dmap->size,
-					       ctx_id);
-	}
-	etdev_unmap_dmabuf(group->etdev, dmap, tpu_addr);
-
-	return ret;
-}
-
-/* reverts group_map_dmabuf() */
-static void group_unmap_dmabuf(struct edgetpu_device_group *group,
-			       struct edgetpu_dmabuf_map *dmap,
-			       tpu_addr_t tpu_addr)
-{
-	const enum edgetpu_context_id ctx_id = edgetpu_group_context_id(group);
-	struct edgetpu_dev *etdev;
-	uint i;
-
-	for (i = 1; i < group->n_clients; i++) {
-		etdev = edgetpu_device_group_nth_etdev(group, i);
-		edgetpu_mmu_remove_translation(etdev, tpu_addr, dmap->size,
-					       ctx_id);
-	}
-	edgetpu_mmu_tpu_unmap(group->etdev, tpu_addr, dmap->size, ctx_id);
-}
-
-/*
- * Clean resources recorded in @dmap.
- *
- * Caller holds the lock of group (map->priv) and ensures the group is in
- * the finalized state.
- */
-static void dmabuf_map_callback_release(struct edgetpu_mapping *map)
-{
-	struct edgetpu_dmabuf_map *dmap =
-		container_of(map, struct edgetpu_dmabuf_map, map);
-	struct edgetpu_device_group *group = map->priv;
-	const enum dma_data_direction dir = edgetpu_host_dma_dir(map->dir);
-	const tpu_addr_t tpu_addr = map->device_address;
-	struct edgetpu_dev *etdev;
-	uint i;
-
-	if (tpu_addr) {
-		if (IS_MIRRORED(map->flags)) {
-			group_unmap_dmabuf(group, dmap, tpu_addr);
-		} else {
-			etdev = edgetpu_device_group_nth_etdev(group,
-							       map->die_index);
-			etdev_unmap_dmabuf(etdev, dmap, tpu_addr);
-		}
-	}
-	for (i = 0; i < dmap->num_entries; i++) {
-		struct dmabuf_map_entry *entry = &dmap->entries[i];
-
-#ifdef WORKAROUND_EXYNOS_IOVMM
-		if (entry->mapped_sgt) {
-			dma_unmap_sg(entry->attachment->dev,
-				     entry->mapped_sgt->sgl,
-				     entry->mapped_sgt->orig_nents, dir);
-			sg_free_table(entry->mapped_sgt);
-			kfree(entry->mapped_sgt);
-		}
-#endif
-		kfree(entry->dma_addrs);
-		if (entry->sgt)
-			dma_buf_unmap_attachment(entry->attachment, entry->sgt,
-						 dir);
-		if (entry->attachment)
-			dma_buf_detach(dmap->dmabuf, entry->attachment);
-	}
-	dma_buf_put(dmap->dmabuf);
-	edgetpu_device_group_put(group);
-	kfree(dmap->entries);
-	kfree(dmap);
-}
-
-static void entry_show_dma_addrs(struct dmabuf_map_entry *entry,
-				 struct seq_file *s)
-{
-	if (entry->n == 1) {
-		seq_printf(s, "%pad\n", &entry->dma_addrs[0].addr);
-	} else {
-		uint i;
-
-		seq_puts(s, "[");
-		for (i = 0; i < entry->n; i++) {
-			if (i)
-				seq_puts(s, ", ");
-			seq_printf(s, "%pad", &entry->dma_addrs[i].addr);
-		}
-		seq_puts(s, "]\n");
-	}
-}
-
-static void dmabuf_map_callback_show(struct edgetpu_mapping *map,
-				     struct seq_file *s)
-{
-	struct edgetpu_dmabuf_map *dmap =
-		container_of(map, struct edgetpu_dmabuf_map, map);
-
-	if (IS_MIRRORED(dmap->map.flags))
-		seq_printf(
-			s,
-			"  <%s> mirrored: iova=0x%llx pages=%llu %s offset=0x%llx",
-			dmap->dmabuf->exp_name, map->device_address,
-			dmap->size / PAGE_SIZE, edgetpu_dma_dir_rw_s(map->dir),
-			dmap->offset);
-	else
-		seq_printf(
-			s,
-			"  <%s> die %u: iova=0x%llx pages=%llu %s offset=0x%llx",
-			dmap->dmabuf->exp_name, map->die_index,
-			map->device_address, dmap->size / PAGE_SIZE,
-			edgetpu_dma_dir_rw_s(map->dir), dmap->offset);
-
-	edgetpu_device_dram_dmabuf_info_show(dmap->dmabuf, s);
-	seq_puts(s, " dma=");
-	entry_show_dma_addrs(&dmap->entries[0], s);
-}
-
-/*
- * Allocates and properly sets fields of an edgetpu_dmabuf_map.
- *
- * Caller holds group->lock and checks @group is finalized.
- *
- * Returns the pointer on success, or NULL on failure.
- */
-static struct edgetpu_dmabuf_map *
-alloc_dmabuf_map(struct edgetpu_device_group *group, edgetpu_map_flag_t flags)
-{
-	struct edgetpu_dmabuf_map *dmap = kzalloc(sizeof(*dmap), GFP_KERNEL);
-	struct edgetpu_mapping *map;
-	uint n;
-
-	if (!dmap)
-		return NULL;
-	if (IS_MIRRORED(flags))
-		n = group->n_clients;
-	else
-		n = 1;
-	dmap->entries = kcalloc(n, sizeof(*dmap->entries), GFP_KERNEL);
-	if (!dmap->entries)
-		goto err_free;
-	dmap->num_entries = n;
-	dmap->mmu_flags = map_to_mmu_flags(flags) | EDGETPU_MMU_DMABUF;
-	map = &dmap->map;
-	map->flags = flags;
-	map->dir = flags & EDGETPU_MAP_DIR_MASK;
-	map->release = dmabuf_map_callback_release;
-	map->show = dmabuf_map_callback_show;
-	map->priv = edgetpu_device_group_get(group);
-	return dmap;
-
-err_free:
-	kfree(dmap->entries);
-	kfree(dmap);
-	return NULL;
-}
-
-/*
- * Set @entry with one DMA address if we can use that address to present the DMA
- * addresses in @sgt.
- *
- * Returns 0 if succeeded.
- * Returns -EINVAL if the DMA addresses in @sgt is not contiguous.
- */
-static int entry_set_one_dma(struct dmabuf_map_entry *entry,
-			     const struct sg_table *sgt, u64 offset, u64 size)
-{
-	int i;
-	struct scatterlist *sg;
-	dma_addr_t addr;
-
-	addr = sg_dma_address(sgt->sgl);
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		if (sg_dma_len(sg) == 0)
-			break;
-		if (sg_dma_address(sg) != addr)
-			return -EINVAL;
-		addr += sg_dma_len(sg);
-	}
-
-	entry->dma_addrs = kmalloc(sizeof(*entry->dma_addrs), GFP_KERNEL);
-	if (!entry->dma_addrs)
-		return -ENOMEM;
-	entry->n = 1;
-	entry->dma_addrs[0].addr = sg_dma_address(sgt->sgl) + offset;
-	entry->dma_addrs[0].len = size;
-
-	return 0;
-}
-
-#ifdef WORKAROUND_EXYNOS_IOVMM
-
-static struct sg_table *dup_sg_table(const struct sg_table *sgt)
-{
-	struct sg_table *new_sgt;
-	int i;
-	struct scatterlist *sg, *new_sg;
-	int ret;
-
-	new_sgt = kmalloc(sizeof(*new_sgt), GFP_KERNEL);
-	if (!new_sgt)
-		return ERR_PTR(-ENOMEM);
-
-	ret = sg_alloc_table(new_sgt, sgt->nents, GFP_KERNEL);
-	if (ret) {
-		kfree(new_sgt);
-		return ERR_PTR(ret);
-	}
-	new_sg = new_sgt->sgl;
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		memcpy(new_sg, sg, sizeof(*sg));
-		new_sg = sg_next(new_sg);
-	}
-
-	return new_sgt;
-}
-
-/*
- * Here assumes we are mapping the dmabuf backed by Exynos' ION, which doesn't
- * follow the standard dma-buf framework.
- *
- * This workaround is needed since dma_buf_map_attachment() of Exynos ION
- * doesn't call dma_map_sg(), and it expects drivers call ion_iovmm_map()
- * manually. However our EdgeTPU is not backed by Exynos's IOVMM, so we simply
- * call dma_map_sg() before setting @entry->dma_addrs.
- */
-static int entry_set_dma_addrs(struct dmabuf_map_entry *entry, u64 offset,
-			       u64 size, enum dma_data_direction dir)
-{
-	struct dma_buf_attachment *attach = entry->attachment;
-	struct sg_table *sgt;
-	int ret;
-
-	sgt = dup_sg_table(entry->sgt);
-	if (IS_ERR(sgt))
-		return PTR_ERR(sgt);
-	sgt->nents = dma_map_sg(attach->dev, sgt->sgl, sgt->orig_nents, dir);
-	if (sgt->nents == 0) {
-		dev_err(attach->dev, "%s: dma_map_sg failed", __func__);
-		ret = -EINVAL;
-		goto err_free;
-	}
-	ret = entry_set_one_dma(entry, sgt, offset, size);
-	if (ret) {
-		dev_err(attach->dev,
-			"%s: cannot map to one DMA addr, nents=%u, ret=%d",
-			__func__, sgt->nents, ret);
-		goto err_unmap;
-	}
-	entry->mapped_sgt = sgt;
-
-	return 0;
-
-err_unmap:
-	dma_unmap_sg(attach->dev, sgt->sgl, sgt->orig_nents, dir);
-err_free:
-	sg_free_table(sgt);
-	kfree(sgt);
-	return ret;
-}
-
-#else /* !WORKAROUND_EXYNOS_IOVMM */
-
-/*
- * Allocates @entry->dma_addrs and assigns DMA addresses in @sgt start from
- * @offset with size @size to @entry->dma_addrs.
- */
-static int entry_set_dma_addrs(struct dmabuf_map_entry *entry, u64 offset,
-			       u64 size, enum dma_data_direction dir)
-{
-	struct sg_table *sgt = entry->sgt;
-	struct scatterlist *sg;
-	u64 cur_offset = 0;
-	uint n = 0;
-	uint i;
-
-	if (!entry_set_one_dma(entry, sgt, offset, size))
-		return 0;
-	/* calculate the number of sg covered by [offset, offset + size) */
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		if (offset < cur_offset + sg_dma_len(sg))
-			n++;
-		if (offset + size <= cur_offset + sg_dma_len(sg))
-			break;
-		cur_offset += sg_dma_len(sg);
-	}
-	if (n == 0)
-		return -EINVAL;
-	entry->dma_addrs =
-		kmalloc_array(n, sizeof(*entry->dma_addrs), GFP_KERNEL);
-	if (!entry->dma_addrs)
-		return -ENOMEM;
-	entry->n = n;
-	cur_offset = 0;
-	i = 0;
-	for (sg = sgt->sgl; sg;
-	     cur_offset += sg_dma_len(sg), sg = sg_next(sg)) {
-		u64 remain_size = offset + size - cur_offset;
-
-		/* hasn't touched the first covered sg */
-		if (offset >= cur_offset + sg_dma_len(sg))
-			continue;
-		entry->dma_addrs[i].addr = sg_dma_address(sg);
-		entry->dma_addrs[i].len = sg_dma_len(sg);
-		/* offset exceeds current sg */
-		if (offset > cur_offset) {
-			entry->dma_addrs[i].addr += offset - cur_offset;
-			entry->dma_addrs[i].len -= offset - cur_offset;
-		}
-		if (remain_size <= sg_dma_len(sg)) {
-			entry->dma_addrs[i].len -= sg_dma_len(sg) - remain_size;
-			break;
-		}
-		i++;
-	}
-
-	return 0;
-}
-
-#endif /* WORKAROUND_EXYNOS_IOVMM */
-
-/*
- * Performs dma_buf_attach + dma_buf_map_attachment of @dmabuf to @etdev, and
- * sets @entry per the attaching result.
- *
- * Fields of @entry will be set on success.
- */
-static int etdev_attach_dmabuf_to_entry(struct edgetpu_dev *etdev,
-					struct dma_buf *dmabuf,
-					struct dmabuf_map_entry *entry,
-					u64 offset, u64 size,
-					enum dma_data_direction dir)
-{
-	struct dma_buf_attachment *attachment;
-	struct sg_table *sgt;
-	int ret;
-
-	attachment = dma_buf_attach(dmabuf, etdev->dev);
-	if (IS_ERR(attachment))
-		return PTR_ERR(attachment);
-	sgt = dma_buf_map_attachment(attachment, dir);
-	if (IS_ERR(sgt)) {
-		ret = PTR_ERR(sgt);
-		goto err_detach;
-	}
-	entry->attachment = attachment;
-	entry->sgt = sgt;
-	ret = entry_set_dma_addrs(entry, offset, size, dir);
-	if (ret)
-		goto err_unmap;
-
-	return 0;
-
-err_unmap:
-	dma_buf_unmap_attachment(attachment, sgt, dir);
-err_detach:
-	dma_buf_detach(dmabuf, attachment);
-	entry->sgt = NULL;
-	entry->attachment = NULL;
-	return ret;
-}
-
-int edgetpu_map_dmabuf(struct edgetpu_device_group *group,
-		       struct edgetpu_map_dmabuf_ioctl *arg)
-{
-	int ret = -EINVAL;
-	struct dma_buf *dmabuf;
-	edgetpu_map_flag_t flags = arg->flags;
-	const u64 offset = arg->offset;
-	const u64 size = arg->size;
-	const enum dma_data_direction dir =
-		edgetpu_host_dma_dir(flags & EDGETPU_MAP_DIR_MASK);
-	struct edgetpu_dev *etdev;
-	struct edgetpu_dmabuf_map *dmap;
-	tpu_addr_t tpu_addr;
-	uint i;
-
-	/* offset is not page-aligned */
-	if (offset_in_page(offset))
-		return -EINVAL;
-	/* size == 0 or overflow */
-	if (offset + size <= offset)
-		return -EINVAL;
-	dmabuf = dma_buf_get(arg->dmabuf_fd);
-	if (IS_ERR(dmabuf))
-		return PTR_ERR(dmabuf);
-	if (offset + size > dmabuf->size)
-		goto err_put;
-
-	mutex_lock(&group->lock);
-	if (!edgetpu_device_group_is_finalized(group))
-		goto err_unlock_group;
-
-	dmap = alloc_dmabuf_map(group, flags);
-	if (!dmap) {
-		ret = -ENOMEM;
-		goto err_unlock_group;
-	}
-
-	get_dma_buf(dmabuf);
-	dmap->dmabuf = dmabuf;
-	dmap->offset = offset;
-	dmap->size = size;
-	if (IS_MIRRORED(flags)) {
-		for (i = 0; i < group->n_clients; i++) {
-			etdev = edgetpu_device_group_nth_etdev(group, i);
-			ret = etdev_attach_dmabuf_to_entry(etdev, dmabuf,
-							   &dmap->entries[i],
-							   offset, size, dir);
-			if (ret)
-				goto err_release_map;
-		}
-		ret = group_map_dmabuf(group, dmap, dir, &tpu_addr);
-		if (ret)
-			goto err_release_map;
-		dmap->map.die_index = ALL_DIES;
-	} else {
-		etdev = edgetpu_device_group_nth_etdev(group, arg->die_index);
-		if (!etdev) {
-			ret = -EINVAL;
-			goto err_release_map;
-		}
-		ret = etdev_attach_dmabuf_to_entry(
-			etdev, dmabuf, &dmap->entries[0], offset, size, dir);
-		if (ret)
-			goto err_release_map;
-		ret = etdev_map_dmabuf(etdev, dmap, dir, &tpu_addr);
-		if (ret)
-			goto err_release_map;
-		dmap->map.die_index = arg->die_index;
-	}
-	dmap->map.device_address = tpu_addr;
-	ret = edgetpu_mapping_add(&group->dmabuf_mappings, &dmap->map);
-	if (ret)
-		goto err_release_map;
-	arg->device_address = tpu_addr;
-	mutex_unlock(&group->lock);
-	dma_buf_put(dmabuf);
-	return 0;
-
-err_release_map:
-	/* also releases entries if they are set */
-	dmabuf_map_callback_release(&dmap->map);
-err_unlock_group:
-	mutex_unlock(&group->lock);
-err_put:
-	dma_buf_put(dmabuf);
-
-	return ret;
-}
-
-int edgetpu_unmap_dmabuf(struct edgetpu_device_group *group, u32 die_index,
-			 tpu_addr_t tpu_addr)
-{
-	struct edgetpu_mapping_root *mappings = &group->dmabuf_mappings;
-	struct edgetpu_mapping *map;
-	int ret = -EINVAL;
-
-	mutex_lock(&group->lock);
-	/* the group is disbanded means all the mappings have been released */
-	if (!edgetpu_device_group_is_finalized(group))
-		goto out_unlock;
-	edgetpu_mapping_lock(mappings);
-	map = edgetpu_mapping_find_locked(mappings, die_index, tpu_addr);
-	if (!map)
-		map = edgetpu_mapping_find_locked(mappings, ALL_DIES, tpu_addr);
-	/* the mapping is not found */
-	if (!map) {
-		edgetpu_mapping_unlock(mappings);
-		goto out_unlock;
-	}
-	edgetpu_mapping_unlink(mappings, map);
-	edgetpu_mapping_unlock(mappings);
-	dmabuf_map_callback_release(map);
-	ret = 0;
-out_unlock:
-	mutex_unlock(&group->lock);
-	return ret;
-}
-
-#else /* !CONFIG_DMA_SHARED_BUFFER */
-
-int edgetpu_map_dmabuf(struct edgetpu_device_group *group,
-		       struct edgetpu_map_dmabuf_ioctl *arg)
-{
-	return -ENOTTY;
-}
-
-int edgetpu_unmap_dmabuf(struct edgetpu_device_group *group, u32 die_index,
-			 tpu_addr_t tpu_addr)
-{
-	return -ENOTTY;
-}
-
-#endif /* CONFIG_DMA_SHARED_BUFFER */
diff --git a/drivers/edgetpu/edgetpu-map-dmabuf.h b/drivers/edgetpu/edgetpu-map-dmabuf.h
deleted file mode 100644
index 75fd9e1..0000000
--- a/drivers/edgetpu/edgetpu-map-dmabuf.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Provides functions for mapping buffers backed by dma-buf to EdgeTPU devices.
- *
- * Copyright (C) 2020 Google, Inc.
- */
-#ifndef __EDGETPU_MAP_DMABUF_H__
-#define __EDGETPU_MAP_DMABUF_H__
-
-#include "edgetpu-device-group.h"
-#include "edgetpu-internal.h"
-#include "edgetpu.h"
-
-/*
- * Maps a dma-buf to a device group.
- *
- * @arg->device_address will be set as the mapped TPU VA on success.
- *
- * Returns zero on success or a negative errno on error.
- */
-int edgetpu_map_dmabuf(struct edgetpu_device_group *group,
-		       struct edgetpu_map_dmabuf_ioctl *arg);
-/* unmap the dma-buf backed buffer from a device group */
-int edgetpu_unmap_dmabuf(struct edgetpu_device_group *group, u32 die_index,
-			 tpu_addr_t tpu_addr);
-
-#endif /* __EDGETPU_MAP_DMABUF_H__ */
diff --git a/drivers/edgetpu/edgetpu-mcp.h b/drivers/edgetpu/edgetpu-mcp.h
index 3f2d456..b49cf07 100644
--- a/drivers/edgetpu/edgetpu-mcp.h
+++ b/drivers/edgetpu/edgetpu-mcp.h
@@ -20,6 +20,7 @@ struct edgetpu_mcp {
 	u8 id;		/* the MCP ID matches etdev->mcp_id */
 	u8 pkgtype;	/* package type, definition is chip-dependent */
 	u8 total_num;	/* total number of etdevs expected by this MCP */
+	u64 serial_num; /* serial number of the package */
 
 	/* fields need to be locked before accessing */
 
@@ -77,6 +78,13 @@ void edgetpu_mcp_remove_etdev(struct edgetpu_dev *etdev);
 void edgetpu_mcp_probe_fail(struct edgetpu_dev *etdev);
 
 /*
+ * Verify that the etdev belongs to the correct MCP.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int edgetpu_mcp_verify_membership(struct edgetpu_dev *etdev);
+
+/*
  * Invokes @callback with each (currently) registered MCP.
  *
  * If @stop_on_err is true, this function stops when @callback returned non-zero
@@ -130,6 +138,11 @@ static inline void edgetpu_mcp_exit(void)
 {
 }
 
+static inline int edgetpu_mcp_verify_membership(struct edgetpu_dev *etdev)
+{
+	return 0;
+}
+
 #endif /* EDGETPU_HAS_MCP */
 
 #endif /* __EDGETPU_MCP_H__ */
diff --git a/drivers/edgetpu/edgetpu-mmu.h b/drivers/edgetpu/edgetpu-mmu.h
index c1f560a..5226e2e 100644
--- a/drivers/edgetpu/edgetpu-mmu.h
+++ b/drivers/edgetpu/edgetpu-mmu.h
@@ -9,6 +9,7 @@
 
 #include <linux/dma-direction.h>
 #include <linux/dma-mapping.h>
+#include <linux/iommu.h>
 
 #include "edgetpu-internal.h"
 #include "edgetpu.h"
@@ -56,9 +57,30 @@ static inline u32 map_to_mmu_flags(edgetpu_map_flag_t flags)
 	return ret;
 }
 
-static inline unsigned long map_to_dma_attr(edgetpu_map_flag_t flags)
+/* To be compatible with Linux kernel without this flag. */
+#ifndef DMA_ATTR_PBHA_PROT
+#define DMA_ATTR_PBHA_PROT(x) 0
+#endif
+#ifndef IOMMU_PBHA_PROT
+#define IOMMU_PBHA_PROT(x) 0
+#endif
+/* Extracts the PBHA bits from map flags; @flags may be any expression. */
+#define EDGEPTU_MAP_PBHA_VALUE(flags)                                          \
+	(((flags) >> EDGETPU_MAP_ATTR_PBHA_SHIFT) & EDGETPU_MAP_ATTR_PBHA_MASK)
+/*
+ * Converts edgetpu map flag to DMA attr.
+ *
+ * Ignore EDGETPU_MAP_SKIP_CPU_SYNC if @map = true
+ */
+static inline unsigned long map_to_dma_attr(edgetpu_map_flag_t flags, bool map)
 {
-	return (flags & EDGETPU_MAP_SKIP_CPU_SYNC) ? DMA_ATTR_SKIP_CPU_SYNC : 0;
+	unsigned long attr = 0;
+
+	if (!map && flags & EDGETPU_MAP_SKIP_CPU_SYNC)
+		attr = DMA_ATTR_SKIP_CPU_SYNC;
+	attr |= DMA_ATTR_PBHA_PROT(EDGEPTU_MAP_PBHA_VALUE(flags));
+
+	return attr;
 }
 
 int edgetpu_mmu_attach(struct edgetpu_dev *dev, void *mmu_info);
@@ -183,4 +205,10 @@ void edgetpu_mmu_tpu_unmap(struct edgetpu_dev *etdev,
 			   tpu_addr_t tpu_addr, size_t size,
 			   enum edgetpu_context_id context_id);
 
+/*
+ * Hints the MMU to use edgetpu_device_dram_alloc() for allocating MMU page
+ * tables.
+ */
+void edgetpu_mmu_use_dev_dram(struct edgetpu_dev *etdev);
+
 #endif /* __EDGETPU_MMU_H__ */
diff --git a/drivers/edgetpu/edgetpu-pm.c b/drivers/edgetpu/edgetpu-pm.c
index 8a9ef6b..01d60a4 100644
--- a/drivers/edgetpu/edgetpu-pm.c
+++ b/drivers/edgetpu/edgetpu-pm.c
@@ -8,12 +8,16 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 
+#include "edgetpu-device-group.h"
 #include "edgetpu-firmware.h"
 #include "edgetpu-internal.h"
+#include "edgetpu-mailbox.h"
 #include "edgetpu-pm.h"
+#include "edgetpu-sw-watchdog.h"
 
 struct edgetpu_pm_private {
 	const struct edgetpu_pm_handlers *handlers;
+	struct mutex lock;
 	int power_up_count;
 };
 
@@ -24,11 +28,16 @@ int edgetpu_pm_get(struct edgetpu_pm *etpm)
 
 	if (!etpm || !etpm->p->handlers || !etpm->p->handlers->power_up)
 		return 0;
+	mutex_lock(&etpm->p->lock);
 	power_up_count = etpm->p->power_up_count++;
-	if (!power_up_count)
+	if (!power_up_count) {
 		ret = etpm->p->handlers->power_up(etpm);
+		if (!ret)
+			edgetpu_mailbox_restore_active_vii_queues(etpm->etdev);
+	}
 	if (ret)
 		etpm->p->power_up_count--;
+	mutex_unlock(&etpm->p->lock);
 	return ret;
 }
 
@@ -36,13 +45,18 @@ void edgetpu_pm_put(struct edgetpu_pm *etpm)
 {
 	if (!etpm || !etpm->p->handlers || !etpm->p->handlers->power_down)
 		return;
+	mutex_lock(&etpm->p->lock);
 	if (!etpm->p->power_up_count) {
 		dev_err(etpm->etdev->dev, "Unbalanced pm_put");
 		WARN_ON(1);
+		mutex_unlock(&etpm->p->lock);
 		return;
 	}
-	if (!--etpm->p->power_up_count)
+	if (!--etpm->p->power_up_count) {
+		edgetpu_sw_wdt_stop(etpm->etdev);
 		etpm->p->handlers->power_down(etpm);
+	}
+	mutex_unlock(&etpm->p->lock);
 }
 
 int edgetpu_pm_create(struct edgetpu_dev *etdev,
@@ -70,6 +84,8 @@ int edgetpu_pm_create(struct edgetpu_dev *etdev,
 	etpm->p->handlers = handlers;
 	etpm->etdev = etdev;
 
+	mutex_init(&etpm->p->lock);
+
 	if (handlers->after_create) {
 		ret = handlers->after_create(etpm);
 		if (ret) {
@@ -108,6 +124,7 @@ void edgetpu_pm_shutdown(struct edgetpu_dev *etdev)
 
 	if (!etpm)
 		return;
+	mutex_lock(&etpm->p->lock);
 	if (etdev->firmware)
 		edgetpu_firmware_lock(etdev);
 	if (etpm->p->power_up_count) {
@@ -118,4 +135,15 @@ void edgetpu_pm_shutdown(struct edgetpu_dev *etdev)
 		etpm->p->handlers->power_down(etpm);
 	if (etdev->firmware)
 		edgetpu_firmware_unlock(etdev);
+	mutex_unlock(&etpm->p->lock);
+}
+
+bool edgetpu_is_powered(struct edgetpu_dev *etdev)
+{
+	const struct edgetpu_pm *pm = etdev->pm;
+
+	/* No power interface: assume powered on; else check the use count. */
+	if (pm)
+		return pm->p->power_up_count != 0;
+	return true;
+}
diff --git a/drivers/edgetpu/edgetpu-pm.h b/drivers/edgetpu/edgetpu-pm.h
index 8d8491e..82eb154 100644
--- a/drivers/edgetpu/edgetpu-pm.h
+++ b/drivers/edgetpu/edgetpu-pm.h
@@ -63,4 +63,7 @@ void edgetpu_pm_destroy(struct edgetpu_dev *etdev);
  */
 void edgetpu_pm_shutdown(struct edgetpu_dev *etdev);
 
+/* Check if device is powered on. power_up_count is not protected by a lock */
+bool edgetpu_is_powered(struct edgetpu_dev *etdev);
+
 #endif /* __EDGETPU_PM_H__ */
diff --git a/drivers/edgetpu/edgetpu-sw-watchdog.c b/drivers/edgetpu/edgetpu-sw-watchdog.c
new file mode 100644
index 0000000..4cb96e3
--- /dev/null
+++ b/drivers/edgetpu/edgetpu-sw-watchdog.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Edge TPU software WDT interface.
+ *
+ * Copyright (C) 2020 Google, Inc.
+ */
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#include "edgetpu-internal.h"
+#include "edgetpu-kci.h"
+#include "edgetpu-sw-watchdog.h"
+
+/*
+ * Worker to execute action callback handler on watchdog bite.
+ *
+ * Runs the registered handler with its data; does nothing if no handler has
+ * been set.
+ */
+static void sw_wdt_handler_work(struct work_struct *work)
+{
+	struct edgetpu_sw_wdt_action_work *et_action_work =
+		container_of(work, struct edgetpu_sw_wdt_action_work, work);
+
+	if (et_action_work->edgetpu_sw_wdt_handler)
+		et_action_work->edgetpu_sw_wdt_handler(et_action_work->data);
+}
+
+/*
+ * Ping the f/w for a response. Reschedule the work for next beat
+ * in case of response or schedule a worker for action callback in case of
+ * TIMEOUT.
+ *
+ * Any other result, including non-timeout errors, reschedules the ping; such
+ * errors are only logged at debug level.
+ */
+static void sw_wdt_work(struct work_struct *work)
+{
+	int ret;
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct edgetpu_sw_wdt *etdev_sw_wdt =
+		container_of(dwork, struct edgetpu_sw_wdt, dwork);
+	struct edgetpu_dev *etdev = etdev_sw_wdt->etdev;
+
+	/* ping f/w */
+	etdev_dbg(etdev, "sw wdt: pinging firmware\n");
+	ret = edgetpu_kci_ack(etdev->kci);
+	if (ret)
+		etdev_dbg(etdev, "sw-watchdog ping resp:%d\n", ret);
+	if (ret == -ETIMEDOUT) {
+		etdev_err(etdev, "sw-watchdog response timed out\n");
+		schedule_work(&etdev_sw_wdt->et_action_work.work);
+	} else {
+		/* reschedule to next beat. */
+		schedule_delayed_work(dwork, etdev_sw_wdt->hrtbeat_jiffs);
+	}
+}
+
+/*
+ * Allocates the watchdog of @etdev with a heartbeat of @hrtbeat_ms
+ * milliseconds. The watchdog is not started here.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation failed.
+ */
+int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long hrtbeat_ms)
+{
+	struct edgetpu_sw_wdt *etdev_sw_wdt;
+
+	etdev_sw_wdt = kzalloc(sizeof(*etdev_sw_wdt), GFP_KERNEL);
+	if (!etdev_sw_wdt)
+		return -ENOMEM;
+
+	etdev_sw_wdt->etdev = etdev;
+	etdev_sw_wdt->hrtbeat_jiffs = msecs_to_jiffies(hrtbeat_ms);
+	INIT_DELAYED_WORK(&etdev_sw_wdt->dwork, sw_wdt_work);
+	INIT_WORK(&etdev_sw_wdt->et_action_work.work, sw_wdt_handler_work);
+	etdev->etdev_sw_wdt = etdev_sw_wdt;
+	return 0;
+}
+
+/*
+ * Schedules the first ping one heartbeat from now.
+ *
+ * A missing bite handler is only logged; the watchdog is started regardless.
+ *
+ * Returns 0 on success, -EINVAL if the watchdog has not been created.
+ */
+int edgetpu_sw_wdt_start(struct edgetpu_dev *etdev)
+{
+	struct edgetpu_sw_wdt *etdev_sw_wdt = etdev->etdev_sw_wdt;
+
+	if (!etdev_sw_wdt)
+		return -EINVAL;
+	if (!etdev_sw_wdt->et_action_work.edgetpu_sw_wdt_handler)
+		etdev_err(etdev, "sw wdt handler not set\n");
+	etdev_dbg(etdev, "sw wdt: started\n");
+	schedule_delayed_work(&etdev_sw_wdt->dwork,
+			      etdev_sw_wdt->hrtbeat_jiffs);
+	return 0;
+}
+
+/*
+ * Cancels the periodic ping and waits for a running one to finish. Work
+ * already queued for a watchdog bite is not cancelled. No-op if the watchdog
+ * has not been created.
+ */
+void edgetpu_sw_wdt_stop(struct edgetpu_dev *etdev)
+{
+	if (!etdev->etdev_sw_wdt)
+		return;
+	etdev_dbg(etdev, "sw wdt: stopped\n");
+	cancel_delayed_work_sync(&etdev->etdev_sw_wdt->dwork);
+}
+
+/*
+ * Stops the watchdog and frees it. No-op if the watchdog has not been created.
+ */
+void edgetpu_sw_wdt_destroy(struct edgetpu_dev *etdev)
+{
+	struct edgetpu_sw_wdt *etdev_sw_wdt = etdev->etdev_sw_wdt;
+
+	if (!etdev_sw_wdt)
+		return;
+	/*
+	 * Stop the ping work first: it is the one that queues the action work
+	 * on timeout, so cancelling the action work before it could let a late
+	 * ping queue it again and run it after the kfree() below (UAF).
+	 */
+	edgetpu_sw_wdt_stop(etdev);
+	cancel_work_sync(&etdev_sw_wdt->et_action_work.work);
+	kfree(etdev_sw_wdt);
+	etdev->etdev_sw_wdt = NULL;
+}
+
+/*
+ * Registers @handler_cb, called with @data on a watchdog bite. No-op if the
+ * watchdog has not been created.
+ */
+void edgetpu_sw_wdt_set_handler(struct edgetpu_dev *etdev,
+				void (*handler_cb)(void *), void *data)
+{
+	struct edgetpu_sw_wdt *et_sw_wdt = etdev->etdev_sw_wdt;
+
+	if (!et_sw_wdt)
+		return;
+	et_sw_wdt->et_action_work.edgetpu_sw_wdt_handler = handler_cb;
+	et_sw_wdt->et_action_work.data = data;
+}
+
+/*
+ * Switches the heartbeat to @hrtbeat_ms milliseconds.
+ *
+ * The watchdog is restarted only when the interval (in jiffies) changes;
+ * otherwise, or when the watchdog has not been created, nothing is done.
+ */
+void edgetpu_sw_wdt_modify_heartbeat(struct edgetpu_dev *etdev,
+				     unsigned long hrtbeat_ms)
+{
+	struct edgetpu_sw_wdt *etdev_sw_wdt = etdev->etdev_sw_wdt;
+	unsigned long hrtbeat_jiffs = msecs_to_jiffies(hrtbeat_ms);
+
+	if (!etdev_sw_wdt)
+		return;
+	/*
+	 * check if (et_action_work) is pending, since after watchdog bite
+	 * there is no need to restart another work.
+	 */
+	if (work_pending(&etdev_sw_wdt->et_action_work.work))
+		return;
+	if (hrtbeat_jiffs != etdev_sw_wdt->hrtbeat_jiffs) {
+		edgetpu_sw_wdt_stop(etdev);
+		etdev_sw_wdt->hrtbeat_jiffs = hrtbeat_jiffs;
+		edgetpu_sw_wdt_start(etdev);
+	}
+}
diff --git a/drivers/edgetpu/edgetpu-sw-watchdog.h b/drivers/edgetpu/edgetpu-sw-watchdog.h
new file mode 100644
index 0000000..931cc08
--- /dev/null
+++ b/drivers/edgetpu/edgetpu-sw-watchdog.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Edge TPU software WDT interface.
+ *
+ * Copyright (C) 2020 Google, Inc.
+ */
+#ifndef __EDGETPU_SW_WDT_H__
+#define __EDGETPU_SW_WDT_H__
+
+#include <linux/workqueue.h>
+
+#include "edgetpu-internal.h"
+
+#define EDGETPU_ACTIVE_DEV_BEAT_MS 15000 /* 15 seconds */
+#define EDGETPU_DORMANT_DEV_BEAT_MS 60000 /* 60 seconds */
+
+struct edgetpu_sw_wdt_action_work {
+	struct work_struct work;
+	/* pending function to be called on watchdog bite. */
+	void (*edgetpu_sw_wdt_handler)(void *data);
+	/* optional data can be used by callback function. */
+	void *data;
+};
+
+struct edgetpu_sw_wdt {
+	struct delayed_work dwork;
+	/* edgetpu device this watchdog is monitoring. */
+	struct edgetpu_dev *etdev;
+	/* time period in jiffies in pinging device firmware. */
+	unsigned long hrtbeat_jiffs;
+	/* work information for watchdog bite. */
+	struct edgetpu_sw_wdt_action_work et_action_work;
+};
+
+int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long hrtbeat_ms);
+int edgetpu_sw_wdt_start(struct edgetpu_dev *etdev);
+void edgetpu_sw_wdt_stop(struct edgetpu_dev *etdev);
+void edgetpu_sw_wdt_destroy(struct edgetpu_dev *etdev);
+/*
+ * Set callback function @handler_cb and optional param @data which is to be
+ * called on f/w ping timeout.
+ */
+void edgetpu_sw_wdt_set_handler(struct edgetpu_dev *etdev,
+				void (*handler_cb)(void *), void *data);
+/* Change the heartbeat interval; restarts the watchdog only if it differs. */
+void edgetpu_sw_wdt_modify_heartbeat(struct edgetpu_dev *etdev,
+				     unsigned long hrtbeat_ms);
+
+#endif /* __EDGETPU_SW_WDT_H__ */
diff --git a/drivers/edgetpu/edgetpu-telemetry.c b/drivers/edgetpu/edgetpu-telemetry.c
index 67e8270..bd21597 100644
--- a/drivers/edgetpu/edgetpu-telemetry.c
+++ b/drivers/edgetpu/edgetpu-telemetry.c
@@ -6,6 +6,7 @@
  */
 #ifdef CONFIG_X86
 #include <linux/printk.h>	// pr_warn used by set_memory.h
+#include <asm/pgtable_types.h>
 #include <asm/set_memory.h>
 #endif
 #include <linux/dma-mapping.h>
@@ -253,40 +254,8 @@ static void edgetpu_fw_trace(struct edgetpu_dev *etdev,
 			     struct edgetpu_telemetry *trace)
 {
 	struct edgetpu_telemetry_header *header = trace->header;
-#ifndef DEBUG
-	header->head = header->tail;
-#else /* DEBUG */
-	struct edgetpu_trace_entry_header entry;
-	u8 *start;
-	const size_t queue_size = trace->coherent_mem.size - sizeof(*header);
-	const size_t max_length = queue_size - sizeof(entry);
-	char *buffer = kmalloc(max_length + 1, GFP_ATOMIC);
 
-	if (!buffer) {
-		header->head = header->tail;
-		etdev_err_ratelimited(etdev, "failed to allocate trace buffer");
-		return;
-	}
-	start = (u8 *)header + sizeof(*header);
-
-	while (header->head != header->tail) {
-		copy_with_wrap(header, &entry, sizeof(entry), queue_size,
-			       start);
-		if (entry.length > max_length) {
-			header->head = header->tail;
-			etdev_err_ratelimited(etdev,
-					      "trace queue is corrupted");
-			break;
-		}
-		copy_with_wrap(header, buffer, entry.length, queue_size, start);
-		buffer[entry.length] = 0;
-		etdev_dbg_ratelimited(
-			etdev, "trace: %s: tid=%llu, start=%lld, end=%lld\n",
-			buffer, entry.thread_id, entry.start_timestamp,
-			entry.end_timestamp);
-	}
-	kfree(buffer);
-#endif /* DEBUG */
+	header->head = header->tail;
 }
 
 /*
diff --git a/drivers/edgetpu/edgetpu-telemetry.h b/drivers/edgetpu/edgetpu-telemetry.h
index 49ec4ad..66f9f3b 100644
--- a/drivers/edgetpu/edgetpu-telemetry.h
+++ b/drivers/edgetpu/edgetpu-telemetry.h
@@ -45,14 +45,6 @@ struct edgetpu_log_entry_header {
 	u16 crc16;
 } __packed;
 
-struct edgetpu_trace_entry_header {
-	u64 thread_id;
-	s64 start_timestamp;
-	s64 end_timestamp;
-	u16 length; /* the size of string appended after this entry in bytes */
-	u16 crc16;
-} __packed;
-
 struct edgetpu_telemetry {
 	/*
 	 * State transitioning is to prevent racing in IRQ handlers. e.g. the
diff --git a/drivers/edgetpu/edgetpu-thermal.h b/drivers/edgetpu/edgetpu-thermal.h
index 0996c1f..c9d38bc 100644
--- a/drivers/edgetpu/edgetpu-thermal.h
+++ b/drivers/edgetpu/edgetpu-thermal.h
@@ -21,7 +21,7 @@ struct edgetpu_thermal {
 	struct thermal_cooling_device *cdev;
 	struct mutex lock;
 	void *op_data;
-	unsigned long pwr_state;
+	unsigned long cooling_state;
 };
 
 struct edgetpu_state_pwr {
diff --git a/drivers/edgetpu/edgetpu.h b/drivers/edgetpu/edgetpu.h
index 1882fdc..dcb8521 100644
--- a/drivers/edgetpu/edgetpu.h
+++ b/drivers/edgetpu/edgetpu.h
@@ -37,6 +37,9 @@ typedef __u32 edgetpu_map_flag_t;
 #define EDGETPU_MAP_CPU_NONACCESSIBLE	(1u << 3)
 /* Skip CPU sync on unmap */
 #define EDGETPU_MAP_SKIP_CPU_SYNC	(1u << 4)
+/* Offset and mask to set the PBHA bits of IOMMU mappings */
+#define EDGETPU_MAP_ATTR_PBHA_SHIFT	5
+#define EDGETPU_MAP_ATTR_PBHA_MASK	0xf
 
 struct edgetpu_map_ioctl {
 	__u64 host_address;
@@ -68,7 +71,8 @@ struct edgetpu_map_ioctl {
 	 *               0 = Don't skip CPU sync. Default DMA API behavior.
 	 *               1 = Skip CPU sync.
 	 *             Note: This bit is ignored on the map call.
-	 *   [31:5]  - RESERVED
+	 *   [8:5]   - Value of PBHA bits for IOMMU mappings. For Abrolhos only.
+	 *   [31:9]  - RESERVED
 	 */
 	edgetpu_map_flag_t flags;
 	/*
@@ -107,6 +111,7 @@ struct edgetpu_map_ioctl {
  * for notifications.
  */
 #define EDGETPU_EVENT_RESPDATA		0
+#define EDGETPU_EVENT_FATAL_ERROR	1
 
 struct edgetpu_event_register {
 	__u32 event_id;
@@ -117,7 +122,8 @@ struct edgetpu_event_register {
 #define EDGETPU_SET_EVENTFD \
 	_IOR(EDGETPU_IOCTL_BASE, 5, struct edgetpu_event_register)
 
-struct edgetpu_mailbox_attr {
+/* TODO(b/167151866): remove this structure and EDGETPU_CREATE_GROUP_COMPAT */
+struct edgetpu_mailbox_attr_compat {
 	__u32 cmd_queue_size    : 10; /* size of cmd queue in KB */
 	__u32 resp_queue_size   : 10; /* size of response queue in KB */
 	__u32 priority          :  4; /* mailbox service priority */
@@ -125,8 +131,34 @@ struct edgetpu_mailbox_attr {
 };
 
 /* Create a new device group with the caller as the master. */
+#define EDGETPU_CREATE_GROUP_COMPAT \
+	_IOR(EDGETPU_IOCTL_BASE, 6, struct edgetpu_mailbox_attr_compat)
+
+struct edgetpu_mailbox_attr {
+	/*
+	 * There are limitations on these size fields, see the error cases in
+	 * EDGETPU_CREATE_GROUP.
+	 */
+
+	__u32 cmd_queue_size; /* size of command queue in KB */
+	__u32 resp_queue_size; /* size of response queue in KB */
+	__u32 sizeof_cmd; /* size of command element in bytes */
+	__u32 sizeof_resp; /* size of response element in bytes */
+	__u32 priority          : 4; /* mailbox service priority */
+	__u32 cmdq_tail_doorbell: 1; /* auto doorbell on cmd queue tail move */
+};
+
+/*
+ * Create a new device group with the caller as the master.
+ *
+ * EINVAL: If the caller already belongs to a group.
+ * EINVAL: If @cmd/resp_queue_size equals 0.
+ * EINVAL: If @sizeof_cmd/resp equals 0.
+ * EINVAL: If @cmd_queue_size * 1024 / @sizeof_cmd >= 1024, this is a hardware
+ *         limitation. Same rule for the response sizes pair.
+ */
 #define EDGETPU_CREATE_GROUP \
-	_IOR(EDGETPU_IOCTL_BASE, 6, struct edgetpu_mailbox_attr)
+	_IOW(EDGETPU_IOCTL_BASE, 6, struct edgetpu_mailbox_attr)
 
 /* Join the calling fd to the device group of the supplied fd. */
 #define EDGETPU_JOIN_GROUP \
@@ -255,8 +287,8 @@ struct edgetpu_map_dmabuf_ioctl {
 	 * Flags indicating mapping attributes. See edgetpu_map_ioctl.flags for
 	 * details.
 	 *
-	 * Note: the SKIP_CPU_SYNC flag is ignored, the behavior of
-	 * synchronization on unmap is controlled by the dma-buf exporter.
+	 * Note: the SKIP_CPU_SYNC and PBHA flags are ignored; the DMA flags to
+	 * be used are controlled by the dma-buf exporter.
 	 */
 	edgetpu_map_flag_t flags;
 	/*
@@ -418,4 +450,18 @@ struct edgetpu_sync_fence_status {
 #define EDGETPU_SYNC_FENCE_STATUS \
 	_IOWR(EDGETPU_IOCTL_BASE, 24, struct edgetpu_sync_fence_status)
 
+/*
+ * Release the current client's wakelock, allowing firmware to be shut down if
+ * no other clients are active.
+ * Groups and buffer mappings are preserved.
+ * WARNING: Attempts to access any mapped CSRs before re-acquiring the wakelock
+ * may crash the system.
+ */
+#define EDGETPU_RELEASE_WAKE_LOCK	_IO(EDGETPU_IOCTL_BASE, 25)
+
+/*
+ * Acquire the wakelock for this client, ensures firmware keeps running.
+ */
+#define EDGETPU_ACQUIRE_WAKE_LOCK	_IO(EDGETPU_IOCTL_BASE, 26)
+
 #endif /* __EDGETPU_H__ */
author	Nrithya Kanakasabapathy <nrithya@google.com>	2020-10-07 23:08:08 -0700
committer	Erick Reyes <erickreyes@google.com>	2020-10-09 18:22:40 -0700
commit	b96c6d828313120c0dc07b4754c6642a139e876e (patch)
tree	89ccf54339565c2966c7066ed5a910acd15325af
parent	14f470f43738ff1aa682aed5394a4d5d86f557bd (diff)
download	abrolhos-b96c6d828313120c0dc07b4754c6642a139e876e.tar.gz