diff options
author | Aurora pro automerger <aurora-pro-automerger@google.com> | 2023-02-02 11:46:21 +0000 |
---|---|---|
committer | davidchiang <davidchiang@google.com> | 2023-02-02 12:17:17 +0000 |
commit | 51c89d9e9a7664ff4068fa1405fca5a5fe60aac0 (patch) | |
tree | 8fa3c34a8dee0d30896f6efc3effa8f86d313e42 | |
parent | 8a4b0d93e1f8b9ae23fb476eef4d43f18bc3cc1d (diff) | |
download | gs201-51c89d9e9a7664ff4068fa1405fca5a5fe60aac0.tar.gz |
gxp: [Copybara Auto Merge] Merge branch 'gs201-u' into 'android13-gs-pixel-5.10-udc' (tags: android-u-preview-2_r0.4, android-u-preview-2_r0.3, android-gs-pantah-5.10-u-preview-2, android-gs-bluejay-5.10-u-preview-2)
gcip: use v*alloc without node
GCIP_MAIN_REV_ID: e48d8c14dff78d70064f93e5faee61c17b0479c2
gxp: create debug dir root earlier on probe
gxp: fix code style/formatting issues
gxp: Implement debug dump handling for mcu mode
Bug: 265092842
gcip: fix code style/formatting issues
GCIP_HEADERS_REV_ID: 8f57799df52bf60cb1805ab5e9ee48034bb139c8
gcip: expose config size calculation to header
Bug: 265605775
gcip: sync RKCI codes with fw
gcip: update comments of gcip_dma_fence_init
Bug: 264220687
gcip: add to_gcip_fence to gcip-dma-fence
Bug: 264220687 (repeat)
gcip: fix GCIP_DMA_FENCE_LIST_UNLOCK
Bug: 258876786
gcip: correct path of gcip-dma-fence.h
Bug: 258876786 (repeat)
gcip: add gcip-dma-fence.h
Bug: 258876786 (repeat)
gcip: introduce firmware crash type
Bug: 237739631
GCIP_HEADERS_REV_ID: 0a85dc4b06195de6efa46e2ca314548f07f30097
gxp: move core config region to second half of buf
gxp: boot status and doorbell for suspend/resume
Bug: 265742153
gxp: set VD and core config region
Bug: 265742153 (repeat)
gxp: use core cfg region for firmware handshake
Bug: 265742153 (repeat)
gxp: boot with virt_core on per-VD config method
Bug: 265742153 (repeat)
gxp: only map fwdata region in legacy protocol
Bug: 265742153 (repeat)
gxp: populate system config on fw data create
Bug: 265742153 (repeat)
gxp: flush SGT after map iova sgt
Bug: 265564221
gxp: remove gxp_vd_phys_core_list
gxp: signal eventfd when client is invalidated
Bug: 264830822
gxp: introduce GXP_(UN)REGISTER_INVALIDATED_EVENTFD ioctl
Bug: 264830822 (repeat)
gxp: add create/destroy app legacy functions
Bug: 265742153 (repeat)
gxp: pass vd to fw_data_create_app
Bug: 265742153 (repeat)
gxp: add structures for new app config
Bug: 265742153 (repeat)
gxp: add sys_cfg mapping support
Bug: 265605775 (repeat)
gxp: all chips/modes have shared region
Bug: 265605775 (repeat)
gxp: map config regions per image config
Bug: 265605775 (repeat)
gxp: set shared_buf vaddr
Bug: 265605775 (repeat)
gxp: map image config NS mappings
Bug: 265564221 (repeat)
gxp: use per VD scratchpad region
Bug: 261797596
gxp: conditionally map FW image region
Bug: 265105395
gxp: add lock_class_key to struct client
gxp: add image config parser to VD
Bug: 265105395 (repeat)
gxp: record image config on firmware requested
Bug: 265105395 (repeat)
gxp: add missing up_read in gxp_debugfs_mailbox
gxp: implement vd invalidation
Bug: 264831024
gxp: pass timeout_ms to gxp_pm_is_blk_down
gxp: set is_firmware_requested on SysFS FW load
gxp: set SSMT to bypass in MCU mode
Bug: 264950137
Revert "gxp: fetch segment boundaries from ELF header"
gxp: add gcip_dma_fence_manager to gxp_dev
Bug: 258876786 (repeat)
gxp: add gxp-dma-fence support
Bug: 258876786 (repeat)
gxp: introduce GXP_NAME macro
gxp: return vdid on VD allocation ioctl
Bug: 264827584
gxp: add VDID support
Bug: 264827584 (repeat)
gxp: remove a cleanup TODO from vd.h
gxp: add refcount to virtual device
Bug: 264739996
gxp: add sync fence interfaces
Bug: 258876786 (repeat)
gxp: detach vd->domain when the state is invalid
Bug: 263830035
gxp: call vd_block_unready when failed to run core fw
Bug: 263830035 (repeat)
gxp: apply clang-format to gxp-vd.c
Bug: 263830035 (repeat)
gxp: allow debugfs mailbox only with valid vd
Bug: 264629015
gxp: add debugfs_client to client_list
Bug: 264629015 (repeat)
gxp: hold vd_semaphore only when direct mode
Bug: 263830035 (repeat)
gxp: fix vd_semaphore locking
Bug: 263215610
gxp: revert VD state on after_vd_block_ready fail
Bug: 263215610 (repeat)
gcip: conditionally zero the non-contiguous region
gcip: expose config size calculation to header
Bug: 265605775 (repeat)
gcip: Use strscpy instead of memcpy
gcip: enhance image config NS mapping decoding
Bug: 265565307
gcip: don't fail dma_fence_init on long name
Bug: 264220687 (repeat)
gcip: implement gcip_dma_fence_show
Bug: 264220687 (repeat)
gcip: implement DMA fence status and signal
Bug: 264220687 (repeat)
gcip: add gcip-dma-fence.c
Bug: 258876786 (repeat)
GCIP_MAIN_REV_ID: 0a2e7b7e345705db78c8108890781cbe5c8c4eb7
GitOrigin-RevId: 277fffe779cbaf54c2d811928835e9a5ec67e832
Change-Id: Ied761f3000df6a44856db9438d21afd9b3ccb9aa
33 files changed, 2686 insertions, 699 deletions
@@ -15,6 +15,7 @@ gxp-objs += \ gxp-core-telemetry.o \ gxp-debug-dump.o \ gxp-debugfs.o \ + gxp-dma-fence.o \ gxp-dma-iommu.o \ gxp-dmabuf.o \ gxp-domain-pool.o \ diff --git a/amalthea/config.h b/amalthea/config.h index 79f9eb5..bc81e42 100644 --- a/amalthea/config.h +++ b/amalthea/config.h @@ -8,6 +8,8 @@ #ifndef __AMALTHEA_CONFIG_H__ #define __AMALTHEA_CONFIG_H__ +#include <linux/sizes.h> + #define GXP_DRIVER_NAME "gxp_platform" #define DSP_FIRMWARE_DEFAULT_PREFIX "gxp_fw_core" @@ -17,6 +19,13 @@ #define GXP_NUM_MAILBOXES GXP_NUM_CORES #define GXP_NUM_WAKEUP_DOORBELLS GXP_NUM_CORES +/* The total size of the configuration region. */ +#define GXP_SHARED_BUFFER_SIZE SZ_256K +/* Size of slice per VD. */ +#define GXP_SHARED_SLICE_SIZE 0x9000 /* 36K */ +/* At most GXP_NUM_CORES VDs can be supported on Amalthea. */ +#define GXP_NUM_SHARED_SLICES GXP_NUM_CORES + #define GXP_USE_LEGACY_MAILBOX 1 #define GXP_HAS_MCU 0 diff --git a/gcip-kernel-driver/drivers/gcip/Makefile b/gcip-kernel-driver/drivers/gcip/Makefile index c3424ee..ab68776 100644 --- a/gcip-kernel-driver/drivers/gcip/Makefile +++ b/gcip-kernel-driver/drivers/gcip/Makefile @@ -6,8 +6,14 @@ CONFIG_GCIP ?= m obj-$(CONFIG_GCIP) += gcip.o -gcip-objs := gcip-alloc-helper.o gcip-domain-pool.o gcip-firmware.o \ - gcip-image-config.o gcip-kci.o gcip-mailbox.o gcip-mem-pool.o \ +gcip-objs := gcip-alloc-helper.o \ + gcip-dma-fence.o \ + gcip-domain-pool.o \ + gcip-firmware.o \ + gcip-image-config.o \ + gcip-kci.o \ + gcip-mailbox.o \ + gcip-mem-pool.o \ gcip-telemetry.o CURRENT_DIR=$(dir $(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/gcip-kernel-driver/drivers/gcip/gcip-alloc-helper.c b/gcip-kernel-driver/drivers/gcip/gcip-alloc-helper.c index 33c95e2..85af8e5 100644 --- a/gcip-kernel-driver/drivers/gcip/gcip-alloc-helper.c +++ b/gcip-kernel-driver/drivers/gcip/gcip-alloc-helper.c @@ -7,6 +7,7 @@ #include <asm/page.h> #include <linux/device.h> +#include <linux/gfp.h> #include <linux/mm_types.h> #include 
<linux/scatterlist.h> #include <linux/slab.h> @@ -47,7 +48,10 @@ struct sg_table *gcip_alloc_noncontiguous(struct device *dev, size_t size, gfp_t size = PAGE_ALIGN(size); count = size >> PAGE_SHIFT; - mem = vzalloc_node(size, dev_to_node(dev)); + if (gfp & __GFP_ZERO) + mem = vzalloc(size); + else + mem = vmalloc(size); if (!mem) { dev_err(dev, "GCIP noncontiguous alloc size=%#zx failed", size); goto err_free_sh; diff --git a/gcip-kernel-driver/drivers/gcip/gcip-dma-fence.c b/gcip-kernel-driver/drivers/gcip/gcip-dma-fence.c new file mode 100644 index 0000000..4f83670 --- /dev/null +++ b/gcip-kernel-driver/drivers/gcip/gcip-dma-fence.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GCIP support of DMA fences. + * + * Copyright (C) 2023 Google LLC + */ + +#include <linux/bitops.h> +#include <linux/device.h> +#include <linux/dma-fence.h> +#include <linux/err.h> +#include <linux/file.h> +#include <linux/ktime.h> +#include <linux/list.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/sync_file.h> +#include <linux/time.h> + +#include <gcip/gcip-dma-fence.h> + +#define to_gfence(fence) container_of(fence, struct gcip_dma_fence, fence) + +static int _gcip_dma_fence_signal(struct dma_fence *fence, int error, bool ignore_signaled) +{ + int ret; + + if (error > 0) + error = -error; + if (unlikely(error < -MAX_ERRNO)) + return -EINVAL; + + spin_lock_irq(fence->lock); + /* don't signal fence twice */ + if (unlikely(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) { + ret = ignore_signaled ? 
0 : -EBUSY; + goto out_unlock; + } + if (error) + dma_fence_set_error(fence, error); + ret = dma_fence_signal_locked(fence); + +out_unlock: + spin_unlock_irq(fence->lock); + return ret; +} + +static const char *sync_status_str(int status) +{ + if (status < 0) + return "error"; + if (status > 0) + return "signaled"; + return "active"; +} + +struct gcip_dma_fence_manager *gcip_dma_fence_manager_create(struct device *dev) +{ + struct gcip_dma_fence_manager *mgr = devm_kzalloc(dev, sizeof(*mgr), GFP_KERNEL); + + if (!mgr) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&mgr->fence_list_head); + spin_lock_init(&mgr->fence_list_lock); + mgr->dev = dev; + + return mgr; +} + +const char *gcip_dma_fence_get_timeline_name(struct dma_fence *fence) +{ + struct gcip_dma_fence *gfence = to_gfence(fence); + + return gfence->timeline_name; +} + +bool gcip_dma_fence_always_true(struct dma_fence *fence) +{ + return true; +} + +int gcip_dma_fence_init(struct gcip_dma_fence_manager *mgr, struct gcip_dma_fence *gfence, + struct gcip_dma_fence_data *data) +{ + unsigned long flags; + int fd; + struct sync_file *sync_file; + int ret; + + strscpy(gfence->timeline_name, data->timeline_name, GCIP_FENCE_TIMELINE_NAME_LEN); + + spin_lock_init(&gfence->lock); + INIT_LIST_HEAD(&gfence->fence_list); + gfence->mgr = mgr; + + dma_fence_init(&gfence->fence, data->ops, &gfence->lock, dma_fence_context_alloc(1), + data->seqno); + GCIP_DMA_FENCE_LIST_LOCK(mgr, flags); + list_add_tail(&gfence->fence_list, &mgr->fence_list_head); + GCIP_DMA_FENCE_LIST_UNLOCK(mgr, flags); + + if (data->after_init) { + ret = data->after_init(gfence); + if (ret) { + dev_err(mgr->dev, "DMA fence init failed on after_init: %d", ret); + goto err_put_fence; + } + } + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + ret = fd; + dev_err(mgr->dev, "Failed to get FD: %d", ret); + goto err_put_fence; + } + sync_file = sync_file_create(&gfence->fence); + if (!sync_file) { + dev_err(mgr->dev, "Failed to create sync file"); + ret = 
-ENOMEM; + goto err_put_fd; + } + /* sync_file holds the reference to fence, so we can drop our reference. */ + dma_fence_put(&gfence->fence); + + fd_install(fd, sync_file->file); + data->fence = fd; + return 0; + +err_put_fd: + put_unused_fd(fd); +err_put_fence: + dma_fence_put(&gfence->fence); + return ret; +} + +void gcip_dma_fence_exit(struct gcip_dma_fence *gfence) +{ + unsigned long flags; + + GCIP_DMA_FENCE_LIST_LOCK(gfence->mgr, flags); + list_del(&gfence->fence_list); + GCIP_DMA_FENCE_LIST_UNLOCK(gfence->mgr, flags); +} + +int gcip_dma_fence_status(int fence, int *status) +{ + struct dma_fence *fencep; + + fencep = sync_file_get_fence(fence); + if (!fencep) + return -EBADF; + *status = dma_fence_get_status(fencep); + dma_fence_put(fencep); + return 0; +} + +int gcip_dma_fence_signal(int fence, int error, bool ignore_signaled) +{ + struct dma_fence *fencep; + int ret; + + fencep = sync_file_get_fence(fence); + if (!fencep) + return -EBADF; + ret = _gcip_dma_fence_signal(fencep, error, ignore_signaled); + dma_fence_put(fencep); + return ret; +} + +int gcip_dma_fenceptr_signal(struct gcip_dma_fence *gfence, int error, bool ignore_signaled) +{ + return _gcip_dma_fence_signal(&gfence->fence, error, ignore_signaled); +} + +void gcip_dma_fence_show(struct gcip_dma_fence *gfence, struct seq_file *s) +{ + struct dma_fence *fence = &gfence->fence; + + spin_lock_irq(&gfence->lock); + + seq_printf(s, "%s-%s %llu-%llu %s", fence->ops->get_driver_name(fence), + fence->ops->get_timeline_name(fence), fence->context, fence->seqno, + sync_status_str(dma_fence_get_status_locked(fence))); + + if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) { + struct timespec64 ts = ktime_to_timespec64(fence->timestamp); + + seq_printf(s, " @%lld.%09ld", (s64)ts.tv_sec, ts.tv_nsec); + } + + if (fence->error) + seq_printf(s, " err=%d", fence->error); + + spin_unlock_irq(&gfence->lock); +} diff --git a/gcip-kernel-driver/drivers/gcip/gcip-image-config.c 
b/gcip-kernel-driver/drivers/gcip/gcip-image-config.c index 312bbdc..62acd0b 100644 --- a/gcip-kernel-driver/drivers/gcip/gcip-image-config.c +++ b/gcip-kernel-driver/drivers/gcip/gcip-image-config.c @@ -12,28 +12,6 @@ #include <gcip/gcip-image-config.h> -#define ADDR_SHIFT 12 -#define SIZE_MODE_BIT BIT(ADDR_SHIFT - 1) -#define SECURE_SIZE_MASK (SIZE_MODE_BIT - 1u) -#define NS_SIZE_MASK (BIT(ADDR_SHIFT) - 1u) -#define ADDR_MASK ~(BIT(ADDR_SHIFT) - 1u) - -/* used by ns_iommu_mappings */ -#define CONFIG_TO_MBSIZE(a) (((a) & NS_SIZE_MASK) << 20) - -/* used by iommu_mappings */ -static inline __u32 config_to_size(__u32 cfg) -{ - __u32 page_size; - - if (cfg & SIZE_MODE_BIT) - page_size = cfg & SECURE_SIZE_MASK; - else - page_size = BIT(cfg & SECURE_SIZE_MASK); - - return page_size << PAGE_SHIFT; -} - static int setup_iommu_mappings(struct gcip_image_config_parser *parser, struct gcip_image_config *config) { @@ -49,8 +27,8 @@ static int setup_iommu_mappings(struct gcip_image_config_parser *parser, ret = -EIO; goto err; } - size = config_to_size(config->iommu_mappings[i].image_config_value); - paddr = config->iommu_mappings[i].image_config_value & ADDR_MASK; + size = gcip_config_to_size(config->iommu_mappings[i].image_config_value); + paddr = config->iommu_mappings[i].image_config_value & GCIP_IMG_CFG_ADDR_MASK; dev_dbg(parser->dev, "Image config adding IOMMU mapping: %pad -> %pap", &daddr, &paddr); @@ -74,7 +52,7 @@ static int setup_iommu_mappings(struct gcip_image_config_parser *parser, err: while (i--) { daddr = config->iommu_mappings[i].virt_address; - size = config_to_size(config->iommu_mappings[i].image_config_value); + size = gcip_config_to_size(config->iommu_mappings[i].image_config_value); parser->ops->unmap(parser->data, daddr, size, GCIP_IMAGE_CONFIG_FLAGS_SECURE); } return ret; @@ -89,7 +67,7 @@ static void clear_iommu_mappings(struct gcip_image_config_parser *parser, for (i = config->num_iommu_mappings - 1; i >= 0; i--) { daddr = 
config->iommu_mappings[i].virt_address; - size = config_to_size(config->iommu_mappings[i].image_config_value); + size = gcip_config_to_size(config->iommu_mappings[i].image_config_value); dev_dbg(parser->dev, "Image config removing IOMMU mapping: %pad size=%#lx", &daddr, size); parser->ops->unmap(parser->data, daddr, size, GCIP_IMAGE_CONFIG_FLAGS_SECURE); @@ -105,13 +83,13 @@ static int setup_ns_iommu_mappings(struct gcip_image_config_parser *parser, phys_addr_t paddr = 0; for (i = 0; i < config->num_ns_iommu_mappings; i++) { - daddr = config->ns_iommu_mappings[i] & ADDR_MASK; + daddr = config->ns_iommu_mappings[i] & GCIP_IMG_CFG_ADDR_MASK; if (unlikely(!daddr)) { dev_warn(parser->dev, "Invalid config, device address is zero"); ret = -EIO; goto err; } - size = CONFIG_TO_MBSIZE(config->ns_iommu_mappings[i]); + size = gcip_ns_config_to_size(config->ns_iommu_mappings[i]); dev_dbg(parser->dev, "Image config adding NS IOMMU mapping: %pad -> %pap", &daddr, &paddr); if (unlikely(daddr + size <= daddr || paddr + size <= paddr)) { @@ -128,8 +106,8 @@ static int setup_ns_iommu_mappings(struct gcip_image_config_parser *parser, err: while (i--) { - size = CONFIG_TO_MBSIZE(config->ns_iommu_mappings[i]); - daddr = config->ns_iommu_mappings[i] & ADDR_MASK; + size = gcip_ns_config_to_size(config->ns_iommu_mappings[i]); + daddr = config->ns_iommu_mappings[i] & GCIP_IMG_CFG_ADDR_MASK; parser->ops->unmap(parser->data, daddr, size, 0); } return ret; @@ -143,8 +121,8 @@ static void clear_ns_iommu_mappings(struct gcip_image_config_parser *parser, int i; for (i = config->num_ns_iommu_mappings - 1; i >= 0; i--) { - size = CONFIG_TO_MBSIZE(config->ns_iommu_mappings[i]); - daddr = config->ns_iommu_mappings[i] & ADDR_MASK; + size = gcip_ns_config_to_size(config->ns_iommu_mappings[i]); + daddr = config->ns_iommu_mappings[i] & GCIP_IMG_CFG_ADDR_MASK; dev_dbg(parser->dev, "Image config removing NS IOMMU mapping: %pad size=%#lx", &daddr, size); parser->ops->unmap(parser->data, daddr, size, 0); 
diff --git a/gcip-kernel-driver/include/gcip/gcip-dma-fence.h b/gcip-kernel-driver/include/gcip/gcip-dma-fence.h new file mode 100644 index 0000000..c0a7d68 --- /dev/null +++ b/gcip-kernel-driver/include/gcip/gcip-dma-fence.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GCIP support of DMA fences. + * + * Copyright (C) 2023 Google LLC + */ + +#ifndef __GCIP_DMA_FENCE_H__ +#define __GCIP_DMA_FENCE_H__ + +#include <linux/device.h> +#include <linux/dma-fence.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#define GCIP_FENCE_TIMELINE_NAME_LEN 128 + +/* Used before accessing the list headed by mgr->fence_list_head. */ +#define GCIP_DMA_FENCE_LIST_LOCK(mgr, flags) spin_lock_irqsave(&mgr->fence_list_lock, flags) +#define GCIP_DMA_FENCE_LIST_UNLOCK(mgr, flags) spin_unlock_irqrestore(&mgr->fence_list_lock, flags) + +/* + * A macro to loop through all fences under a gcip_dma_fence_manager. + * @mgr: struct gcip_dma_fence_manager + * @gfence: struct gcip_dma_fence + * + * This macro must be wrapped by GCIP_DMA_FENCE_LIST_(UN)LOCK. + */ +#define gcip_for_each_fence(mgr, gfence) \ + list_for_each_entry(gfence, &mgr->fence_list_head, fence_list) + +#define to_gcip_fence(fence) container_of(fence, struct gcip_dma_fence, fence) + +struct gcip_dma_fence_manager { + /* The list of all fence objects for debugging. */ + struct list_head fence_list_head; + /* Protects the list headed by @fence_list_head. */ + spinlock_t fence_list_lock; + /* For logging. */ + struct device *dev; +}; + +struct gcip_dma_fence { + struct dma_fence fence; + /* The manager used to init this object. */ + struct gcip_dma_fence_manager *mgr; + char timeline_name[GCIP_FENCE_TIMELINE_NAME_LEN]; + /* Protects @fence. */ + spinlock_t lock; + /* Is protected by manager->fence_list_lock. */ + struct list_head fence_list; +}; + +struct gcip_dma_fence_data { + /* + * A null-terminated string with length less than GCIP_FENCE_TIMELINE_NAME_LEN. 
+ * The content of this buffer will be copied so it's fine to release this pointer after + * the gcip_dma_fence_init() call. + */ + char *timeline_name; + /* + * The DMA fence operators to initialize the fence with. + */ + const struct dma_fence_ops *ops; + /* The sequence number to initialize the fence with. */ + u32 seqno; + /* Output: The fd of the new sync_file with the new fence. */ + int fence; + /* + * The callback to be called after @gfence is initialized, before an FD has been installed. + * Returns 0 on success. A non-zero return value will revert the initialization of + * @gfence and the returned error is returned by gcip_dma_fence_init(). + * + * There is no 'before_exit' callback because the user is supposed to set a custom + * dma_fence_ops.release callback which does the revert of after_init and then call + * gcip_dma_fence_exit(). + * + * This callback is optional. + */ + int (*after_init)(struct gcip_dma_fence *gfence); +}; + +/* + * Allocates and returns a GCIP DMA fence manager. Memory is allocated as @dev managed so there is + * no release function of the manager. + * + * Returns a negative errno on error. + */ +struct gcip_dma_fence_manager *gcip_dma_fence_manager_create(struct device *dev); + +/* Helpers for setting dma_fence_ops. */ + +/* Returns the timeline name. @fence must be contained within a gcip_dma_fence. */ +const char *gcip_dma_fence_get_timeline_name(struct dma_fence *fence); + +/* Always return true. Can be used for the enable_signaling callback. */ +bool gcip_dma_fence_always_true(struct dma_fence *fence); + +/* End of helpers for setting dma_fence_ops. */ + +/* + * This function does + * 1. Initialize the DMA fence object + * 2. Call after_init() if present + * 3. Install an FD associates to the created DMA fence + * + * This function never fails on step 1, so this function returns an error only if after_init() fails + * (step 2) or FD allocation fails (step 3). + * In either failure case, @ops->release is always called. 
Therefore @ops->release may need to + * distinguish whether after_init() succeeded. + * + * It's always safe to call gcip_dma_fence_exit() in @ops->release because that function reverts + * step 1. + */ +int gcip_dma_fence_init(struct gcip_dma_fence_manager *mgr, struct gcip_dma_fence *gfence, + struct gcip_dma_fence_data *data); + +/* + * Reverts gcip_dma_fence_init(). Removes @gfence from the manager's list. + * This function will not free @gfence. + */ +void gcip_dma_fence_exit(struct gcip_dma_fence *gfence); + +/* + * Sets @status to the DMA fence status of DMA fence FD @fence. + * @status is only set when this function returns 0. + * + * It is OK if @fence does not refer to a gcip_dma_fence. + * + * Returns 0 on success. Otherwise a negative errno. + */ +int gcip_dma_fence_status(int fence, int *status); + +/* + * Signals the fence error of DMA fence FD @fence. + * + * If the fence has been signaled, + * - if @ignore_signaled is true, this function does nothing. + * - otherwise, returns -EALREADY. + * + * It is OK if @fence does not refer to a gcip_dma_fence. + * + * Returns 0 on success. Otherwise a negative errno. + */ +int gcip_dma_fence_signal(int fence, int error, bool ignore_signaled); +/* Identical to gcip_dma_fence_signal except this function accepts gcip_dma_fence as the input. */ +int gcip_dma_fenceptr_signal(struct gcip_dma_fence *gfence, int error, bool ignore_signaled); + +/* Prints data of @gfence to the sequence file @s. For debug purpose only. 
*/ +void gcip_dma_fence_show(struct gcip_dma_fence *gfence, struct seq_file *s); + +#endif /* __GCIP_DMA_FENCE_H__ */ diff --git a/gcip-kernel-driver/include/gcip/gcip-firmware.h b/gcip-kernel-driver/include/gcip/gcip-firmware.h index b856e5c..012a79a 100644 --- a/gcip-kernel-driver/include/gcip/gcip-firmware.h +++ b/gcip-kernel-driver/include/gcip/gcip-firmware.h @@ -35,6 +35,22 @@ enum gcip_fw_flavor { GCIP_FW_FLAVOR_CUSTOM = 4, }; +/* Type of firmware crash which will be sent by GCIP_RKCI_FIRMWARE_CRASH RKCI command. */ +enum gcip_fw_crash_type { + /* Assert happened. */ + GCIP_FW_CRASH_ASSERT_FAIL = 0, + /* Data abort exception. */ + GCIP_FW_CRASH_DATA_ABORT = 1, + /* Prefetch abort exception. */ + GCIP_FW_CRASH_PREFETCH_ABORT = 2, + /* Undefined exception. */ + GCIP_FW_CRASH_UNDEFINED_EXCEPTION = 3, + /* Exception which cannot be recovered by the firmware itself. */ + GCIP_FW_CRASH_UNRECOVERABLE_FAULT = 4, + /* Used in debug dump. */ + GCIP_FW_CRASH_DUMMY_CRASH_TYPE = 0xFF, +}; + /* Firmware info filled out via KCI FIRMWARE_INFO command. 
*/ struct gcip_fw_info { uint64_t fw_build_time; /* BuildData::Timestamp() */ diff --git a/gcip-kernel-driver/include/gcip/gcip-image-config.h b/gcip-kernel-driver/include/gcip/gcip-image-config.h index a995188..bcc506f 100644 --- a/gcip-kernel-driver/include/gcip/gcip-image-config.h +++ b/gcip-kernel-driver/include/gcip/gcip-image-config.h @@ -8,6 +8,8 @@ #ifndef __GCIP_IMAGE_CONFIG_H__ #define __GCIP_IMAGE_CONFIG_H__ +#include <asm/page.h> +#include <linux/bits.h> #include <linux/types.h> #define GCIP_FW_NUM_VERSIONS 4 @@ -80,6 +82,31 @@ struct gcip_image_config_parser { struct gcip_image_config last_config; }; +#define GCIP_IMG_CFG_ADDR_SHIFT 12 +#define GCIP_IMG_CFG_MB_SHIFT 20 +#define GCIP_IMG_CFG_SIZE_MODE_BIT BIT(GCIP_IMG_CFG_ADDR_SHIFT - 1) +#define GCIP_IMG_CFG_SECURE_SIZE_MASK (GCIP_IMG_CFG_SIZE_MODE_BIT - 1u) +#define GCIP_IMG_CFG_NS_SIZE_MASK (GCIP_IMG_CFG_SIZE_MODE_BIT - 1u) +#define GCIP_IMG_CFG_ADDR_MASK ~(BIT(GCIP_IMG_CFG_ADDR_SHIFT) - 1u) + +/* For decoding the size of ns_iommu_mappings. */ +static inline u32 gcip_ns_config_to_size(u32 cfg) +{ + if (cfg & GCIP_IMG_CFG_SIZE_MODE_BIT) + return (cfg & GCIP_IMG_CFG_NS_SIZE_MASK) << PAGE_SHIFT; + + return (cfg & GCIP_IMG_CFG_NS_SIZE_MASK) << GCIP_IMG_CFG_MB_SHIFT; +} + +/* For decoding the size of iommu_mappings. */ +static inline u32 gcip_config_to_size(u32 cfg) +{ + if (cfg & GCIP_IMG_CFG_SIZE_MODE_BIT) + return (cfg & GCIP_IMG_CFG_SECURE_SIZE_MASK) << PAGE_SHIFT; + + return BIT(cfg & GCIP_IMG_CFG_SECURE_SIZE_MASK) << PAGE_SHIFT; +} + /* * Initializes the image configuration parser. 
* diff --git a/gcip-kernel-driver/include/gcip/gcip-kci.h b/gcip-kernel-driver/include/gcip/gcip-kci.h index bda1b40..c95d6a7 100644 --- a/gcip-kernel-driver/include/gcip/gcip-kci.h +++ b/gcip-kernel-driver/include/gcip/gcip-kci.h @@ -109,6 +109,11 @@ enum gcip_kci_code { */ enum gcip_reverse_kci_code { GCIP_RKCI_CHIP_CODE_FIRST = 0, + GCIP_RKCI_PM_QOS_REQUEST, + GCIP_RKCI_CHANGE_BTS_SCENARIO, + GCIP_RKCI_PM_QOS_BTS_REQUEST, + GCIP_RKCI_DSP_CORE_TELEMETRY_TRY_READ, + GCIP_RKCI_CLIENT_FATAL_ERROR_NOTIFY, GCIP_RKCI_CHIP_CODE_LAST = 0x7FFF, GCIP_RKCI_GENERIC_CODE_FIRST = 0x8000, GCIP_RKCI_FIRMWARE_CRASH = GCIP_RKCI_GENERIC_CODE_FIRST + 0, diff --git a/gxp-client.c b/gxp-client.c index f96a100..813de48 100644 --- a/gxp-client.c +++ b/gxp-client.c @@ -25,7 +25,8 @@ struct gxp_client *gxp_client_create(struct gxp_dev *gxp) return ERR_PTR(-ENOMEM); client->gxp = gxp; - init_rwsem(&client->semaphore); + lockdep_register_key(&client->key); + __init_rwsem(&client->semaphore, "&client->semaphore", &client->key); client->has_block_wakelock = false; client->has_vd_wakelock = false; client->requested_states = off_states; @@ -53,6 +54,9 @@ void gxp_client_destroy(struct gxp_client *client) gxp_eventfd_put(client->mb_eventfds[core]); } + if (client->vd_invalid_eventfd) + gxp_eventfd_put(client->vd_invalid_eventfd); + #if (IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_ANDROID)) && \ !IS_ENABLED(CONFIG_GXP_GEM5) if (client->tpu_file) { @@ -84,6 +88,7 @@ void gxp_client_destroy(struct gxp_client *client) up_write(&gxp->vd_semaphore); } + lockdep_unregister_key(&client->key); kfree(client); } @@ -181,7 +186,9 @@ int gxp_client_acquire_block_wakelock(struct gxp_client *client, return ret; *acquired_wakelock = true; if (client->vd) { + down_write(&gxp->vd_semaphore); ret = gxp_vd_block_ready(client->vd); + up_write(&gxp->vd_semaphore); if (ret) goto err_wakelock_release; } diff --git a/gxp-client.h b/gxp-client.h index 01d0b2c..935b23d 100644 --- a/gxp-client.h +++ b/gxp-client.h @@ 
-28,6 +28,7 @@ struct gxp_client { * lock this semaphore for reading for the duration of that operation. */ struct rw_semaphore semaphore; + struct lock_class_key key; bool has_block_wakelock; bool has_vd_wakelock; @@ -39,6 +40,7 @@ struct gxp_client { struct gxp_tpu_mbx_desc mbx_desc; struct gxp_eventfd *mb_eventfds[GXP_NUM_CORES]; + struct gxp_eventfd *vd_invalid_eventfd; /* client process thread group ID is really the main process ID. */ pid_t tgid; diff --git a/gxp-common-platform.c b/gxp-common-platform.c index d9ac532..e8c5572 100644 --- a/gxp-common-platform.c +++ b/gxp-common-platform.c @@ -22,6 +22,8 @@ #include <linux/uaccess.h> #include <linux/uidgid.h> +#include <gcip/gcip-dma-fence.h> + #include "gxp-client.h" #include "gxp-config.h" #include "gxp-core-telemetry.h" @@ -570,8 +572,20 @@ static int gxp_allocate_vd(struct gxp_client *client, ret = gxp_client_allocate_virtual_device(client, ibuf.core_count, ibuf.flags); up_write(&client->semaphore); + if (ret) + return ret; - return ret; + ibuf.vdid = client->vd->vdid; + if (copy_to_user(argp, &ibuf, sizeof(ibuf))) { + /* + * VD will be released once the client FD has been closed, we + * don't need to release VD here as this branch should never + * happen in usual cases. 
+ */ + return -EFAULT; + } + + return 0; } static int @@ -875,7 +889,7 @@ static int map_tpu_mbx_queue(struct gxp_client *client, down_read(&gxp->vd_semaphore); core_count = client->vd->num_cores; - phys_core_list = gxp_vd_phys_core_list(client->vd); + phys_core_list = client->vd->core_list; mbx_info = kmalloc( sizeof(struct edgetpu_ext_mailbox_info) + @@ -1525,6 +1539,48 @@ out_unlock_client_semaphore: return ret; } +static int gxp_register_invalidated_eventfd( + struct gxp_client *client, + struct gxp_register_invalidated_eventfd_ioctl __user *argp) +{ + struct gxp_register_invalidated_eventfd_ioctl ibuf; + struct gxp_eventfd *eventfd; + int ret = 0; + + if (copy_from_user(&ibuf, argp, sizeof(ibuf))) + return -EFAULT; + + down_write(&client->semaphore); + + eventfd = gxp_eventfd_create(ibuf.eventfd); + if (IS_ERR(eventfd)) { + ret = PTR_ERR(eventfd); + goto out; + } + + if (client->vd_invalid_eventfd) + gxp_eventfd_put(client->vd_invalid_eventfd); + client->vd_invalid_eventfd = eventfd; + +out: + up_write(&client->semaphore); + return ret; +} + +static int gxp_unregister_invalidated_eventfd( + struct gxp_client *client, + struct gxp_register_invalidated_eventfd_ioctl __user *argp) +{ + down_write(&client->semaphore); + + if (client->vd_invalid_eventfd) + gxp_eventfd_put(client->vd_invalid_eventfd); + client->vd_invalid_eventfd = NULL; + + up_write(&client->semaphore); + return 0; +} + static long gxp_ioctl(struct file *file, uint cmd, ulong arg) { struct gxp_client *client = file->private_data; @@ -1616,6 +1672,12 @@ static long gxp_ioctl(struct file *file, uint cmd, ulong arg) case GXP_TRIGGER_DEBUG_DUMP: ret = gxp_trigger_debug_dump(client, argp); break; + case GXP_REGISTER_INVALIDATED_EVENTFD: + ret = gxp_register_invalidated_eventfd(client, argp); + break; + case GXP_UNREGISTER_INVALIDATED_EVENTFD: + ret = gxp_unregister_invalidated_eventfd(client, argp); + break; default: ret = -ENOTTY; /* unknown command */ } @@ -1846,10 +1908,11 @@ static int 
gxp_common_platform_probe(struct platform_device *pdev, struct gxp_de if (ret) return ret; + gxp_create_debugdir(gxp); ret = gxp_wakelock_init(gxp); if (ret) { dev_err(dev, "failed to init wakelock: %d", ret); - return ret; + goto err_remove_debugdir; } ret = gxp_pm_init(gxp); @@ -1943,16 +2006,23 @@ static int gxp_common_platform_probe(struct platform_device *pdev, struct gxp_de dev_warn(dev, "Failed to init thermal driver: %d\n", ret); } + gxp->gfence_mgr = gcip_dma_fence_manager_create(gxp->dev); + if (IS_ERR(gxp->gfence_mgr)) { + ret = PTR_ERR(gxp->gfence_mgr); + dev_err(dev, "Failed to init DMA fence manager: %d\n", ret); + goto err_thermal_destroy; + } + INIT_LIST_HEAD(&gxp->client_list); mutex_init(&gxp->client_list_lock); if (gxp->after_probe) { ret = gxp->after_probe(gxp); if (ret) - goto err_thermal_destroy; + goto err_dma_fence_destroy; } gxp->misc_dev.minor = MISC_DYNAMIC_MINOR; - gxp->misc_dev.name = "gxp"; + gxp->misc_dev.name = GXP_NAME; gxp->misc_dev.fops = &gxp_fops; ret = misc_register(&gxp->misc_dev); if (ret) { @@ -1969,6 +2039,8 @@ static int gxp_common_platform_probe(struct platform_device *pdev, struct gxp_de err_before_remove: if (gxp->before_remove) gxp->before_remove(gxp); +err_dma_fence_destroy: + /* DMA fence manager creation doesn't need revert */ err_thermal_destroy: /* thermal init doesn't need revert */ gxp_core_telemetry_exit(gxp); @@ -1992,6 +2064,8 @@ err_put_tpu_dev: gxp_pm_destroy(gxp); err_wakelock_destroy: /* wakelock init doesn't need revert */ +err_remove_debugdir: + gxp_remove_debugdir(gxp); return ret; } @@ -1999,7 +2073,7 @@ static int gxp_common_platform_remove(struct platform_device *pdev) { struct gxp_dev *gxp = platform_get_drvdata(pdev); - gxp_remove_debugfs(gxp); + gxp_remove_debugdir(gxp); misc_deregister(&gxp->misc_dev); if (gxp->before_remove) gxp->before_remove(gxp); diff --git a/gxp-debug-dump.c b/gxp-debug-dump.c index a29d6af..5589ea3 100644 --- a/gxp-debug-dump.c +++ b/gxp-debug-dump.c @@ -18,6 +18,7 @@ 
#include <linux/platform_data/sscoredump.h> #endif +#include "gxp-client.h" #include "gxp-debug-dump.h" #include "gxp-dma.h" #include "gxp-doorbell.h" @@ -366,7 +367,12 @@ static void gxp_user_buffers_vunmap(struct gxp_dev *gxp, * gxp->core_to_vd[], and up_read(&gxp->vd_semaphore) must be re-added * after. */ - vd = gxp->core_to_vd[core_header->core_id]; + if (gxp_is_direct_mode(gxp)) { + vd = gxp->core_to_vd[core_header->core_id]; + } else { + vd = gxp->debug_dump_mgr + ->crashed_core_to_vd[core_header->core_id]; + } if (!vd) { dev_err(gxp->dev, "Virtual device is not available for vunmap\n"); @@ -414,7 +420,12 @@ static int gxp_user_buffers_vmap(struct gxp_dev *gxp, * gxp->core_to_vd[], and up_read(&gxp->vd_semaphore) must be re-added * after. */ - vd = gxp->core_to_vd[core_header->core_id]; + if (gxp_is_direct_mode(gxp)) { + vd = gxp->core_to_vd[core_header->core_id]; + } else { + vd = gxp->debug_dump_mgr + ->crashed_core_to_vd[core_header->core_id]; + } if (!vd) { dev_err(gxp->dev, "Virtual device is not available for vmap\n"); goto out; @@ -468,7 +479,7 @@ out: } #endif -static void gxp_invalidate_segments(struct gxp_dev *gxp, uint32_t core_id) +void gxp_debug_dump_invalidate_segments(struct gxp_dev *gxp, uint32_t core_id) { int i; struct gxp_debug_dump_manager *mgr = gxp->debug_dump_mgr; @@ -581,6 +592,10 @@ static int gxp_handle_debug_dump(struct gxp_dev *gxp, uint32_t core_id) ret = -EFAULT; goto out_efault; } + /* + * TODO(b/265105909): Implement the logic for collecting fw rw section + * separately for mcu mode. 
+ */ mgr->segs[core_id][seg_idx].addr = gxp->fwbufs[core_id].vaddr; mgr->segs[core_id][seg_idx].size = gxp->fwbufs[core_id].size; seg_idx++; @@ -613,7 +628,7 @@ out_efault: #endif out: - gxp_invalidate_segments(gxp, core_id); + gxp_debug_dump_invalidate_segments(gxp, core_id); return ret; } @@ -663,18 +678,15 @@ out: return ret; } -static void gxp_debug_dump_process_dump(struct work_struct *work) +static void gxp_generate_debug_dump(struct gxp_dev *gxp, uint core_id, + struct gxp_virtual_device *crashed_vd) { - struct gxp_debug_dump_work *debug_dump_work = - container_of(work, struct gxp_debug_dump_work, work); - - uint core_id = debug_dump_work->core_id; - struct gxp_dev *gxp = debug_dump_work->gxp; u32 boot_mode; bool gxp_generate_coredump_called = false; mutex_lock(&gxp->debug_dump_mgr->debug_dump_lock); - + /* crashed_core_to_vd[] is only relevant in case of mcu mode.*/ + gxp->debug_dump_mgr->crashed_core_to_vd[core_id] = crashed_vd; /* * Lock the VD semaphore to ensure no suspend/resume/start/stop requests * can be made on core `core_id` while generating debug dump. @@ -685,7 +697,11 @@ static void gxp_debug_dump_process_dump(struct work_struct *work) */ down_read(&gxp->vd_semaphore); - boot_mode = gxp_firmware_get_boot_mode(gxp, core_id); + /* + * TODO(b/265105909): Checks below to be verified after implementation for + * firmware loading for mcu mode are completed. 
+ */ + boot_mode = gxp_firmware_get_boot_mode(gxp, crashed_vd, core_id); if (gxp_is_fw_running(gxp, core_id) && (boot_mode == GXP_BOOT_MODE_STATUS_COLD_BOOT_COMPLETED || @@ -696,7 +712,7 @@ static void gxp_debug_dump_process_dump(struct work_struct *work) } /* Invalidate segments to prepare for the next debug dump trigger */ - gxp_invalidate_segments(gxp, core_id); + gxp_debug_dump_invalidate_segments(gxp, core_id); up_read(&gxp->vd_semaphore); @@ -709,9 +725,46 @@ static void gxp_debug_dump_process_dump(struct work_struct *work) if (gxp_generate_coredump_called) msleep(1000); + /* crashed_core_to_vd[] is only relevant in case of mcu mode.*/ + gxp->debug_dump_mgr->crashed_core_to_vd[core_id] = NULL; mutex_unlock(&gxp->debug_dump_mgr->debug_dump_lock); } +static void gxp_debug_dump_process_dump(struct work_struct *work) +{ + struct gxp_debug_dump_work *debug_dump_work = + container_of(work, struct gxp_debug_dump_work, work); + uint core_id = debug_dump_work->core_id; + struct gxp_dev *gxp = debug_dump_work->gxp; + + gxp_generate_debug_dump(gxp, core_id, NULL /*Not used*/); +} + +int gxp_debug_dump_process_dump_mcu_mode(struct gxp_dev *gxp, uint core_list, + struct gxp_virtual_device *crashed_vd) +{ + uint core; + struct gxp_core_dump_header *core_dump_header; + struct gxp_debug_dump_manager *mgr = gxp->debug_dump_mgr; + + if (crashed_vd->state != GXP_VD_UNAVAILABLE) { + dev_dbg(gxp->dev, "Invalid vd state=%u for processing dumps.\n", + crashed_vd->state); + return -EINVAL; + } + + for (core = 0; core < GXP_NUM_CORES; core++) { + if (!(BIT(core) & core_list)) + continue; + core_dump_header = &mgr->core_dump->core_dump_header[core]; + /* Check if dump has been generated by core firmware */ + if (core_dump_header && + core_dump_header->core_header.dump_available == 1) + gxp_generate_debug_dump(gxp, core, crashed_vd); + } + return 0; +} + struct work_struct *gxp_debug_dump_get_notification_handler(struct gxp_dev *gxp, uint core) { @@ -758,7 +811,7 @@ int 
gxp_debug_dump_init(struct gxp_dev *gxp, void *sscd_dev, void *sscd_pdata) gxp_init_segments(gxp); for (core = 0; core < GXP_NUM_CORES; core++) { - gxp_invalidate_segments(gxp, core); + gxp_debug_dump_invalidate_segments(gxp, core); mgr->debug_dump_works[core].gxp = gxp; mgr->debug_dump_works[core].core_id = core; INIT_WORK(&mgr->debug_dump_works[core].work, diff --git a/gxp-debug-dump.h b/gxp-debug-dump.h index 2a5d1ce..66ab782 100644 --- a/gxp-debug-dump.h +++ b/gxp-debug-dump.h @@ -23,22 +23,22 @@ #define GXP_NUM_CORE_SEGMENTS 8 #define GXP_NUM_BUFFER_MAPPINGS 32 #define GXP_SEG_HEADER_NAME_LENGTH 32 -#define GXP_NUM_SEGMENTS_PER_CORE \ - (GXP_NUM_COMMON_SEGMENTS + GXP_NUM_CORE_SEGMENTS + \ +#define GXP_NUM_SEGMENTS_PER_CORE \ + (GXP_NUM_COMMON_SEGMENTS + GXP_NUM_CORE_SEGMENTS + \ GXP_NUM_BUFFER_MAPPINGS + 1) #define GXP_Q7_ICACHE_SIZE 131072 /* I-cache size in bytes */ #define GXP_Q7_ICACHE_LINESIZE 64 /* I-cache line size in bytes */ #define GXP_Q7_ICACHE_WAYS 4 -#define GXP_Q7_ICACHE_SETS ((GXP_Q7_ICACHE_SIZE / GXP_Q7_ICACHE_WAYS) / \ - GXP_Q7_ICACHE_LINESIZE) +#define GXP_Q7_ICACHE_SETS \ + ((GXP_Q7_ICACHE_SIZE / GXP_Q7_ICACHE_WAYS) / GXP_Q7_ICACHE_LINESIZE) #define GXP_Q7_ICACHE_WORDS_PER_LINE (GXP_Q7_ICACHE_LINESIZE / sizeof(u32)) #define GXP_Q7_DCACHE_SIZE 65536 /* D-cache size in bytes */ -#define GXP_Q7_DCACHE_LINESIZE 64 /* D-cache line size in bytes */ +#define GXP_Q7_DCACHE_LINESIZE 64 /* D-cache line size in bytes */ #define GXP_Q7_DCACHE_WAYS 4 -#define GXP_Q7_DCACHE_SETS ((GXP_Q7_DCACHE_SIZE / GXP_Q7_DCACHE_WAYS) / \ - GXP_Q7_DCACHE_LINESIZE) +#define GXP_Q7_DCACHE_SETS \ + ((GXP_Q7_DCACHE_SIZE / GXP_Q7_DCACHE_WAYS) / GXP_Q7_DCACHE_LINESIZE) #define GXP_Q7_DCACHE_WORDS_PER_LINE (GXP_Q7_DCACHE_LINESIZE / sizeof(u32)) #define GXP_Q7_NUM_AREGS 64 #define GXP_Q7_DCACHE_TAG_RAMS 2 @@ -188,6 +188,12 @@ struct gxp_debug_dump_manager { * time */ struct mutex debug_dump_lock; + /* + * Array index maps to dsp cores. 
Array stores the pointer to the + * crashed VD that was running on the respective core. This is used + * only in mcu mode. + */ + struct gxp_virtual_device *crashed_core_to_vd[GXP_NUM_CORES]; #if IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP) struct sscd_segment segs[GXP_NUM_CORES][GXP_NUM_SEGMENTS_PER_CORE]; #endif @@ -199,4 +205,31 @@ struct work_struct *gxp_debug_dump_get_notification_handler(struct gxp_dev *gxp, uint core); bool gxp_debug_dump_is_enabled(void); +/** + * gxp_debug_dump_invalidate_segments() - Invalidate debug dump segments to enable + * firmware to populate them on next debug + * dump trigger. + * + * This function is not thread safe. Caller should take the necessary precautions. + * + * @gxp: The GXP device to obtain the handler for + * @core_id: physical id of core whose dump segments need to be invalidated. + */ +void gxp_debug_dump_invalidate_segments(struct gxp_dev *gxp, uint32_t core_id); + +/** + * gxp_debug_dump_process_dump_mcu_mode() - Checks and process the debug dump + * for cores from core_list. + * @gxp: The GXP device to obtain the handler for + * @core_list: A bitfield enumerating the physical cores on which crash is + * reported from firmware. + * @crashed_vd: vd that has crashed. + * + * Return: + * * 0 - Success. + * * -EINVAL - If vd state is not GXP_VD_UNAVAILABLE. 
+ */ +int gxp_debug_dump_process_dump_mcu_mode(struct gxp_dev *gxp, uint core_list, + struct gxp_virtual_device *crashed_vd); + #endif /* __GXP_DEBUG_DUMP_H__ */ diff --git a/gxp-debugfs.c b/gxp-debugfs.c index ae31914..47a45a1 100644 --- a/gxp-debugfs.c +++ b/gxp-debugfs.c @@ -44,6 +44,7 @@ static int gxp_debugfs_mailbox(void *data, u64 val) u16 status; struct gxp_dev *gxp = (struct gxp_dev *)data; struct gxp_mailbox *mbx; + struct gxp_client *client; struct gxp_power_states power_states = { .power = GXP_POWER_STATE_NOM, .memory = MEMORY_POWER_STATE_UNDEFINED, @@ -52,6 +53,7 @@ static int gxp_debugfs_mailbox(void *data, u64 val) int ret; mutex_lock(&gxp->debugfs_client_lock); + client = gxp->debugfs_client; #if GXP_HAS_MCU if (gxp_is_direct_mode(gxp)) { @@ -74,17 +76,29 @@ static int gxp_debugfs_mailbox(void *data, u64 val) goto out; } + /* Create a dummy client to access @client->gxp from the `execute_cmd` callback. */ + if (!client) + client = gxp_client_create(gxp); mbx = gxp->mailbox_mgr->mailboxes[core]; cmd_code = GXP_MBOX_CODE_DISPATCH; #if GXP_HAS_MCU } else { - if (!gxp->debugfs_client) { + if (!client) { dev_err(gxp->dev, "You should load firmwares via gxp/firmware_run first\n"); ret = -EIO; goto out; } + down_read(&gxp->debugfs_client->semaphore); + if (!gxp_client_has_available_vd(gxp->debugfs_client, + "GXP_MAILBOX_COMMAND")) { + ret = -ENODEV; + up_read(&gxp->debugfs_client->semaphore); + goto out; + } + up_read(&gxp->debugfs_client->semaphore); + mbx = to_mcu_dev(gxp)->mcu.uci.mbx; if (!mbx) { dev_err(gxp->dev, "UCI is not initialized.\n"); @@ -96,12 +110,9 @@ static int gxp_debugfs_mailbox(void *data, u64 val) } #endif - down_read(&gxp->vd_semaphore); - /* In direct mode, gxp->debugfs_client and core will be ignored. 
*/ - retval = gxp->mailbox_mgr->execute_cmd(gxp->debugfs_client, mbx, core, - cmd_code, 0, 0, 0, 0, 1, - power_states, NULL, &status); - up_read(&gxp->vd_semaphore); + retval = gxp->mailbox_mgr->execute_cmd(client, mbx, core, cmd_code, 0, + 0, 0, 0, 1, power_states, NULL, + &status); dev_info( gxp->dev, @@ -109,6 +120,8 @@ static int gxp_debugfs_mailbox(void *data, u64 val) core, status, retval); ret = 0; out: + if (client && client != gxp->debugfs_client) + gxp_client_destroy(client); mutex_unlock(&gxp->debugfs_client_lock); return ret; } @@ -168,9 +181,14 @@ static int gxp_firmware_run_set(void *data, u64 val) } gxp->debugfs_client = client; + mutex_lock(&gxp->client_list_lock); + list_add(&client->list_entry, &gxp->client_list); + mutex_unlock(&gxp->client_list_lock); + down_write(&client->semaphore); - ret = gxp_client_allocate_virtual_device(client, GXP_NUM_CORES, 0); + ret = gxp_client_allocate_virtual_device(client, GXP_NUM_CORES, + 0); if (ret) { dev_err(gxp->dev, "Failed to allocate VD\n"); goto err_destroy_client; @@ -201,8 +219,7 @@ static int gxp_firmware_run_set(void *data, u64 val) * Cleaning up the client will stop the VD it owns and release * the BLOCK wakelock it is holding. */ - gxp_client_destroy(gxp->debugfs_client); - gxp->debugfs_client = NULL; + goto out_destroy_client; } out: @@ -214,8 +231,13 @@ err_release_block_wakelock: gxp_client_release_block_wakelock(client); err_destroy_client: up_write(&client->semaphore); +out_destroy_client: + mutex_lock(&gxp->client_list_lock); + list_del(&gxp->debugfs_client->list_entry); + mutex_unlock(&gxp->client_list_lock); + /* Destroying a client cleans up any VDss or wakelocks it held. 
*/ - gxp_client_destroy(client); + gxp_client_destroy(gxp->debugfs_client); gxp->debugfs_client = NULL; mutex_unlock(&gxp->debugfs_client_lock); return ret; @@ -488,10 +510,19 @@ static int gxp_cmu_mux2_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(gxp_cmu_mux2_fops, gxp_cmu_mux2_get, gxp_cmu_mux2_set, "%llu\n"); +void gxp_create_debugdir(struct gxp_dev *gxp) +{ + gxp->d_entry = debugfs_create_dir(GXP_NAME, NULL); + if (IS_ERR_OR_NULL(gxp->d_entry)) { + dev_warn(gxp->dev, "Create debugfs dir failed: %d", + PTR_ERR_OR_ZERO(gxp->d_entry)); + gxp->d_entry = NULL; + } +} + void gxp_create_debugfs(struct gxp_dev *gxp) { - gxp->d_entry = debugfs_create_dir("gxp", NULL); - if (IS_ERR_OR_NULL(gxp->d_entry)) + if (!gxp->d_entry) return; mutex_init(&gxp->debugfs_client_lock); @@ -518,9 +549,9 @@ void gxp_create_debugfs(struct gxp_dev *gxp) &gxp_cmu_mux2_fops); } -void gxp_remove_debugfs(struct gxp_dev *gxp) +void gxp_remove_debugdir(struct gxp_dev *gxp) { - if (IS_GXP_TEST && !gxp->d_entry) + if (!gxp->d_entry) return; debugfs_remove_recursive(gxp->d_entry); diff --git a/gxp-debugfs.h b/gxp-debugfs.h index 4b42546..6ea8688 100644 --- a/gxp-debugfs.h +++ b/gxp-debugfs.h @@ -9,7 +9,12 @@ #include "gxp-internal.h" +/* + * Creates the GXP debug FS directory and assigns to @gxp->d_entry. + * On failure a warning is logged and @gxp->d_entry is NULL. + */ +void gxp_create_debugdir(struct gxp_dev *gxp); void gxp_create_debugfs(struct gxp_dev *gxp); -void gxp_remove_debugfs(struct gxp_dev *gxp); +void gxp_remove_debugdir(struct gxp_dev *gxp); #endif /* __GXP_DEBUGFS_H__ */ diff --git a/gxp-dma-fence.c b/gxp-dma-fence.c new file mode 100644 index 0000000..900ea23 --- /dev/null +++ b/gxp-dma-fence.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GXP support for DMA fence. 
+ * + * Copyright (C) 2023 Google LLC + */ + +#include <linux/slab.h> + +#include <gcip/gcip-dma-fence.h> + +#include "gxp-dma-fence.h" +#include "gxp-internal.h" +#include "gxp-vd.h" +#include "gxp.h" + +static const char *gxp_get_driver_name(struct dma_fence *fence) +{ + return GXP_NAME; +} + +static void gxp_dma_fence_release(struct dma_fence *fence) +{ + struct gxp_dma_fence *gxp_fence = to_gxp_fence(fence); + + gcip_dma_fence_exit(&gxp_fence->gfence); + kfree(gxp_fence); +} + +static const struct dma_fence_ops gxp_dma_fence_ops = { + .get_driver_name = gxp_get_driver_name, + .get_timeline_name = gcip_dma_fence_get_timeline_name, + .wait = dma_fence_default_wait, + .enable_signaling = gcip_dma_fence_always_true, + .release = gxp_dma_fence_release, +}; + +int gxp_dma_fence_create(struct gxp_dev *gxp, struct gxp_virtual_device *vd, + struct gxp_create_sync_fence_data *datap) +{ + struct gcip_dma_fence_data data = { + .timeline_name = datap->timeline_name, + .ops = &gxp_dma_fence_ops, + .seqno = datap->seqno, + }; + struct gxp_dma_fence *gxp_fence = + kzalloc(sizeof(*gxp_fence), GFP_KERNEL); + int ret; + + if (!gxp_fence) + return -ENOMEM; + + /* TODO(b/264855736): add VD association support */ + + ret = gcip_dma_fence_init(gxp->gfence_mgr, &gxp_fence->gfence, &data); + if (!ret) + datap->fence = data.fence; + /* + * We don't need to kfree(gxp_fence) on error because that's called in + * gxp_dma_fence_release. + */ + + return ret; +} diff --git a/gxp-dma-fence.h b/gxp-dma-fence.h new file mode 100644 index 0000000..c7ad95e --- /dev/null +++ b/gxp-dma-fence.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GXP support for DMA fence. + * + * Copyright (C) 2023 Google LLC + */ + +#ifndef __GXP_DMA_FENCE_H__ +#define __GXP_DMA_FENCE_H__ + +#include <gcip/gcip-dma-fence.h> + +#include "gxp-vd.h" +#include "gxp.h" + +/* Converts struct dma_fence to gxp_dma_fence. 
*/ +#define to_gxp_fence(fence) \ + container_of(to_gcip_fence(fence), struct gxp_dma_fence, gfence) + +struct gxp_dma_fence { + struct gcip_dma_fence gfence; + /* The owner of this DMA fence */ + struct gxp_virtual_device *vd; + /* List of DMA fences owned by the same VD. */ + struct list_head fence_list; +}; + +/* + * Creates a DMA fence associates with @vd. + * + * @datap->fence is set to the fence FD on success. + * + * Returns 0 on success. Otherwise a negative errno. + */ +int gxp_dma_fence_create(struct gxp_dev *gxp, struct gxp_virtual_device *vd, + struct gxp_create_sync_fence_data *datap); + +#endif /* __GXP_DMA_FENCE_H__ */ diff --git a/gxp-dma-iommu.c b/gxp-dma-iommu.c index 1480761..3dfd70b 100644 --- a/gxp-dma-iommu.c +++ b/gxp-dma-iommu.c @@ -15,6 +15,7 @@ #include "gxp-config.h" #include "gxp-dma.h" +#include "gxp-firmware.h" /* gxp_core_boot */ #include "gxp-mailbox.h" #include "gxp-mapping.h" #include "gxp-pm.h" @@ -79,13 +80,19 @@ static int gxp_dma_ssmt_program(struct gxp_dev *gxp, int pasid; uint core; - pasid = iommu_aux_get_pasid(domain, gxp->dev); - for (core = 0; core < GXP_NUM_CORES; core++) - if (BIT(core) & core_list) { - dev_dbg(gxp->dev, "Assign core%u to PASID %d\n", core, - pasid); - gxp_ssmt_set_core_vid(&mgr->ssmt, core, pasid); - } + /* Program VID only when cores are managed by us. */ + if (gxp_is_direct_mode(gxp) || gxp_core_boot) { + pasid = iommu_aux_get_pasid(domain, gxp->dev); + for (core = 0; core < GXP_NUM_CORES; core++) + if (BIT(core) & core_list) { + dev_dbg(gxp->dev, "Assign core%u to PASID %d\n", + core, pasid); + gxp_ssmt_set_core_vid(&mgr->ssmt, core, pasid); + } + } else { + for (core = 0; core < GXP_NUM_CORES; core++) + gxp_ssmt_set_core_bypass(&mgr->ssmt, core); + } return 0; } @@ -167,31 +174,6 @@ static void gxp_unmap_csrs(struct gxp_dev *gxp, struct iommu_domain *domain, #endif /* GXP_HAS_LAP */ -/* Maps the shared buffer region to @domain. 
*/ -static int gxp_map_core_shared_buffer(struct gxp_dev *gxp, - struct iommu_domain *domain, - u8 slice_index) -{ - size_t shared_size = gxp->shared_slice_size; - - if (!gxp->shared_buf.paddr) - return 0; - return iommu_map(domain, gxp->shared_buf.daddr, - gxp->shared_buf.paddr + shared_size * slice_index, - shared_size, IOMMU_READ | IOMMU_WRITE); -} - -/* Reverts gxp_map_core_shared_buffer. */ -static void gxp_unmap_core_shared_buffer(struct gxp_dev *gxp, - struct iommu_domain *domain) -{ - size_t shared_size = gxp->shared_slice_size; - - if (!gxp->shared_buf.paddr) - return; - iommu_unmap(domain, gxp->shared_buf.daddr, shared_size); -} - /* gxp-dma.h Interface */ uint gxp_iommu_aux_get_pasid(struct gxp_dev *gxp, @@ -343,20 +325,11 @@ int gxp_dma_map_core_resources(struct gxp_dev *gxp, if (ret) goto err; } - /* - * TODO(b/202213606): Map FW regions of all cores in a VD for - * each other at VD creation. - */ - ret = iommu_map(domain, gxp->fwbufs[0].daddr, gxp->fwbufs[0].paddr, - gxp->fwbufs[0].size * GXP_NUM_CORES, - IOMMU_READ | IOMMU_WRITE); - if (ret) - goto err; - ret = iommu_map(domain, gxp->fwdatabuf.daddr, gxp->fwdatabuf.paddr, - gxp->fwdatabuf.size, IOMMU_READ | IOMMU_WRITE); - if (ret) - goto err; - ret = gxp_map_core_shared_buffer(gxp, domain, slice_index); + /* TODO(b/265748027): directly remove this map */ + if (gxp->fwdatabuf.daddr) + ret = iommu_map(domain, gxp->fwdatabuf.daddr, + gxp->fwdatabuf.paddr, gxp->fwdatabuf.size, + IOMMU_READ | IOMMU_WRITE); if (ret) goto err; /* Only map the TPU mailboxes if they were found on probe */ @@ -402,15 +375,8 @@ void gxp_dma_unmap_core_resources(struct gxp_dev *gxp, EXT_TPU_MBX_SIZE); } } - gxp_unmap_core_shared_buffer(gxp, domain); - iommu_unmap(domain, gxp->fwdatabuf.daddr, gxp->fwdatabuf.size); - /* - * TODO(b/202213606): A core should only have access to the FW - * of other cores if they're in the same VD, and have the FW - * region unmapped on VD destruction. 
- */ - iommu_unmap(domain, gxp->fwbufs[0].daddr, - gxp->fwbufs[0].size * GXP_NUM_CORES); + if (gxp->fwdatabuf.daddr) + iommu_unmap(domain, gxp->fwdatabuf.daddr, gxp->fwdatabuf.size); for (i = 0; i < GXP_NUM_CORES; i++) { if (!(BIT(i) & core_list)) continue; @@ -710,6 +676,8 @@ int gxp_dma_map_iova_sgt(struct gxp_dev *gxp, struct gxp_iommu_domain *gdomain, return -EINVAL; return size_mapped; } + dma_sync_sg_for_device(gxp->dev, sgt->sgl, sgt->orig_nents, + DMA_BIDIRECTIONAL); return 0; } diff --git a/gxp-firmware-data.c b/gxp-firmware-data.c index 6f22f8d..684d08c 100644 --- a/gxp-firmware-data.c +++ b/gxp-firmware-data.c @@ -11,9 +11,11 @@ #include "gxp-debug-dump.h" #include "gxp-firmware-data.h" +#include "gxp-firmware.h" /* gxp_core_boot */ #include "gxp-host-device-structs.h" #include "gxp-internal.h" #include "gxp-range-alloc.h" +#include "gxp-vd.h" #include "gxp.h" /* @@ -89,11 +91,23 @@ struct gxp_fw_data_manager { struct fw_memory wdog_mem; struct fw_memory core_telemetry_mem; struct fw_memory debug_dump_mem; + + /* + * A host-view of the System configuration descriptor. This same desc + * is provided to all VDs and all cores. This is the R/O section. + */ + struct gxp_system_descriptor_ro *sys_desc_ro; + /* + * A host-view of the System configuration descriptor. This same desc + * is provided to all VDs and all cores. This is the R/W section. + */ + struct gxp_system_descriptor_rw *sys_desc_rw; }; /* A container holding information for a single GXP application. 
*/ struct app_metadata { struct gxp_fw_data_manager *mgr; + struct gxp_virtual_device *vd; uint application_id; uint core_count; uint core_list; /* bitmap of cores allocated to this app */ @@ -463,6 +477,208 @@ static struct fw_memory init_application(struct app_metadata *app) return mem; } +static struct app_metadata *gxp_fw_data_create_app_legacy(struct gxp_dev *gxp, + uint core_list) +{ + struct gxp_fw_data_manager *mgr = gxp->data_mgr; + struct app_metadata *app; + void *err; + int i; + + app = kzalloc(sizeof(*app), GFP_KERNEL); + if (!app) + return ERR_PTR(-ENOMEM); + + /* Create resource and memory allocations for new app */ + app->mgr = mgr; + app->application_id = DEFAULT_APP_ID; + app->core_count = hweight_long(core_list); + app->core_list = core_list; + + /* User doorbells */ + app->user_doorbells_count = DEFAULT_APP_USER_DOORBELL_COUNT; + app->user_doorbells = + kcalloc(app->user_doorbells_count, sizeof(int), GFP_KERNEL); + if (!app->user_doorbells) { + err = ERR_PTR(-ENOMEM); + goto err_user_doorbells; + } + + for (i = 0; i < app->user_doorbells_count; i++) { + range_alloc_get_any(mgr->doorbell_allocator, + &app->user_doorbells[i]); + } + + /* User sync barrier */ + app->user_barriers_count = DEFAULT_APP_USER_BARRIER_COUNT; + app->user_barriers = + kcalloc(app->user_barriers_count, sizeof(int), GFP_KERNEL); + if (!app->user_barriers) { + err = ERR_PTR(-ENOMEM); + goto err_user_barriers; + } + + for (i = 0; i < app->user_barriers_count; i++) { + range_alloc_get_any(mgr->sync_barrier_allocator, + &app->user_barriers[i]); + } + + /* Application region. 
*/ + app->app_mem = init_application(app); + for (i = 0; i < GXP_NUM_CORES; i++) { + if (core_list & BIT(i)) { + mgr->system_desc->app_descriptor_dev_addr[i] = + app->app_mem.device_addr; + } + } + + return app; + +err_user_barriers: + for (i = 0; i < app->user_doorbells_count; i++) + range_alloc_put(mgr->doorbell_allocator, + app->user_doorbells[i]); + kfree(app->user_doorbells); +err_user_doorbells: + kfree(app); + + return err; +} + +static void gxp_fw_data_destroy_app_legacy(struct gxp_dev *gxp, + struct app_metadata *app) +{ + struct gxp_fw_data_manager *mgr = gxp->data_mgr; + int i; + + for (i = 0; i < app->user_doorbells_count; i++) + range_alloc_put(mgr->doorbell_allocator, + app->user_doorbells[i]); + kfree(app->user_doorbells); + + for (i = 0; i < app->user_barriers_count; i++) + range_alloc_put(mgr->sync_barrier_allocator, + app->user_barriers[i]); + kfree(app->user_barriers); + + mem_alloc_free(mgr->allocator, &app->user_mem); + mem_alloc_free(mgr->allocator, &app->doorbells_mem); + mem_alloc_free(mgr->allocator, &app->sync_barriers_mem); + mem_alloc_free(mgr->allocator, &app->semaphores_mem); + mem_alloc_free(mgr->allocator, &app->cores_mem); + for (i = 0; i < app->core_count; i++) { + mem_alloc_free(mgr->allocator, &app->core_cmd_queues_mem[i]); + mem_alloc_free(mgr->allocator, &app->core_rsp_queues_mem[i]); + } + mem_alloc_free(mgr->allocator, &app->app_mem); + + kfree(app); +} + +/* + * Here assumes sys_cfg contains gxp_system_descriptor_ro in the first page and + * gxp_system_descriptor_rw in the second page. 
+ */ +static void set_system_cfg_region(struct gxp_dev *gxp, void *sys_cfg) +{ + struct gxp_system_descriptor_ro *des_ro = sys_cfg; + struct gxp_system_descriptor_rw *des_rw = sys_cfg + PAGE_SIZE; + struct gxp_core_telemetry_descriptor *descriptor = + gxp->data_mgr->core_telemetry_mem.host_addr; + struct telemetry_descriptor_ro *tel_ro; + struct telemetry_descriptor_rw *tel_rw; + struct core_telemetry_descriptor *tel_des; + int i; + + if (gxp->debug_dump_mgr) + des_ro->debug_dump_dev_addr = gxp->debug_dump_mgr->buf.dsp_addr; + else + des_ro->debug_dump_dev_addr = 0; + +#define COPY_FIELDS \ + do { \ + tel_ro->host_status = tel_des->host_status; \ + tel_ro->buffer_addr = tel_des->buffer_addr; \ + tel_ro->buffer_size = tel_des->buffer_size; \ + tel_rw->device_status = tel_des->device_status; \ + tel_rw->data_available = tel_des->watermark_level; \ + } while (0) + for (i = 0; i < GXP_NUM_CORES; i++) { + tel_ro = &des_ro->telemetry_desc.per_core_loggers[i]; + tel_rw = &des_rw->telemetry_desc.per_core_loggers[i]; + tel_des = &descriptor->per_core_loggers[i]; + COPY_FIELDS; + tel_ro = &des_ro->telemetry_desc.per_core_tracers[i]; + tel_rw = &des_rw->telemetry_desc.per_core_tracers[i]; + tel_des = &descriptor->per_core_tracers[i]; + COPY_FIELDS; + } +#undef COPY_FIELDS +} + +static struct app_metadata * +_gxp_fw_data_create_app(struct gxp_dev *gxp, struct gxp_virtual_device *vd) +{ + struct app_metadata *app; + struct gxp_host_control_region *core_cfg; + struct gxp_job_descriptor job; + struct gxp_vd_descriptor *vd_desc; + int i; + + /* + * If we are able to know where sys_cfg's virt is on init() then we + * don't need this here, but to keep compatibility with + * !use_per_vd_config, we keep gxp_fw_data_init() doing the + * initialization of legacy mode, and have here copy the values to the + * config region. 
+ */ + if (vd->vdid == 0) + set_system_cfg_region(gxp, vd->sys_cfg.vaddr); + app = kzalloc(sizeof(*app), GFP_KERNEL); + if (!app) + return ERR_PTR(-ENOMEM); + + if (!gxp_core_boot) { + dev_info(gxp->dev, "Skip setting VD and core CFG"); + return app; + } + /* Set up VD config region. */ + vd_desc = vd->vd_cfg.vaddr; + vd_desc->application_id = DEFAULT_APP_ID; + vd_desc->vd_is_initialized = 0; + /* Set up core config region. */ + job.workers_count = vd->num_cores; + for (i = 0; i < ARRAY_SIZE(job.worker_to_fw); i++) { + /* + * Kernel-initiated workloads always act like the entire VD is + * one giant N-core job where N is the number of cores allocated + * to that VD. + * The MCU, on the other hand, can have multiple jobs dispatched + * to the same VD at the same time. + */ + if (i < job.workers_count) + job.worker_to_fw[i] = i; + else + job.worker_to_fw[i] = -1; + } + /* Give each VD a unique HW resources slot. */ + job.hardware_resources_slot = gxp_vd_hw_slot_id(vd); + /* Assign the same job descriptor to all cores in this VD */ + for (i = 0; i < GXP_NUM_CORES; i++) { + core_cfg = vd->core_cfg.vaddr + + vd->core_cfg.size / GXP_NUM_CORES * i; + core_cfg->job_descriptor = job; + } + + return app; +} + +static void _gxp_fw_data_destroy_app(struct gxp_dev *gxp, + struct app_metadata *app) +{ + kfree(app); +} + int gxp_fw_data_init(struct gxp_dev *gxp) { struct gxp_fw_data_manager *mgr; @@ -486,6 +702,7 @@ int gxp_fw_data_init(struct gxp_dev *gxp) res = -ENODEV; goto err; } + gxp->fwdatabuf.vaddr = mgr->fw_data_virt; /* Instantiate the doorbells allocator with all doorbells */ mgr->doorbell_allocator = @@ -607,101 +824,31 @@ err: return res; } -void *gxp_fw_data_create_app(struct gxp_dev *gxp, uint core_list) +void *gxp_fw_data_create_app(struct gxp_dev *gxp, struct gxp_virtual_device *vd) { - struct gxp_fw_data_manager *mgr = gxp->data_mgr; struct app_metadata *app; - void *err; - int i; - - app = kzalloc(sizeof(struct app_metadata), GFP_KERNEL); - if (!app) - return 
ERR_PTR(-ENOMEM); - - /* Create resource and memory allocations for new app */ - app->mgr = mgr; - app->application_id = DEFAULT_APP_ID; - app->core_count = hweight_long(core_list); - app->core_list = core_list; - /* User doorbells */ - app->user_doorbells_count = DEFAULT_APP_USER_DOORBELL_COUNT; - app->user_doorbells = - kcalloc(app->user_doorbells_count, sizeof(int), GFP_KERNEL); - if (!app->user_doorbells) { - err = ERR_PTR(-ENOMEM); - goto err_user_doorbells; - } - - for (i = 0; i < app->user_doorbells_count; i++) { - range_alloc_get_any(mgr->doorbell_allocator, - &app->user_doorbells[i]); - } - - /* User sync barrier */ - app->user_barriers_count = DEFAULT_APP_USER_BARRIER_COUNT; - app->user_barriers = - kcalloc(app->user_barriers_count, sizeof(int), GFP_KERNEL); - if (!app->user_barriers) { - err = ERR_PTR(-ENOMEM); - goto err_user_barriers; - } + if (gxp_fw_data_use_per_vd_config(vd)) + app = _gxp_fw_data_create_app(gxp, vd); + else + app = gxp_fw_data_create_app_legacy(gxp, vd->core_list); - for (i = 0; i < app->user_barriers_count; i++) { - range_alloc_get_any(mgr->sync_barrier_allocator, - &app->user_barriers[i]); - } - - /* Application region. 
*/ - app->app_mem = init_application(app); - for (i = 0; i < GXP_NUM_CORES; i++) { - if (core_list & BIT(i)) { - mgr->system_desc->app_descriptor_dev_addr[i] = - app->app_mem.device_addr; - } - } + if (IS_ERR(app)) + return app; + app->vd = vd; return app; - -err_user_barriers: - for (i = 0; i < app->user_doorbells_count; i++) - range_alloc_put(mgr->doorbell_allocator, - app->user_doorbells[i]); - kfree(app->user_doorbells); -err_user_doorbells: - kfree(app); - - return err; } void gxp_fw_data_destroy_app(struct gxp_dev *gxp, void *application) { struct app_metadata *app = application; - struct gxp_fw_data_manager *mgr = gxp->data_mgr; - int i; - - for (i = 0; i < app->user_doorbells_count; i++) - range_alloc_put(mgr->doorbell_allocator, - app->user_doorbells[i]); - kfree(app->user_doorbells); - - for (i = 0; i < app->user_barriers_count; i++) - range_alloc_put(mgr->sync_barrier_allocator, - app->user_barriers[i]); - kfree(app->user_barriers); - - mem_alloc_free(mgr->allocator, &app->user_mem); - mem_alloc_free(mgr->allocator, &app->doorbells_mem); - mem_alloc_free(mgr->allocator, &app->sync_barriers_mem); - mem_alloc_free(mgr->allocator, &app->semaphores_mem); - mem_alloc_free(mgr->allocator, &app->cores_mem); - for (i = 0; i < app->core_count; i++) { - mem_alloc_free(mgr->allocator, &app->core_cmd_queues_mem[i]); - mem_alloc_free(mgr->allocator, &app->core_rsp_queues_mem[i]); - } - mem_alloc_free(mgr->allocator, &app->app_mem); - kfree(app); + if (!app) + return; + if (gxp_fw_data_use_per_vd_config(app->vd)) + return _gxp_fw_data_destroy_app(gxp, app); + return gxp_fw_data_destroy_app_legacy(gxp, app); } void gxp_fw_data_destroy(struct gxp_dev *gxp) diff --git a/gxp-firmware-data.h b/gxp-firmware-data.h index a947cb8..e2296bd 100644 --- a/gxp-firmware-data.h +++ b/gxp-firmware-data.h @@ -11,6 +11,12 @@ #include "gxp-dma.h" #include "gxp-internal.h" +#include "gxp-vd.h" + +enum gxp_fw_data_protocol { + /* Use the per-VD configuration region. 
*/ + FW_DATA_PROTOCOL_PER_VD_CONFIG = 2, +}; /** * gxp_fw_data_init() - Initializes the FW data manager submodule. @@ -30,14 +36,15 @@ int gxp_fw_data_init(struct gxp_dev *gxp); * virtual device) used by the specified physical * cores. * @gxp: The parent GXP device - * @core_list: A bitmap of the physical cores used in this application + * @vd: The virtual device this app is being created for * * Return: * ptr - A pointer of the newly created application handle, an error pointer * (PTR_ERR) otherwise. * -ENOMEM - Insufficient memory to create the application */ -void *gxp_fw_data_create_app(struct gxp_dev *gxp, uint core_list); +void *gxp_fw_data_create_app(struct gxp_dev *gxp, + struct gxp_virtual_device *vd); /** * gxp_fw_data_destroy_app() - Deallocates the HW and memory resources used by @@ -94,4 +101,9 @@ int gxp_fw_data_set_core_telemetry_descriptors(struct gxp_dev *gxp, u8 type, u32 gxp_fw_data_get_core_telemetry_device_status(struct gxp_dev *gxp, uint core, u8 type); +static inline bool gxp_fw_data_use_per_vd_config(struct gxp_virtual_device *vd) +{ + return vd->config_version >= FW_DATA_PROTOCOL_PER_VD_CONFIG; +} + #endif /* __GXP_FIRMWARE_DATA_H__ */ diff --git a/gxp-firmware.c b/gxp-firmware.c index fcf6a6f..32d5dc3 100644 --- a/gxp-firmware.c +++ b/gxp-firmware.c @@ -16,11 +16,16 @@ #include <linux/slab.h> #include <linux/types.h> +#include <gcip/gcip-alloc-helper.h> +#include <gcip/gcip-common-image-header.h> +#include <gcip/gcip-image-config.h> + #include "gxp-bpm.h" #include "gxp-config.h" #include "gxp-core-telemetry.h" #include "gxp-debug-dump.h" #include "gxp-doorbell.h" +#include "gxp-firmware-data.h" #include "gxp-firmware.h" #include "gxp-host-device-structs.h" #include "gxp-internal.h" @@ -34,15 +39,47 @@ #include "unittests/factory/fake-gxp-firmware.h" #endif -#define FW_HEADER_SIZE (0x1000) -#define FW_IMAGE_TYPE_OFFSET (0x400) +#define FW_HEADER_SIZE GCIP_FW_HEADER_SIZE static int gxp_dsp_fw_auth_disable; 
module_param_named(dsp_fw_auth_disable, gxp_dsp_fw_auth_disable, int, 0660); -static bool gxp_core_boot = true; +bool gxp_core_boot = true; module_param_named(core_boot, gxp_core_boot, bool, 0660); +/* + * Fetches and records image config of the first firmware. + */ +static void gxp_firmware_get_image_config(struct gxp_dev *gxp, + struct gxp_firmware_manager *mgr) +{ + struct gcip_common_image_header *hdr = + (struct gcip_common_image_header *)mgr->firmwares[0]->data; + struct gcip_image_config *cfg; + + if (unlikely(mgr->firmwares[0]->size < FW_HEADER_SIZE)) + return; + cfg = get_image_config_from_hdr(hdr); + if (cfg) + mgr->img_cfg = *cfg; + else + dev_warn(gxp->dev, + "Firmware doesn't have a valid image config"); +} + +/* + * Call this function when mgr->firmwares have been populated. + * This function sets is_firmware_requested to true. + * + * Caller holds mgr->dsp_firmware_lock. + */ +static void gxp_firmware_has_requested(struct gxp_dev *gxp, + struct gxp_firmware_manager *mgr) +{ + gxp_firmware_get_image_config(gxp, mgr); + mgr->is_firmware_requested = true; +} + static int request_dsp_firmware(struct gxp_dev *gxp, char *name_prefix, const struct firmware *out_firmwares[GXP_NUM_CORES]) @@ -97,25 +134,67 @@ static int elf_load_segments(struct gxp_dev *gxp, const u8 *elf_data, ehdr = (struct elf32_hdr *)elf_data; phdr = (struct elf32_phdr *)(elf_data + ehdr->e_phoff); + if ((ehdr->e_ident[EI_MAG0] != ELFMAG0) || + (ehdr->e_ident[EI_MAG1] != ELFMAG1) || + (ehdr->e_ident[EI_MAG2] != ELFMAG2) || + (ehdr->e_ident[EI_MAG3] != ELFMAG3)) { + dev_err(gxp->dev, "Invalid ELF format."); + return -EINVAL; + } + /* go through the available ELF segments */ for (i = 0; i < ehdr->e_phnum; i++, phdr++) { const u64 da = phdr->p_paddr; const u32 memsz = phdr->p_memsz; const u32 filesz = phdr->p_filesz; + const u32 offset = phdr->p_offset; + const u32 p_flags = phdr->p_flags; void *ptr; - if (phdr->p_type != PT_LOAD || !phdr->p_flags || !memsz) + if (phdr->p_type != PT_LOAD) + 
continue; + + if (!phdr->p_flags) + continue; + + if (!memsz) continue; if (!(da >= buffer->daddr && - da + memsz <= buffer->daddr + buffer->size)) + da + memsz <= buffer->daddr + buffer->size)) { + /* + * Some BSS data may be referenced from TCM, and can be + * skipped while loading + */ + dev_err(gxp->dev, + "Segment out of bounds: da %#llx mem %#x. Skipping...", + da, memsz); continue; + } + + dev_info(gxp->dev, + "phdr: da %#llx memsz %#x filesz %#x perm %d", da, + memsz, filesz, p_flags); + + if (filesz > memsz) { + dev_err(gxp->dev, "Bad phdr filesz %#x memsz %#x", + filesz, memsz); + ret = -EINVAL; + break; + } + + if (offset + filesz > size) { + dev_err(gxp->dev, "Truncated fw: need %#x avail %#zx", + offset + filesz, size); + ret = -EINVAL; + break; + } /* grab the kernel address for this device address */ ptr = buffer->vaddr + (da - buffer->daddr); if (!ptr) { - dev_err(gxp->dev, "Bad phdr: da 0x%llx mem 0x%x\n", - da, memsz); + dev_err(gxp->dev, "Bad phdr: da %#llx mem %#x", da, + memsz); ret = -EINVAL; break; } @@ -232,137 +311,91 @@ error: return ret; } -static int gxp_firmware_fetch_boundary(struct gxp_dev *gxp, const u8 *elf_data, - size_t size, - const struct gxp_mapped_resource *buffer, - dma_addr_t *boundary_ptr) -{ - struct elf32_hdr *ehdr = (struct elf32_hdr *)elf_data; - struct elf32_phdr *phdr = (struct elf32_phdr *)(elf_data + ehdr->e_phoff); - int i, ret = 0; - dma_addr_t boundary = 0; - - if ((ehdr->e_ident[EI_MAG0] != ELFMAG0) || - (ehdr->e_ident[EI_MAG1] != ELFMAG1) || - (ehdr->e_ident[EI_MAG2] != ELFMAG2) || - (ehdr->e_ident[EI_MAG3] != ELFMAG3)) { - dev_err(gxp->dev, "Invalid ELF format."); - return -EINVAL; - } - - /* go through the available ELF segments */ - for (i = 0; i < ehdr->e_phnum; i++, phdr++) { - const u64 da = phdr->p_paddr; - const u32 memsz = phdr->p_memsz; - const u32 filesz = phdr->p_filesz; - const u32 offset = phdr->p_offset; - const u32 p_flags = phdr->p_flags; - - if (phdr->p_type != PT_LOAD || !p_flags || !memsz) - 
continue; - - if (!(da >= buffer->daddr && - da + memsz <= buffer->daddr + buffer->size)) { - /* - * Some BSS data may be referenced from TCM, and can be - * skipped while loading - */ - dev_err(gxp->dev, "Segment out of bounds: da 0x%llx mem 0x%x. Skipping...", - da, memsz); - continue; - } - - dev_info(gxp->dev, - "phdr: da %#llx memsz %#x filesz %#x perm %d", da, - memsz, filesz, p_flags); - - if (filesz > memsz) { - dev_err(gxp->dev, "Bad phdr filesz %#x memsz %#x", - filesz, memsz); - ret = -EINVAL; - break; - } - - if (offset + filesz > size) { - dev_err(gxp->dev, "Truncated fw: need %#x avail %#zx", - offset + filesz, size); - ret = -EINVAL; - break; - } - if (p_flags & PF_W) { - if (!boundary) - boundary = da; - } else if (boundary) { - dev_err(gxp->dev, - "Found RO region after a writable segment"); - ret = -EINVAL; - break; - } - } - /* no boundary has been found - assume the whole image is RO */ - if (!boundary) - boundary = buffer->daddr + buffer->size; - if (!ret) - *boundary_ptr = boundary; - - return ret; -} - -/* - * Sets @rw_boundaries by analyzing LOAD segments in ELF headers. - * - * Assumes the LOAD segments are arranged with RO first then RW. Returns -EINVAL - * if this is not true. - */ -static int gxp_firmware_fetch_boundaries(struct gxp_dev *gxp, - struct gxp_firmware_manager *mgr) -{ - int core, ret; - - for (core = 0; core < GXP_NUM_CORES; core++) { - ret = gxp_firmware_fetch_boundary( - gxp, mgr->firmwares[core]->data + FW_HEADER_SIZE, - mgr->firmwares[core]->size - FW_HEADER_SIZE, - &gxp->fwbufs[core], &mgr->rw_boundaries[core]); - if (ret) { - dev_err(gxp->dev, - "failed to fetch boundary of core %d: %d", core, - ret); - goto error; - } - } - return 0; - -error: - memset(mgr->rw_boundaries, 0, sizeof(mgr->rw_boundaries)); - return ret; -} - /* Forward declaration for usage inside gxp_firmware_load(..). 
*/ static void gxp_firmware_unload(struct gxp_dev *gxp, uint core); -static void gxp_program_reset_vector(struct gxp_dev *gxp, uint core, bool verbose) +static void gxp_program_reset_vector(struct gxp_dev *gxp, uint core, + uint phys_core, bool verbose) { u32 reset_vec; - reset_vec = gxp_read_32(gxp, GXP_CORE_REG_ALT_RESET_VECTOR(core)); + reset_vec = gxp_read_32(gxp, GXP_CORE_REG_ALT_RESET_VECTOR(phys_core)); if (verbose) dev_notice(gxp->dev, "Current Aurora reset vector for core %u: 0x%x\n", - core, reset_vec); - gxp_write_32(gxp, GXP_CORE_REG_ALT_RESET_VECTOR(core), + phys_core, reset_vec); + gxp_write_32(gxp, GXP_CORE_REG_ALT_RESET_VECTOR(phys_core), gxp->firmware_mgr->entry_points[core]); if (verbose) dev_notice(gxp->dev, "New Aurora reset vector for core %u: 0x%x\n", - core, gxp->firmware_mgr->entry_points[core]); + phys_core, gxp->firmware_mgr->entry_points[core]); } -static int gxp_firmware_load(struct gxp_dev *gxp, uint core) +static void *get_scratchpad_base(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core) +{ + void *mem; + size_t rw_size; + + if (vd && gxp_fw_data_use_per_vd_config(vd)) + return vd->core_cfg.vaddr + + (vd->core_cfg.size / GXP_NUM_CORES) * core; + + if (!vd || !vd->rwdata_sgt[core]) + return gxp->fwbufs[core].vaddr + AURORA_SCRATCHPAD_OFF; + + /* Return the last AURORA_SCRATCHPAD_LEN of rwdata_sgt. 
*/ + mem = gcip_noncontiguous_sgt_to_mem(vd->rwdata_sgt[core]); + rw_size = gxp->fwbufs[core].size - vd->fw_ro_size; + return mem + rw_size - AURORA_SCRATCHPAD_LEN; +} + +/* TODO(b/265562894): remove scratchpad region support */ +static void flush_scratchpad_region(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core) +{ + if (!vd || gxp_fw_data_use_per_vd_config(vd) || !vd->rwdata_sgt[core]) + return; + dma_sync_sg_for_device(gxp->dev, vd->rwdata_sgt[core]->sgl, + vd->rwdata_sgt[core]->orig_nents, + DMA_BIDIRECTIONAL); +} + +static void invalidate_scratchpad_region(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, + uint core) +{ + if (!vd || gxp_fw_data_use_per_vd_config(vd) || !vd->rwdata_sgt[core]) + return; + dma_sync_sg_for_cpu(gxp->dev, vd->rwdata_sgt[core]->sgl, + vd->rwdata_sgt[core]->orig_nents, + DMA_BIDIRECTIONAL); +} + +static void reset_core_config_region(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core) +{ + struct gxp_host_control_region *core_cfg; + + core_cfg = get_scratchpad_base(gxp, vd, core); + if (gxp_fw_data_use_per_vd_config(vd)) { + core_cfg->core_alive_magic = 0; + core_cfg->top_access_ok = 0; + core_cfg->boot_status = GXP_BOOT_STATUS_NONE; + gxp_firmware_set_boot_mode(gxp, vd, core, + GXP_BOOT_MODE_COLD_BOOT); + } else { + memset(core_cfg, 0, AURORA_SCRATCHPAD_LEN); + gxp_firmware_set_boot_mode(gxp, vd, core, + GXP_BOOT_MODE_REQUEST_COLD_BOOT); + } +} + +static int gxp_firmware_load(struct gxp_dev *gxp, struct gxp_virtual_device *vd, + uint core) { struct gxp_firmware_manager *mgr = gxp->firmware_mgr; - u32 offset; - void __iomem *core_scratchpad_base; int ret; if (!mgr->firmwares[core]) @@ -382,24 +415,11 @@ static int gxp_firmware_load(struct gxp_dev *gxp, uint core) mgr->firmwares[core]->data + FW_HEADER_SIZE, core); - memset(gxp->fwbufs[core].vaddr + AURORA_SCRATCHPAD_OFF, 0, - AURORA_SCRATCHPAD_LEN); - - core_scratchpad_base = gxp->fwbufs[core].vaddr + AURORA_SCRATCHPAD_OFF; - offset = 
SCRATCHPAD_MSG_OFFSET(MSG_CORE_ALIVE); - writel(0, core_scratchpad_base + offset); - offset = SCRATCHPAD_MSG_OFFSET(MSG_TOP_ACCESS_OK); - writel(0, core_scratchpad_base + offset); - /* TODO(b/188970444): Cleanup logging of addresses */ dev_notice(gxp->dev, "ELF loaded at virtual: %pK and physical: 0x%llx\n", gxp->fwbufs[core].vaddr, gxp->fwbufs[core].paddr); - /* Configure bus performance monitors */ - gxp_bpm_configure(gxp, core, INST_BPM_OFFSET, BPM_EVENT_READ_XFER); - gxp_bpm_configure(gxp, core, DATA_BPM_OFFSET, BPM_EVENT_WRITE_XFER); - return 0; out_firmware_unload: @@ -407,18 +427,20 @@ out_firmware_unload: return ret; } -static int gxp_firmware_handshake(struct gxp_dev *gxp, uint core) +static int gxp_firmware_handshake(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core, + uint phys_core) { - u32 offset; u32 __maybe_unused expected_top_value; - void __iomem *core_scratchpad_base; + /* Prevent the read loop below from being optimized. */ + volatile struct gxp_host_control_region *core_cfg; int ctr; /* Wait for core to come up */ - dev_notice(gxp->dev, "Waiting for core %u to power up...\n", core); + dev_notice(gxp->dev, "Waiting for core %u to power up...\n", phys_core); ctr = 1000; while (ctr) { - if (gxp_lpm_is_powered(gxp, CORE_TO_PSM(core))) + if (gxp_lpm_is_powered(gxp, CORE_TO_PSM(phys_core))) break; udelay(1 * GXP_TIME_DELAY_FACTOR); ctr--; @@ -432,9 +454,9 @@ static int gxp_firmware_handshake(struct gxp_dev *gxp, uint core) /* Wait for 500ms. 
Then check if Q7 core is alive */ dev_notice(gxp->dev, "Waiting for core %u to respond...\n", - core); + phys_core); - core_scratchpad_base = gxp->fwbufs[core].vaddr + AURORA_SCRATCHPAD_OFF; + core_cfg = get_scratchpad_base(gxp, vd, core); /* * Currently, the hello_world FW writes a magic number @@ -442,7 +464,6 @@ static int gxp_firmware_handshake(struct gxp_dev *gxp, uint core) * space as an alive message */ ctr = 5000; - offset = SCRATCHPAD_MSG_OFFSET(MSG_CORE_ALIVE); #if IS_ENABLED(CONFIG_GXP_TEST) fake_gxp_firmware_flush_work_all(); /* @@ -454,16 +475,18 @@ static int gxp_firmware_handshake(struct gxp_dev *gxp, uint core) #endif usleep_range(50 * GXP_TIME_DELAY_FACTOR, 60 * GXP_TIME_DELAY_FACTOR); while (ctr--) { - if (readl(core_scratchpad_base + offset) == Q7_ALIVE_MAGIC) + invalidate_scratchpad_region(gxp, vd, core); + if (core_cfg->core_alive_magic == Q7_ALIVE_MAGIC) break; usleep_range(1 * GXP_TIME_DELAY_FACTOR, 10 * GXP_TIME_DELAY_FACTOR); } - if (readl(core_scratchpad_base + offset) != Q7_ALIVE_MAGIC) { - dev_err(gxp->dev, "Core %u did not respond!\n", core); + invalidate_scratchpad_region(gxp, vd, core); + if (core_cfg->core_alive_magic != Q7_ALIVE_MAGIC) { + dev_err(gxp->dev, "Core %u did not respond!\n", phys_core); return -EIO; } - dev_notice(gxp->dev, "Core %u is alive!\n", core); + dev_notice(gxp->dev, "Core %u is alive!\n", phys_core); #if !IS_ENABLED(CONFIG_GXP_GEM5) /* @@ -477,26 +500,27 @@ static int gxp_firmware_handshake(struct gxp_dev *gxp, uint core) * handshakes in Gem5. 
*/ ctr = 1000; - offset = SCRATCHPAD_MSG_OFFSET(MSG_TOP_ACCESS_OK); - expected_top_value = BIT(CORE_WAKEUP_DOORBELL(core)); + expected_top_value = BIT(CORE_WAKEUP_DOORBELL(phys_core)); while (ctr--) { - if (readl(core_scratchpad_base + offset) == expected_top_value) + invalidate_scratchpad_region(gxp, vd, core); + if (core_cfg->top_access_ok == expected_top_value) break; udelay(1 * GXP_TIME_DELAY_FACTOR); } - if (readl(core_scratchpad_base + offset) != expected_top_value) { - dev_err(gxp->dev, "TOP access from core %u failed!\n", core); + if (core_cfg->top_access_ok != expected_top_value) { + dev_err(gxp->dev, "TOP access from core %u failed!\n", phys_core); return -EIO; } - dev_notice(gxp->dev, "TOP access from core %u successful!\n", core); + dev_notice(gxp->dev, "TOP access from core %u successful!\n", phys_core); #endif /* Stop bus performance monitors */ - gxp_bpm_stop(gxp, core); + gxp_bpm_stop(gxp, phys_core); dev_notice(gxp->dev, "Core%u Instruction read transactions: 0x%x\n", - core, gxp_bpm_read_counter(gxp, core, INST_BPM_OFFSET)); - dev_notice(gxp->dev, "Core%u Data write transactions: 0x%x\n", core, - gxp_bpm_read_counter(gxp, core, DATA_BPM_OFFSET)); + core, gxp_bpm_read_counter(gxp, phys_core, INST_BPM_OFFSET)); + dev_notice(gxp->dev, "Core%u Data write transactions: 0x%x\n", + phys_core, + gxp_bpm_read_counter(gxp, phys_core, DATA_BPM_OFFSET)); return 0; } @@ -597,20 +621,14 @@ static ssize_t load_dsp_firmware_store(struct device *dev, mgr->firmwares[core] = firmwares[core]; } - ret = gxp_firmware_fetch_boundaries(gxp, mgr); - if (ret) - goto err_fetch_boundaries; - kfree(mgr->firmware_name); mgr->firmware_name = name_buf; + gxp_firmware_has_requested(gxp, mgr); mutex_unlock(&mgr->dsp_firmware_lock); up_read(&gxp->vd_semaphore); return count; -err_fetch_boundaries: - for (core = 0; core < GXP_NUM_CORES; core++) - mgr->firmwares[core] = NULL; err_authenticate_firmware: for (core = 0; core < GXP_NUM_CORES; core++) release_firmware(firmwares[core]); @@ 
-776,11 +794,7 @@ int gxp_firmware_request_if_needed(struct gxp_dev *gxp) if (ret) goto err_authenticate_firmware; - ret = gxp_firmware_fetch_boundaries(gxp, mgr); - if (ret) - goto err_authenticate_firmware; - - mgr->is_firmware_requested = true; + gxp_firmware_has_requested(gxp, mgr); out: mutex_unlock(&mgr->dsp_firmware_lock); @@ -813,37 +827,49 @@ static void disable_core_interrupts(struct gxp_dev *gxp, uint core) gxp_write_32(gxp, GXP_CORE_REG_DEDICATED_INT_MASK(core), 0); } -static int gxp_firmware_setup(struct gxp_dev *gxp, uint core) +static inline uint select_core(struct gxp_virtual_device *vd, uint virt_core, + uint phys_core) +{ + return gxp_fw_data_use_per_vd_config(vd) ? virt_core : phys_core; +} + +static int gxp_firmware_setup(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core, + uint phys_core) { int ret = 0; struct gxp_firmware_manager *mgr = gxp->firmware_mgr; - if (mgr->firmware_running & BIT(core)) { + if (gxp_core_boot && mgr->firmware_running & BIT(phys_core)) { dev_err(gxp->dev, "Firmware is already running on core %u\n", - core); + phys_core); return -EBUSY; } - ret = gxp_firmware_load(gxp, core); + ret = gxp_firmware_load(gxp, vd, core); if (ret) { - dev_err(gxp->dev, "Failed to load firmware on core %u\n", core); + dev_err(gxp->dev, "Failed to load firmware on core %u\n", + phys_core); return ret; } + /* Configure bus performance monitors */ + gxp_bpm_configure(gxp, phys_core, INST_BPM_OFFSET, BPM_EVENT_READ_XFER); + gxp_bpm_configure(gxp, phys_core, DATA_BPM_OFFSET, BPM_EVENT_WRITE_XFER); /* Mark this as a cold boot */ - if (gxp_core_boot) - gxp_firmware_set_boot_mode(gxp, core, - GXP_BOOT_MODE_REQUEST_COLD_BOOT); - - ret = gxp_firmware_setup_hw_after_block_off(gxp, core, - /*verbose=*/true); - if (ret) { - dev_err(gxp->dev, "Failed to power up core %u\n", core); - gxp_firmware_unload(gxp, core); - return ret; + if (gxp_core_boot) { + reset_core_config_region(gxp, vd, core); + ret = gxp_firmware_setup_hw_after_block_off(gxp, 
core, + phys_core, + /*verbose=*/true); + if (ret) { + dev_err(gxp->dev, "Failed to power up core %u\n", core); + gxp_firmware_unload(gxp, core); + return ret; + } } - enable_core_interrupts(gxp, core); + enable_core_interrupts(gxp, phys_core); return ret; } @@ -865,82 +891,85 @@ static void gxp_firmware_wakeup_cores(struct gxp_dev *gxp, uint core_list) static int gxp_firmware_finish_startup(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - uint virt_core, uint core) + uint virt_core, uint phys_core) { struct work_struct *work; struct gxp_firmware_manager *mgr = gxp->firmware_mgr; int ret = 0; + uint core = select_core(vd, virt_core, phys_core); if (gxp_core_boot) { - ret = gxp_firmware_handshake(gxp, core); + ret = gxp_firmware_handshake(gxp, vd, core, phys_core); if (ret) { dev_err(gxp->dev, - "Firmware handshake failed on core %u\n", core); + "Firmware handshake failed on core %u\n", + phys_core); goto err_firmware_off; } /* Initialize mailbox */ if (gxp->mailbox_mgr->allocate_mailbox) { - gxp->mailbox_mgr->mailboxes[core] = + gxp->mailbox_mgr->mailboxes[phys_core] = gxp->mailbox_mgr->allocate_mailbox( - gxp->mailbox_mgr, vd, virt_core, core); - if (IS_ERR(gxp->mailbox_mgr->mailboxes[core])) { + gxp->mailbox_mgr, vd, virt_core, phys_core); + if (IS_ERR(gxp->mailbox_mgr->mailboxes[phys_core])) { dev_err(gxp->dev, "Unable to allocate mailbox (core=%u, ret=%ld)\n", - core, + phys_core, PTR_ERR(gxp->mailbox_mgr - ->mailboxes[core])); + ->mailboxes[phys_core])); ret = PTR_ERR( - gxp->mailbox_mgr->mailboxes[core]); - gxp->mailbox_mgr->mailboxes[core] = NULL; + gxp->mailbox_mgr->mailboxes[phys_core]); + gxp->mailbox_mgr->mailboxes[phys_core] = NULL; goto err_firmware_off; } } + mgr->firmware_running |= BIT(phys_core); } - work = gxp_debug_dump_get_notification_handler(gxp, core); + work = gxp_debug_dump_get_notification_handler(gxp, phys_core); if (work) gxp_notification_register_handler( - gxp, core, HOST_NOTIF_DEBUG_DUMP_READY, work); + gxp, phys_core, 
HOST_NOTIF_DEBUG_DUMP_READY, work); - work = gxp_core_telemetry_get_notification_handler(gxp, core); + work = gxp_core_telemetry_get_notification_handler(gxp, phys_core); if (work) gxp_notification_register_handler( - gxp, core, HOST_NOTIF_CORE_TELEMETRY_STATUS, work); - - mgr->firmware_running |= BIT(core); + gxp, phys_core, HOST_NOTIF_CORE_TELEMETRY_STATUS, work); return ret; err_firmware_off: if (gxp_core_boot) - gxp_pm_core_off(gxp, core); + gxp_pm_core_off(gxp, phys_core); gxp_firmware_unload(gxp, core); return ret; } static void gxp_firmware_stop_core(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - uint virt_core, uint core) + uint virt_core, uint phys_core) { struct gxp_firmware_manager *mgr = gxp->firmware_mgr; - if (!(mgr->firmware_running & BIT(core))) - dev_err(gxp->dev, "Firmware is not running on core %u\n", core); + if (gxp_core_boot && !(mgr->firmware_running & BIT(phys_core))) + dev_err(gxp->dev, "Firmware is not running on core %u\n", + phys_core); - mgr->firmware_running &= ~BIT(core); + mgr->firmware_running &= ~BIT(phys_core); - gxp_notification_unregister_handler(gxp, core, + gxp_notification_unregister_handler(gxp, phys_core, HOST_NOTIF_DEBUG_DUMP_READY); - gxp_notification_unregister_handler(gxp, core, + gxp_notification_unregister_handler(gxp, phys_core, HOST_NOTIF_CORE_TELEMETRY_STATUS); if (gxp_core_boot) { if (gxp->mailbox_mgr->release_mailbox) { gxp->mailbox_mgr->release_mailbox( gxp->mailbox_mgr, vd, virt_core, - gxp->mailbox_mgr->mailboxes[core]); - dev_notice(gxp->dev, "Mailbox %u released\n", core); + gxp->mailbox_mgr->mailboxes[phys_core]); + dev_notice(gxp->dev, "Mailbox %u released\n", + phys_core); } if (vd->state == GXP_VD_RUNNING) { @@ -948,46 +977,55 @@ static void gxp_firmware_stop_core(struct gxp_dev *gxp, * Disable interrupts to prevent cores from being woken up * unexpectedly. 
*/ - disable_core_interrupts(gxp, core); - gxp_pm_core_off(gxp, core); + disable_core_interrupts(gxp, phys_core); + gxp_pm_core_off(gxp, phys_core); } } - gxp_firmware_unload(gxp, core); + gxp_firmware_unload(gxp, select_core(vd, virt_core, phys_core)); } int gxp_firmware_run(struct gxp_dev *gxp, struct gxp_virtual_device *vd, uint core_list) { int ret; - uint core, virt_core; + uint phys_core, virt_core; uint failed_cores = 0; int failed_ret; - for (core = 0; core < GXP_NUM_CORES; core++) { - if (core_list & BIT(core)) { - ret = gxp_firmware_setup(gxp, core); - if (ret) { - failed_cores |= BIT(core); - failed_ret = ret; - dev_err(gxp->dev, "Failed to run firmware on core %u\n", - core); - } + virt_core = 0; + for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) { + uint core = select_core(vd, virt_core, phys_core); + + if (!(core_list & BIT(phys_core))) + continue; + + ret = gxp_firmware_setup(gxp, vd, core, phys_core); + if (ret) { + failed_cores |= BIT(phys_core); + failed_ret = ret; + dev_err(gxp->dev, "Failed to run firmware on core %u\n", + phys_core); } + virt_core++; } if (failed_cores != 0) { /* * Shut down the cores which call `gxp_firmware_setup` * successfully */ - for (core = 0; core < GXP_NUM_CORES; core++) { - if (core_list & BIT(core)) { - if (!(failed_cores & BIT(core))) { - if (gxp_core_boot) - gxp_pm_core_off(gxp, core); - gxp_firmware_unload(gxp, core); - } + virt_core = 0; + for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) { + uint core = select_core(vd, virt_core, phys_core); + + if (!(core_list & BIT(phys_core))) + continue; + if (!(failed_cores & BIT(phys_core))) { + if (gxp_core_boot) + gxp_pm_core_off(gxp, phys_core); + gxp_firmware_unload(gxp, core); } + virt_core++; } return failed_ret; } @@ -997,11 +1035,11 @@ int gxp_firmware_run(struct gxp_dev *gxp, struct gxp_virtual_device *vd, * gxp_doorbell_enable_for_core here to set GXP_REG_COMMON_INT_MASK_0 * first to enable the firmware handshakes. 
*/ - for (core = 0; core < GXP_NUM_CORES; core++) { - if (!(core_list & BIT(core))) + for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) { + if (!(core_list & BIT(phys_core))) continue; - gxp_doorbell_enable_for_core(gxp, CORE_WAKEUP_DOORBELL(core), - core); + gxp_doorbell_enable_for_core( + gxp, CORE_WAKEUP_DOORBELL(phys_core), phys_core); } #endif /* Switch clock mux to the normal state to guarantee LPM works */ @@ -1011,30 +1049,28 @@ int gxp_firmware_run(struct gxp_dev *gxp, struct gxp_virtual_device *vd, } virt_core = 0; - for (core = 0; core < GXP_NUM_CORES; core++) { - if (core_list & BIT(core)) { - ret = gxp_firmware_finish_startup(gxp, vd, virt_core, - core); - if (ret) { - failed_cores |= BIT(core); - dev_err(gxp->dev, - "Failed to run firmware on core %u\n", - core); - } - virt_core++; + for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) { + if (!(core_list & BIT(phys_core))) + continue; + ret = gxp_firmware_finish_startup(gxp, vd, virt_core, + phys_core); + if (ret) { + failed_cores |= BIT(phys_core); + dev_err(gxp->dev, "Failed to run firmware on core %u\n", + phys_core); } + virt_core++; } if (failed_cores != 0) { virt_core = 0; - for (core = 0; core < GXP_NUM_CORES; core++) { - if (core_list & BIT(core)) { - if (!(failed_cores & BIT(core))) { - gxp_firmware_stop_core(gxp, vd, - virt_core, core); - } - virt_core++; - } + for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) { + if (!(core_list & BIT(phys_core))) + continue; + if (!(failed_cores & BIT(phys_core))) + gxp_firmware_stop_core(gxp, vd, virt_core, + phys_core); + virt_core++; } } /* Check if we need to set clock mux to low state as requested */ @@ -1045,14 +1081,12 @@ int gxp_firmware_run(struct gxp_dev *gxp, struct gxp_virtual_device *vd, } int gxp_firmware_setup_hw_after_block_off(struct gxp_dev *gxp, uint core, - bool verbose) + uint phys_core, bool verbose) { - gxp_program_reset_vector(gxp, core, verbose); - - return gxp_core_boot ? 
gxp_pm_core_on(gxp, core, verbose) : 0; + gxp_program_reset_vector(gxp, core, phys_core, verbose); + return gxp_pm_core_on(gxp, phys_core, verbose); } - void gxp_firmware_stop(struct gxp_dev *gxp, struct gxp_virtual_device *vd, uint core_list) { @@ -1066,30 +1100,50 @@ void gxp_firmware_stop(struct gxp_dev *gxp, struct gxp_virtual_device *vd, } } -void gxp_firmware_set_boot_mode(struct gxp_dev *gxp, uint core, u32 mode) +void gxp_firmware_set_boot_mode(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core, + u32 mode) { - void __iomem *boot_mode_addr; + struct gxp_host_control_region *core_cfg; /* Callers shouldn't call the function under this condition. */ if (!gxp->fwbufs[core].vaddr) return; - boot_mode_addr = gxp->fwbufs[core].vaddr + AURORA_SCRATCHPAD_OFF + - SCRATCHPAD_MSG_OFFSET(MSG_BOOT_MODE); - - writel(mode, boot_mode_addr); + core_cfg = get_scratchpad_base(gxp, vd, core); + core_cfg->boot_mode = mode; + flush_scratchpad_region(gxp, vd, core); } -u32 gxp_firmware_get_boot_mode(struct gxp_dev *gxp, uint core) +u32 gxp_firmware_get_boot_mode(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core) { - void __iomem *boot_mode_addr; + struct gxp_host_control_region *core_cfg; /* Callers shouldn't call the function under this condition. 
*/ if (!gxp->fwbufs[core].vaddr) return 0; - boot_mode_addr = gxp->fwbufs[core].vaddr + AURORA_SCRATCHPAD_OFF + - SCRATCHPAD_MSG_OFFSET(MSG_BOOT_MODE); + core_cfg = get_scratchpad_base(gxp, vd, core); + invalidate_scratchpad_region(gxp, vd, core); + return core_cfg->boot_mode; +} + +void gxp_firmware_set_boot_status(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core, + u32 status) +{ + struct gxp_host_control_region *core_cfg; + + core_cfg = get_scratchpad_base(gxp, vd, core); + core_cfg->boot_status = status; +} + +u32 gxp_firmware_get_boot_status(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core) +{ + struct gxp_host_control_region *core_cfg; - return readl(boot_mode_addr); + core_cfg = get_scratchpad_base(gxp, vd, core); + return core_cfg->boot_status; } diff --git a/gxp-firmware.h b/gxp-firmware.h index aff602a..f19adc9 100644 --- a/gxp-firmware.h +++ b/gxp-firmware.h @@ -10,6 +10,8 @@ #include <linux/bitops.h> #include <linux/sizes.h> +#include <gcip/gcip-image-config.h> + #include "gxp-config.h" #include "gxp-internal.h" @@ -41,6 +43,15 @@ #define PRIVATE_FW_DATA_SIZE SZ_2M #define SHARED_FW_DATA_SIZE SZ_1M +extern bool gxp_core_boot; + +/* Indexes same as image_config.IommuMappingIdx in the firmware side. */ +enum gxp_imgcfg_idx { + CORE_CFG_REGION_IDX, + VD_CFG_REGION_IDX, + SYS_CFG_REGION_IDX, +}; + struct gxp_firmware_manager { const struct firmware *firmwares[GXP_NUM_CORES]; char *firmware_name; @@ -49,18 +60,15 @@ struct gxp_firmware_manager { struct mutex dsp_firmware_lock; /* Firmware status bitmap. Accessors must hold `vd_semaphore`. */ u32 firmware_running; - /* - * The boundary of readonly segments and writable segments. - * The mappings are programmed as - * [fwbufs[i].daddr, rw_boundaries[i]): RO - * [rw_boundaries[i], daddr + fwbufs[i].size): RW - * - * The boundary information is collected by parsing the ELF - * header after @firmwares have been fetched. 
- */ - dma_addr_t rw_boundaries[GXP_NUM_CORES]; /* Store the entry point of the DSP core firmware. */ u32 entry_points[GXP_NUM_CORES]; + /* + * Cached image config, for easier fetching config entries. + * Not a pointer to the firmware buffer because we want to forcely change the + * privilege level to NS. + * Only valid on firmware requested. + */ + struct gcip_image_config img_cfg; }; enum aurora_msg { @@ -102,9 +110,12 @@ int gxp_firmware_request_if_needed(struct gxp_dev *gxp); /* * Re-program the reset vector and power on the core's LPM if the block had * been shut down. + * + * @core should be virt core when using per-VD config method, otherwise should + * be phys core. */ int gxp_firmware_setup_hw_after_block_off(struct gxp_dev *gxp, uint core, - bool verbose); + uint phys_core, bool verbose); /* * Loads the firmware for the cores in system memory and powers up the cores @@ -123,12 +134,29 @@ void gxp_firmware_stop(struct gxp_dev *gxp, struct gxp_virtual_device *vd, * Sets the specified core's boot mode or suspend request value. * This function should be called only after the firmware has been run. */ -void gxp_firmware_set_boot_mode(struct gxp_dev *gxp, uint core, u32 mode); +void gxp_firmware_set_boot_mode(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core, + u32 mode); /* * Returns the specified core's boot mode or boot status. * This function should be called only after the firmware has been run. */ -u32 gxp_firmware_get_boot_mode(struct gxp_dev *gxp, uint core); +u32 gxp_firmware_get_boot_mode(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core); + +/* + * Sets the specified core's boot status or suspend request value. + */ +void gxp_firmware_set_boot_status(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core, + u32 status); + +/* + * Returns the specified core's boot status or boot status. + * This function should be called only after the firmware has been run. 
+ */ +u32 gxp_firmware_get_boot_status(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core); #endif /* __GXP_FIRMWARE_H__ */ diff --git a/gxp-host-device-structs.h b/gxp-host-device-structs.h index 1c993f1..4597a28 100644 --- a/gxp-host-device-structs.h +++ b/gxp-host-device-structs.h @@ -17,6 +17,85 @@ #define MAX_NUM_CORES 4 #define NUM_SYSTEM_SEMAPHORES 64 +/* The number of physical doorbells and sync barriers allocated to each VD */ +#define GXP_NUM_DOORBELLS_PER_VD 7 +#define GXP_NUM_SYNC_BARRIERS_PER_VD 4 + +/* The first allowed doorbell and sync barrier to be used for VDs' usage */ +#define GXP_DOORBELLS_START 4 /* The first 4 are used for boot */ +#define GXP_SYNC_BARRIERS_START 1 /* The first 1 is used for UART */ + +/* Definitions for host->device boot mode requests */ +/* + * No boot action is needed. This is a valid mode once a core is running. + * However, it's an invalid state when a FW is powering on. The DSP core will + * write it to the boot mode register once it starts a transition. + * This is helpful in case the core reboots/crashes while performing the + * transition so it doesn't get stuck in a boot loop. + */ +#define GXP_BOOT_MODE_NONE 0 + +/* + * Request that the core performs a normal cold boot on the next power-on event. + * This does not actually wake the core up, but is required before powering the + * core up if cold boot is desired. + * Core power-on could be performed using any wake-up source like the doorbells. + * Upon success, the boot status should be GXP_BOOT_STATUS_ACTIVE. + */ +#define GXP_BOOT_MODE_COLD_BOOT 1 + +/* + * Request that the core suspends on the next suspend signal arrival. This does + * not trigger a suspend operation. A subsequent mailbox command or notification + * is needed to trigger the actual transition. Upon success, the boot status + * should be GXP_BOOT_STATUS_SUSPENDED. 
+ */ +#define GXP_BOOT_MODE_SUSPEND 2 + +/* + * Request that the core to preempt the active workload on the next suspend + * signal arrival.Upon success, the boot status should be + * GXP_BOOT_STATUS_SUSPENDED. + */ +#define GXP_BOOT_MODE_PREEMPT 3 + +/* + * Request the core resumes on the next power on-event. This does not trigger a + * resume operation, but is required before powering the core up if warm + * boot/resume is desired. + * Core power-on could be performed using any wake-up source like direct LPM + * transition into PS0. Upon success, the boot status should be + * GXP_BOOT_STATUS_ACTIVE + */ +#define GXP_BOOT_MODE_RESUME 4 + +/* + * Request the core shutdown. A subsequent mailbox command or notification + * is needed to trigger the actual transition. Upon success, the boot status + * should be GXP_BOOT_STATUS_OFF. + */ +#define GXP_BOOT_MODE_SHUTDOWN 5 + +/* Definitions for host->device boot status */ +/* Initial status */ +#define GXP_BOOT_STATUS_NONE 0 + +/* Final status */ +#define GXP_BOOT_STATUS_ACTIVE 1 +#define GXP_BOOT_STATUS_SUSPENDED 2 +#define GXP_BOOT_STATUS_OFF 3 + +/* Transition status */ +#define GXP_BOOT_STATUS_INVALID_MODE 4 +#define GXP_BOOT_STATUS_BOOTING 5 +#define GXP_BOOT_STATUS_BOOTING_FAILED 6 +#define GXP_BOOT_STATUS_SUSPENDING 7 +#define GXP_BOOT_STATUS_SUSPENDING_FAILED 8 +#define GXP_BOOT_STATUS_SUSPENDING_FAILED_ACTIVE_WL 9 +#define GXP_BOOT_STATUS_WAITING_FOR_WORKLOAD 10 +#define GXP_BOOT_STATUS_WAITING_FOR_DMA 11 +#define GXP_BOOT_STATUS_SHUTTING_DOWN 12 + /* Bit masks for the status fields in the core telemetry structures. */ /* The core telemetry buffers have been setup by the host. */ #define GXP_CORE_TELEMETRY_HOST_STATUS_ENABLED (1 << 0) @@ -324,4 +403,178 @@ struct gxp_core_to_core_response { int32_t cmd_retval; }; +/* + * A structure for describing the state of the job this worker core is part of. 
+ * This struct is expected to change per dispatch/context switch/preepmtion as + * it describes the HW resources, FW IDs, and other parameters that may change + * across job dispatches. + * It also establishes a slot used for the various HW resources this VD is + * expected to use. + * Each FW in a VD is expected to be provided its own copy of this structure + * based on the job that it's part of. + */ +struct gxp_job_descriptor { + /* The number of workers participating in this job. */ + uint32_t workers_count; + + /* + * A mapping between a worker ID and the FW ID handling it. The FW ID + * used for handling worker 'w' is defined in worker_to_fw[w]. + */ + int32_t worker_to_fw[MAX_NUM_CORES]; + + /* + * A slot ID between 0 and MAX_NUM_CORES (exclusive) that indicates + * which block of HW resources this VD is expected to use. All system + * HW resources (such as doorbells, sync barriers, etc) are split across + * the slots evenly; usually starting at a specific physical ID and + * spanning a number consecutive instances. The start ID for each HW + * resource category is defined in GXP_<resource_name>_START; and the + * number of resources alloted to each slot is defined in + * GXP_NUM_<resource_name>_PER_VD. + */ + uint32_t hardware_resources_slot; +}; + +/* + * A per-FW control structure used to communicate between the host (MCU or + * kernel) and the DSP core. The region is expected to be hosted in uncached + * memory. + */ +struct gxp_host_control_region { + /* + * Written to by the FW to indicate to the host that the core is + * alive. + */ + uint32_t core_alive_magic; + + /* + * Written to by the FW to indicate to the host that the core can read + * TOP registers. + */ + uint32_t top_access_ok; + + /* + * Written to by the host to specify the request FW boot mode. See the + * GXP_BOOT_MODE_* definitions for valid values. Always set by the FW to + * GXP_BOOT_MODE_NONE once the requested boot mode transition is + * completed. 
+ */ + uint32_t boot_mode; + + /* + * Written to by the FW to indicate the boot status. See + * GXP_BOOT_STATUS_* definitions for valid values. + */ + uint32_t boot_status; + + /* Reserved fields for future expansion */ + uint32_t reserved_boot[12]; + + /* To be used to communicate statistics for timing events during boot */ + uint32_t timing_entries[16]; + + /* To be used to communicate crash events in case of failures */ + uint32_t valid_crash_info; + uint32_t crash_exccause; + uint32_t crash_excvaddr; + uint32_t crash_epc1; + uint32_t reserved_crash_info[12]; + + /* Reserved for more categories */ + uint32_t reserved[16]; + + /* + * The per-core job descriptor. This struct will be inspected by the FW + * at the beginning of every dispatch. + */ + struct gxp_job_descriptor job_descriptor; +}; + +/* + * A structure describing the telemetry (logging and tracing) parameters and + * buffers; this describes R/O aspects of the telemetry buffers. + */ +struct gxp_telemetry_descriptor_ro { + struct telemetry_descriptor_ro { + /* + * The telemetry status from the host's point of view. See the + * top of the file for the appropriate flags. + */ + uint32_t host_status; + + /* + * The device address for the buffer used for storing events. + * The head and tail indices are described inside the data + * pointed to by `buffer_addr`. + */ + uint32_t buffer_addr; + + /* The size of the buffer (in bytes) */ + uint32_t buffer_size; + } per_core_loggers[MAX_NUM_CORES], per_core_tracers[MAX_NUM_CORES]; +}; + +/* + * A structure describing the external state of the VD. This structure is read + * once by the FW upon the first cold boot and is never checked again. + */ +struct gxp_vd_descriptor { + /* The ID for this GXP application. */ + uint32_t application_id; + + /* + * Whether or not this VD has been initialized by one of its cores. + * This variable is protected by sync barrier at offset 0. Should be + * initialized by the host to 0. 
+ */ + uint32_t vd_is_initialized; +}; + +/* + * A descriptor for data that is common to the entire system; usually accessed + * by physical core. This region is mapped as R/O for all VDs. Should be + * writable by the host (MCU/Kernel) + */ +struct gxp_system_descriptor_ro { + /* A device address for the common debug dump region */ + uint32_t debug_dump_dev_addr; + + /* + * A R/O descriptor for the telemetry data. Describing buffer + * parameters. + */ + struct gxp_telemetry_descriptor_ro telemetry_desc; +}; + +/* + * A structure describing the telemetry (logging and tracing) parameters; this + * describes R/W aspects of the telemetry system. + */ +struct gxp_telemetry_descriptor_rw { + /* A struct for describing R/W status parameters of the buffer */ + struct telemetry_descriptor_rw { + /* + * The telemetry status from the device point of view. See the + * top of the file for the appropriate flags. + */ + uint32_t device_status; + + /* + * Whether or not this telemetry category has data available + * for the host + */ + uint32_t data_available; + } per_core_loggers[MAX_NUM_CORES], per_core_tracers[MAX_NUM_CORES]; +}; + +/* + * A descriptor for data that is common to the entire system; usually accessed + * by physical core. This region is mapped as R/W for all VDs. + */ +struct gxp_system_descriptor_rw { + /* A R/W descriptor for the telemetry data */ + struct gxp_telemetry_descriptor_rw telemetry_desc; +}; + #endif /* __GXP_HOST_DEVICE_STRUCTURES_H__ */ diff --git a/gxp-internal.h b/gxp-internal.h index 6988bf8..1eaa5c0 100644 --- a/gxp-internal.h +++ b/gxp-internal.h @@ -7,6 +7,7 @@ #ifndef __GXP_INTERNAL_H__ #define __GXP_INTERNAL_H__ +#include <linux/atomic.h> #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/firmware.h> @@ -26,6 +27,8 @@ #define IS_GXP_TEST IS_ENABLED(CONFIG_GXP_TEST) +#define GXP_NAME "gxp" + enum gxp_chip_revision { GXP_CHIP_A0, GXP_CHIP_B0, @@ -124,8 +127,6 @@ struct gxp_dev { /* * Buffer shared across firmware. 
* Its paddr is 0 if the shared buffer is not available. - * Its vaddr is always 0 as this region is not expected to be accessible - * to us. */ struct gxp_mapped_resource shared_buf; /* @@ -133,15 +134,14 @@ struct gxp_dev { * which indexes of slices are used by ID allocator. */ struct ida shared_slice_idp; - size_t shared_slice_size; /* The size of each slice. */ - /* - * The total number of slices. - * It can be zero if there is no shared buffer support. - */ - unsigned int num_shared_slices; struct gxp_usage_stats *usage_stats; /* Stores the usage stats */ void __iomem *sysreg_shareability; /* sysreg shareability csr base */ + /* Next virtual device ID. */ + atomic_t next_vdid; + + /* To manage DMA fences. */ + struct gcip_dma_fence_manager *gfence_mgr; /* callbacks for chip-dependent implementations */ diff --git a/gxp-mailbox-impl.c b/gxp-mailbox-impl.c index 4ea4130..4044620 100644 --- a/gxp-mailbox-impl.c +++ b/gxp-mailbox-impl.c @@ -521,6 +521,7 @@ static int gxp_mailbox_manager_execute_cmd( u32 cmd_flags, u8 num_cores, struct gxp_power_states power_states, u64 *resp_seq, u16 *resp_status) { + struct gxp_dev *gxp = client->gxp; struct gxp_command cmd; struct gxp_response resp; struct buffer_descriptor buffer; @@ -535,7 +536,9 @@ static int gxp_mailbox_manager_execute_cmd( cmd.priority = cmd_priority; /* currently unused */ cmd.buffer_descriptor = buffer; + down_read(&gxp->vd_semaphore); ret = gxp_mailbox_execute_cmd(mailbox, &cmd, &resp); + up_read(&gxp->vd_semaphore); /* resp.seq and resp.status can be updated even though it failed to process the command */ if (resp_seq) @@ -24,7 +24,6 @@ #define SHUTDOWN_DELAY_US_MIN 200 #define SHUTDOWN_DELAY_US_MAX 400 -#define SHUTDOWN_MAX_DELAY_COUNT 20 /* * The order of this array decides the voting priority, should be increasing in @@ -268,14 +267,16 @@ int gxp_pm_blk_off(struct gxp_dev *gxp) return ret; } -bool gxp_pm_is_blk_down(struct gxp_dev *gxp) +bool gxp_pm_is_blk_down(struct gxp_dev *gxp, uint timeout_ms) { 
- int timeout_cnt = 0; + int timeout_cnt = 0, max_delay_count; int curr_state; if (!gxp->power_mgr->aur_status) return gxp->power_mgr->curr_state == AUR_OFF; + max_delay_count = (timeout_ms * 1000) / SHUTDOWN_DELAY_US_MIN; + do { /* Delay 200~400us per retry till blk shutdown finished */ usleep_range(SHUTDOWN_DELAY_US_MIN, SHUTDOWN_DELAY_US_MAX); @@ -283,7 +284,7 @@ bool gxp_pm_is_blk_down(struct gxp_dev *gxp) if (!curr_state) return true; timeout_cnt++; - } while (timeout_cnt < SHUTDOWN_MAX_DELAY_COUNT); + } while (timeout_cnt < max_delay_count); return false; } @@ -167,11 +167,12 @@ int gxp_pm_blk_off(struct gxp_dev *gxp); /** * gxp_pm_is_blk_down() - Check weather the blk is turned off or not. * @gxp: The GXP device to check + * @timeout_ms: Wait for the block to be turned off for this duration. * * Return: * * true - blk is turned off. */ -bool gxp_pm_is_blk_down(struct gxp_dev *gxp); +bool gxp_pm_is_blk_down(struct gxp_dev *gxp, uint timeout_ms); /** * gxp_pm_get_blk_state() - Get the blk power state @@ -10,6 +10,8 @@ #include "gxp-internal.h" +#define SSMT_CLAMP_MODE_BYPASS (1u << 31) + struct gxp_ssmt { struct gxp_dev *gxp; void __iomem *idma_ssmt_base; @@ -31,4 +33,9 @@ int gxp_ssmt_init(struct gxp_dev *gxp, struct gxp_ssmt *ssmt); */ void gxp_ssmt_set_core_vid(struct gxp_ssmt *ssmt, uint core, uint vid); +static inline void gxp_ssmt_set_core_bypass(struct gxp_ssmt *ssmt, uint core) +{ + gxp_ssmt_set_core_vid(ssmt, core, SSMT_CLAMP_MODE_BYPASS); +} + #endif /* __GXP_SSMT_H__ */ @@ -5,18 +5,23 @@ * Copyright (C) 2021 Google LLC */ +#include <linux/atomic.h> #include <linux/bitops.h> #include <linux/idr.h> +#include <linux/iommu.h> +#include <linux/refcount.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <gcip/gcip-alloc-helper.h> +#include <gcip/gcip-image-config.h> #include "gxp-config.h" #include "gxp-core-telemetry.h" #include "gxp-debug-dump.h" #include "gxp-dma.h" #include "gxp-domain-pool.h" +#include "gxp-doorbell.h" #include 
"gxp-firmware.h" #include "gxp-firmware-data.h" #include "gxp-host-device-structs.h" @@ -43,11 +48,418 @@ void gxp_vd_init(struct gxp_dev *gxp) /* All cores start as free */ for (core = 0; core < GXP_NUM_CORES; core++) gxp->core_to_vd[core] = NULL; + atomic_set(&gxp->next_vdid, 0); + ida_init(&gxp->shared_slice_idp); } void gxp_vd_destroy(struct gxp_dev *gxp) { - /* NO-OP for now. */ + ida_destroy(&gxp->shared_slice_idp); +} + +/* Allocates an SGT and map @daddr to it. */ +static int map_ns_region(struct gxp_virtual_device *vd, dma_addr_t daddr, + size_t size) +{ + struct gxp_dev *gxp = vd->gxp; + struct sg_table *sgt; + size_t idx; + const size_t n_reg = ARRAY_SIZE(vd->ns_regions); + int ret; + + for (idx = 0; idx < n_reg; idx++) { + if (!vd->ns_regions[idx].sgt) + break; + } + if (idx == n_reg) { + dev_err(gxp->dev, "NS regions array %zx is full", n_reg); + return -ENOSPC; + } + sgt = gcip_alloc_noncontiguous(gxp->dev, size, GFP_KERNEL); + if (!sgt) + return -ENOMEM; + + ret = gxp_dma_map_iova_sgt(gxp, vd->domain, daddr, sgt, + IOMMU_READ | IOMMU_WRITE); + if (ret) { + dev_err(gxp->dev, "NS map %pad with size %#zx failed", &daddr, + size); + gcip_free_noncontiguous(sgt); + return ret; + } + vd->ns_regions[idx].daddr = daddr; + vd->ns_regions[idx].sgt = sgt; + + return 0; +} + +static void unmap_ns_region(struct gxp_virtual_device *vd, dma_addr_t daddr) +{ + struct gxp_dev *gxp = vd->gxp; + struct sg_table *sgt; + size_t idx; + const size_t n_reg = ARRAY_SIZE(vd->ns_regions); + + for (idx = 0; idx < n_reg; idx++) { + if (daddr == vd->ns_regions[idx].daddr) + break; + } + if (idx == n_reg) { + dev_warn(gxp->dev, "unable to find NS mapping @ %pad", &daddr); + return; + } + + sgt = vd->ns_regions[idx].sgt; + vd->ns_regions[idx].sgt = NULL; + vd->ns_regions[idx].daddr = 0; + gxp_dma_unmap_iova_sgt(gxp, vd->domain, daddr, sgt); + gcip_free_noncontiguous(sgt); +} + +/* Maps the shared buffer region to @vd->domain. 
*/ +static int map_core_shared_buffer(struct gxp_virtual_device *vd) +{ + struct gxp_dev *gxp = vd->gxp; + struct iommu_domain *domain = vd->domain->domain; + const size_t shared_size = GXP_SHARED_SLICE_SIZE; + + if (!gxp->shared_buf.paddr) + return 0; + return iommu_map(domain, gxp->shared_buf.daddr, + gxp->shared_buf.paddr + shared_size * vd->slice_index, + shared_size, IOMMU_READ | IOMMU_WRITE); +} + +/* Reverts map_core_shared_buffer. */ +static void unmap_core_shared_buffer(struct gxp_virtual_device *vd) +{ + struct gxp_dev *gxp = vd->gxp; + struct iommu_domain *domain = vd->domain->domain; + const size_t shared_size = GXP_SHARED_SLICE_SIZE; + + if (!gxp->shared_buf.paddr) + return; + iommu_unmap(domain, gxp->shared_buf.daddr, shared_size); +} + +/* Maps @res->daddr to @res->paddr to @vd->domain. */ +static int map_resource(struct gxp_virtual_device *vd, + struct gxp_mapped_resource *res) +{ + if (res->daddr == 0) + return 0; + return iommu_map(vd->domain->domain, res->daddr, res->paddr, res->size, + IOMMU_READ | IOMMU_WRITE); +} + +/* Reverts map_resource. */ +static void unmap_resource(struct gxp_virtual_device *vd, + struct gxp_mapped_resource *res) +{ + if (res->daddr == 0) + return; + iommu_unmap(vd->domain->domain, res->daddr, res->size); +} + +/* + * Assigns @res's IOVA, size from image config. + */ +static void assign_resource(struct gxp_mapped_resource *res, + struct gcip_image_config *img_cfg, + enum gxp_imgcfg_idx idx) +{ + res->daddr = img_cfg->iommu_mappings[idx].virt_address; + res->size = gcip_config_to_size( + img_cfg->iommu_mappings[idx].image_config_value); +} + +/* + * This function does follows: + * - Get CORE_CFG, VD_CFG, SYS_CFG's IOVAs and sizes from image config. 
+ * - Map above regions with this layout: + * Pool + * +------------------------------------+ + * | SLICE_0: CORE_CFG | + * | SLICE_0: VD_CFG | + * | <padding to GXP_SHARED_SLICE_SIZE> | + * +------------------------------------+ + * | SLICE_1: CORE_CFG | + * | SLICE_1: VD_CFG | + * | <padding to GXP_SHARED_SLICE_SIZE> | + * +------------------------------------+ + * | ... SLICE_N | + * +------------------------------------+ + * | <padding> | + * +------------------------------------+ + * | SYS_CFG | + * +------------------------------------+ + * + * To keep compatibility, if not both mapping[0, 1] present then this function + * falls back to map the MCU-core shared region with hard-coded IOVA and size. + */ +static int map_cfg_regions(struct gxp_virtual_device *vd, + struct gcip_image_config *img_cfg) +{ + struct gxp_dev *gxp = vd->gxp; + struct gxp_mapped_resource *pool; + struct gxp_mapped_resource res, tmp; + size_t offset; + int ret; + + if (img_cfg->num_iommu_mappings < 2) + return map_core_shared_buffer(vd); + + /* + * For direct mode, the config regions are programmed by host (us); for + * MCU mode, the config regions are programmed by MCU. 
+ */ + if (gxp_is_direct_mode(gxp)) { + tmp = gxp->fwdatabuf; + /* Leave the first piece be used for gxp_fw_data_init() */ + tmp.vaddr += tmp.size / 2; + tmp.paddr += tmp.size / 2; + pool = &tmp; + } else { + pool = &gxp->shared_buf; + } + + assign_resource(&res, img_cfg, CORE_CFG_REGION_IDX); + offset = vd->slice_index * GXP_SHARED_SLICE_SIZE; + res.vaddr = pool->vaddr + offset; + res.paddr = pool->paddr + offset; + ret = map_resource(vd, &res); + if (ret) { + dev_err(gxp->dev, "map core config %pad -> offset %#zx failed", + &res.daddr, offset); + return ret; + } + vd->core_cfg = res; + + assign_resource(&res, img_cfg, VD_CFG_REGION_IDX); + offset += vd->core_cfg.size; + res.vaddr = pool->vaddr + offset; + res.paddr = pool->paddr + offset; + ret = map_resource(vd, &res); + if (ret) { + dev_err(gxp->dev, "map VD config %pad -> offset %#zx failed", + &res.daddr, offset); + goto err_unmap_core; + } + vd->vd_cfg = res; + /* image config correctness check */ + if (vd->core_cfg.size + vd->vd_cfg.size > GXP_SHARED_SLICE_SIZE) { + dev_err(gxp->dev, + "Core CFG (%#llx) + VD CFG (%#llx) exceeds %#x", + vd->core_cfg.size, vd->vd_cfg.size, + GXP_SHARED_SLICE_SIZE); + ret = -ENOSPC; + goto err_unmap_vd; + } + /* + * It's okay when mappings[sys_cfg_region_idx] is not set, in which case + * map_resource does nothing. + */ + assign_resource(&res, img_cfg, SYS_CFG_REGION_IDX); + /* Use the end of the shared region for system cfg. 
*/ + offset = GXP_SHARED_BUFFER_SIZE - res.size; + res.vaddr = pool->vaddr + offset; + res.paddr = pool->paddr + offset; + ret = map_resource(vd, &res); + if (ret) { + dev_err(gxp->dev, "map sys config %pad -> offset %#zx failed", + &res.daddr, offset); + goto err_unmap_vd; + } + vd->sys_cfg = res; + + return 0; + +err_unmap_vd: + unmap_resource(vd, &vd->vd_cfg); + vd->vd_cfg.daddr = 0; +err_unmap_core: + unmap_resource(vd, &vd->core_cfg); + vd->core_cfg.daddr = 0; + return ret; +} + +static void unmap_cfg_regions(struct gxp_virtual_device *vd) +{ + if (vd->core_cfg.daddr == 0) + return unmap_core_shared_buffer(vd); + + unmap_resource(vd, &vd->sys_cfg); + unmap_resource(vd, &vd->vd_cfg); + unmap_resource(vd, &vd->core_cfg); +} + +static int gxp_vd_imgcfg_map(void *data, dma_addr_t daddr, phys_addr_t paddr, + size_t size, unsigned int flags) +{ + struct gxp_virtual_device *vd = data; + + if (flags & GCIP_IMAGE_CONFIG_FLAGS_SECURE) + return 0; + + return map_ns_region(vd, daddr, size); +} + +static void gxp_vd_imgcfg_unmap(void *data, dma_addr_t daddr, size_t size, + unsigned int flags) +{ + struct gxp_virtual_device *vd = data; + + if (flags & GCIP_IMAGE_CONFIG_FLAGS_SECURE) + return; + + unmap_ns_region(vd, daddr); +} + +static int map_fw_image_config(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, + struct gxp_firmware_manager *fw_mgr) +{ + int ret; + struct gcip_image_config *cfg; + static const struct gcip_image_config_ops gxp_vd_imgcfg_ops = { + .map = gxp_vd_imgcfg_map, + .unmap = gxp_vd_imgcfg_unmap, + }; + + /* + * Allow to skip for test suites need VD but doesn't need the FW module. + */ + if (IS_ENABLED(CONFIG_GXP_TEST) && !fw_mgr) + return 0; + cfg = &fw_mgr->img_cfg; + ret = gcip_image_config_parser_init(&vd->cfg_parser, &gxp_vd_imgcfg_ops, + gxp->dev, vd); + /* parser_init() never fails unless we pass invalid OPs. 
*/ + if (unlikely(ret)) + return ret; + ret = gcip_image_config_parse(&vd->cfg_parser, cfg); + if (ret) { + dev_err(gxp->dev, "Image config mapping failed"); + return ret; + } + ret = map_cfg_regions(vd, cfg); + if (ret) { + dev_err(gxp->dev, "Config regions mapping failed"); + gcip_image_config_clear(&vd->cfg_parser); + return ret; + } + vd->fw_ro_size = cfg->firmware_size; + /* + * To be compatible with image config without setting firmware_size, + * fall back to map the whole region to carveout. + */ + if (vd->fw_ro_size == 0) + vd->fw_ro_size = gxp->fwbufs[0].size; + + return 0; +} + +static void unmap_fw_image_config(struct gxp_dev *gxp, + struct gxp_virtual_device *vd) +{ + unmap_cfg_regions(vd); + gcip_image_config_clear(&vd->cfg_parser); +} + +/* + * For each core, + * - fw_rw_size = fwbufs[core].size - fw_ro_size + * - allocates rwdata_sgt[core] with size fw_rw_size + * - maps fwbufs[core].daddr -> fwbufs[core].paddr with size fw_ro_size + * - maps fwbufs[core].daddr + fw_ro_size -> rwdata_sgt[core] + */ +static int alloc_and_map_fw_image(struct gxp_dev *gxp, + struct gxp_virtual_device *vd) +{ + size_t ro_size = vd->fw_ro_size, rw_size; + struct iommu_domain *domain = vd->domain->domain; + int i, ret; + + /* Maps all FW regions together and no rwdata_sgt in this case. 
*/ + if (ro_size == gxp->fwbufs[0].size) + return iommu_map(domain, gxp->fwbufs[0].daddr, + gxp->fwbufs[0].paddr, ro_size * GXP_NUM_CORES, + IOMMU_READ | IOMMU_WRITE); + + dev_info(gxp->dev, "mapping firmware RO size %#zx", ro_size); + rw_size = gxp->fwbufs[0].size - ro_size; + for (i = 0; i < GXP_NUM_CORES; i++) { + vd->rwdata_sgt[i] = + gcip_alloc_noncontiguous(gxp->dev, rw_size, GFP_KERNEL); + if (!vd->rwdata_sgt[i]) { + dev_err(gxp->dev, + "allocate firmware data for core %d failed", i); + ret = -ENOMEM; + goto err_free_sgt; + } + } + for (i = 0; i < GXP_NUM_CORES; i++) { + ret = iommu_map(domain, gxp->fwbufs[i].daddr, + gxp->fwbufs[i].paddr, ro_size, + IOMMU_READ | IOMMU_WRITE); + if (ret) { + dev_err(gxp->dev, "map firmware RO for core %d failed", + i); + goto err_unmap; + } + ret = gxp_dma_map_iova_sgt(gxp, vd->domain, + gxp->fwbufs[i].daddr + ro_size, + vd->rwdata_sgt[i], + IOMMU_READ | IOMMU_WRITE); + if (ret) { + dev_err(gxp->dev, "map firmware RW for core %d failed", + i); + iommu_unmap(domain, gxp->fwbufs[i].daddr, ro_size); + goto err_unmap; + } + } + return 0; + +err_unmap: + while (i--) { + iommu_unmap(domain, gxp->fwbufs[i].daddr, ro_size); + gxp_dma_unmap_iova_sgt(gxp, vd->domain, + gxp->fwbufs[i].daddr + ro_size, + vd->rwdata_sgt[i]); + } +err_free_sgt: + for (i = 0; i < GXP_NUM_CORES; i++) { + if (vd->rwdata_sgt[i]) + gcip_free_noncontiguous(vd->rwdata_sgt[i]); + vd->rwdata_sgt[i] = NULL; + } + return ret; +} + +static void unmap_and_free_fw_image(struct gxp_dev *gxp, + struct gxp_virtual_device *vd) +{ + size_t ro_size = vd->fw_ro_size; + struct iommu_domain *domain = vd->domain->domain; + int i; + + if (ro_size == gxp->fwbufs[0].size) { + iommu_unmap(domain, gxp->fwbufs[0].daddr, + ro_size * GXP_NUM_CORES); + return; + } + + for (i = 0; i < GXP_NUM_CORES; i++) { + iommu_unmap(domain, gxp->fwbufs[i].daddr, ro_size); + gxp_dma_unmap_iova_sgt(gxp, vd->domain, + gxp->fwbufs[i].daddr + ro_size, + vd->rwdata_sgt[i]); + } + for (i = 0; i < 
GXP_NUM_CORES; i++) { + if (vd->rwdata_sgt[i]) + gcip_free_noncontiguous(vd->rwdata_sgt[i]); + vd->rwdata_sgt[i] = NULL; + } } static int map_core_telemetry_buffers(struct gxp_dev *gxp, @@ -130,7 +542,7 @@ static void unmap_core_telemetry_buffers(struct gxp_dev *gxp, } static int map_debug_dump_buffer(struct gxp_dev *gxp, - struct gxp_virtual_device *vd) + struct gxp_virtual_device *vd) { if (!gxp->debug_dump_mgr) return 0; @@ -155,6 +567,11 @@ static int assign_cores(struct gxp_virtual_device *vd) uint core; uint available_cores = 0; + if (!gxp_core_boot) { + /* We don't do core assignment when cores are managed by MCU. */ + vd->core_list = BIT(GXP_NUM_CORES) - 1; + return 0; + } vd->core_list = 0; for (core = 0; core < GXP_NUM_CORES; core++) { if (gxp->core_to_vd[core] == NULL) { @@ -164,7 +581,8 @@ static int assign_cores(struct gxp_virtual_device *vd) } } if (available_cores < vd->num_cores) { - dev_err(gxp->dev, "Insufficient available cores. Available: %u. Requested: %u\n", + dev_err(gxp->dev, + "Insufficient available cores. Available: %u. Requested: %u\n", available_cores, vd->num_cores); return -EBUSY; } @@ -179,17 +597,69 @@ static void unassign_cores(struct gxp_virtual_device *vd) struct gxp_dev *gxp = vd->gxp; uint core; + if (!gxp_core_boot) + return; for (core = 0; core < GXP_NUM_CORES; core++) { if (gxp->core_to_vd[core] == vd) gxp->core_to_vd[core] = NULL; } } +/* Saves the state of this VD's doorbells and clears them. */ +static void vd_save_doorbells(struct gxp_virtual_device *vd) +{ + struct gxp_dev *gxp = vd->gxp; + uint base_doorbell; + uint i; + + if (!gxp_fw_data_use_per_vd_config(vd)) + return; + base_doorbell = GXP_DOORBELLS_START + + gxp_vd_hw_slot_id(vd) * GXP_NUM_DOORBELLS_PER_VD; + for (i = 0; i < ARRAY_SIZE(vd->doorbells_state); i++) { + vd->doorbells_state[i] = + gxp_doorbell_status(gxp, base_doorbell + i); + gxp_doorbell_clear(gxp, base_doorbell + i); + } +} + +/* Restores the state of this VD's doorbells. 
*/ +static void vd_restore_doorbells(struct gxp_virtual_device *vd) +{ + struct gxp_dev *gxp = vd->gxp; + uint base_doorbell; + uint i; + + if (!gxp_fw_data_use_per_vd_config(vd)) + return; + base_doorbell = GXP_DOORBELLS_START + + gxp_vd_hw_slot_id(vd) * GXP_NUM_DOORBELLS_PER_VD; + for (i = 0; i < ARRAY_SIZE(vd->doorbells_state); i++) + if (vd->doorbells_state[i]) + gxp_doorbell_set(gxp, base_doorbell + i); + else + gxp_doorbell_clear(gxp, base_doorbell + i); +} + +static void set_config_version(struct gxp_dev *gxp, + struct gxp_virtual_device *vd) +{ + if (gxp->firmware_mgr && vd->sys_cfg.daddr) + vd->config_version = gxp->firmware_mgr->img_cfg.config_version; + /* + * Let gxp_dma_map_core_resources() map this region only when using the + * legacy protocol. + * + * TODO(b/265748027): remove this + */ + if (gxp_fw_data_use_per_vd_config(vd)) + gxp->fwdatabuf.daddr = 0; +} + struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp, u16 requested_cores) { struct gxp_virtual_device *vd; - unsigned int size; int i; int err; @@ -209,8 +679,10 @@ struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp, vd->client_id = -1; vd->tpu_client_id = -1; spin_lock_init(&vd->credit_lock); + refcount_set(&vd->refcount, 1); vd->credit = GXP_COMMAND_CREDIT_PER_VD; vd->first_open = true; + vd->vdid = atomic_inc_return(&gxp->next_vdid); vd->domain = gxp_domain_pool_alloc(gxp->domain_pool); if (!vd->domain) { @@ -218,30 +690,18 @@ struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp, goto error_free_vd; } - if (gxp->num_shared_slices) { - vd->slice_index = - ida_alloc_max(&gxp->shared_slice_idp, - gxp->num_shared_slices - 1, GFP_KERNEL); - if (vd->slice_index < 0) { - err = vd->slice_index; - goto error_free_domain; - } - } - - size = GXP_NUM_CORES * PRIVATE_FW_DATA_SIZE; - vd->fwdata_sgt = gcip_alloc_noncontiguous(gxp->dev, size, GFP_KERNEL); - if (!vd->fwdata_sgt) { - dev_err(gxp->dev, "allocate firmware data size=%x failed", - size); - err = -ENOMEM; - goto 
error_free_slice_index; + vd->slice_index = ida_alloc_max(&gxp->shared_slice_idp, + GXP_NUM_SHARED_SLICES - 1, GFP_KERNEL); + if (vd->slice_index < 0) { + err = vd->slice_index; + goto error_free_domain; } vd->mailbox_resp_queues = kcalloc( vd->num_cores, sizeof(*vd->mailbox_resp_queues), GFP_KERNEL); if (!vd->mailbox_resp_queues) { err = -ENOMEM; - goto error_free_fwdata; + goto error_free_slice_index; } for (i = 0; i < vd->num_cores; i++) { @@ -258,19 +718,29 @@ struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp, if (err) goto error_free_resp_queues; + /* + * Here assumes firmware is requested before allocating a VD, which is + * true because we request firmware on first GXP device open. + */ + err = map_fw_image_config(gxp, vd, gxp->firmware_mgr); + if (err) + goto error_unassign_cores; + + set_config_version(gxp, vd); if (gxp->data_mgr) { - vd->fw_app = gxp_fw_data_create_app(gxp, vd->core_list); + /* After map_fw_image_config because it needs vd->sys_cfg. */ + vd->fw_app = gxp_fw_data_create_app(gxp, vd); if (IS_ERR(vd->fw_app)) { err = PTR_ERR(vd->fw_app); - goto error_unassign_cores; + vd->fw_app = NULL; + goto error_unmap_imgcfg; } } err = gxp_dma_map_core_resources(gxp, vd->domain, vd->core_list, vd->slice_index); if (err) goto error_destroy_fw_data; - err = gxp_dma_map_iova_sgt(gxp, vd->domain, GXP_IOVA_PRIV_FW_DATA, - vd->fwdata_sgt, IOMMU_READ | IOMMU_WRITE); + err = alloc_and_map_fw_image(gxp, vd); if (err) goto error_unmap_core_resources; err = map_core_telemetry_buffers(gxp, vd, vd->core_list); @@ -285,17 +755,17 @@ struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp, error_unmap_core_telemetry_buffer: unmap_core_telemetry_buffers(gxp, vd, vd->core_list); error_unmap_fw_data: - gxp_dma_unmap_iova_sgt(gxp, vd->domain, GXP_IOVA_PRIV_FW_DATA, vd->fwdata_sgt); + unmap_and_free_fw_image(gxp, vd); error_unmap_core_resources: gxp_dma_unmap_core_resources(gxp, vd->domain, vd->core_list); error_destroy_fw_data: 
gxp_fw_data_destroy_app(gxp, vd->fw_app); +error_unmap_imgcfg: + unmap_fw_image_config(gxp, vd); error_unassign_cores: unassign_cores(vd); error_free_resp_queues: kfree(vd->mailbox_resp_queues); -error_free_fwdata: - gcip_free_noncontiguous(vd->fwdata_sgt); error_free_slice_index: if (vd->slice_index >= 0) ida_free(&gxp->shared_slice_idp, vd->slice_index); @@ -322,16 +792,13 @@ void gxp_vd_release(struct gxp_virtual_device *vd) mutex_unlock(&gxp->secure_vd_lock); } - unassign_cores(vd); unmap_debug_dump_buffer(gxp, vd); unmap_core_telemetry_buffers(gxp, vd, core_list); - gxp_dma_unmap_iova_sgt(gxp, vd->domain, GXP_IOVA_PRIV_FW_DATA, vd->fwdata_sgt); + unmap_and_free_fw_image(gxp, vd); gxp_dma_unmap_core_resources(gxp, vd->domain, core_list); - - if (!IS_ERR_OR_NULL(vd->fw_app)) { - gxp_fw_data_destroy_app(gxp, vd->fw_app); - vd->fw_app = NULL; - } + gxp_fw_data_destroy_app(gxp, vd->fw_app); + unmap_fw_image_config(gxp, vd); + unassign_cores(vd); vd->gxp->mailbox_mgr->release_unconsumed_async_resps(vd); @@ -349,11 +816,11 @@ void gxp_vd_release(struct gxp_virtual_device *vd) up_write(&vd->mappings_semaphore); kfree(vd->mailbox_resp_queues); - gcip_free_noncontiguous(vd->fwdata_sgt); if (vd->slice_index >= 0) ida_free(&vd->gxp->shared_slice_idp, vd->slice_index); gxp_domain_pool_free(vd->gxp->domain_pool, vd->domain); - kfree(vd); + vd->state = GXP_VD_RELEASED; + gxp_vd_put(vd); } int gxp_vd_block_ready(struct gxp_virtual_device *vd) @@ -361,6 +828,8 @@ int gxp_vd_block_ready(struct gxp_virtual_device *vd) struct gxp_dev *gxp = vd->gxp; int ret; + lockdep_assert_held_write(&gxp->vd_semaphore); + if (vd->state == GXP_VD_SUSPENDED) return 0; if (vd->state != GXP_VD_OFF) @@ -373,6 +842,7 @@ int gxp_vd_block_ready(struct gxp_virtual_device *vd) ret = gxp->after_vd_block_ready(gxp, vd); if (ret) { gxp_dma_domain_detach_device(gxp, vd->domain); + vd->state = GXP_VD_OFF; return ret; } } @@ -395,21 +865,66 @@ int gxp_vd_run(struct gxp_virtual_device *vd) 
lockdep_assert_held(&gxp->vd_semaphore); if (vd->state != GXP_VD_READY && vd->state != GXP_VD_OFF) return -EINVAL; - if (vd->state == GXP_VD_OFF) - gxp_vd_block_ready(vd); + if (vd->state == GXP_VD_OFF) { + ret = gxp_vd_block_ready(vd); + /* + * The failure of `gxp_vd_block_ready` function means following two things: + * + * 1. The MCU firmware is not working for some reason and if it was crash, + * @vd->state would be set to UNAVAILABLE by the crash handler. However, by the + * race, if this function holds @gxp->vd_semaphore earlier than that handler, + * it is reasonable to set @vd->state to UNAVAILABLE from here. + * + * 2. Some information of vd (or client) such as client_id, slice_index are + * incorrect or not allowed by the MCU firmware for some reasons and the + * `allocate_vmbox` or `link_offload_vmbox` has been failed. In this case, + * setting the @vd->state to UNAVAILABLE and letting the runtime close its fd + * and reallocate a vd would be better than setting @vd->state to OFF. + * + * Therefore, let's set @vd->state to UNAVAILABLE if it returns an error. + */ + if (ret) + goto err_vd_unavailable; + } + /* Clear all doorbells */ + vd_restore_doorbells(vd); ret = gxp_firmware_run(gxp, vd, vd->core_list); if (ret) - vd->state = GXP_VD_UNAVAILABLE; - else - vd->state = GXP_VD_RUNNING; + goto err_vd_block_unready; + vd->state = GXP_VD_RUNNING; + return ret; + +err_vd_block_unready: + gxp_vd_block_unready(vd); +err_vd_unavailable: + vd->state = GXP_VD_UNAVAILABLE; return ret; } -/* Caller must hold gxp->vd_semaphore */ +/* + * Caller must hold gxp->vd_semaphore. + * + * This function will be called from the `gxp_client_destroy` function if @vd->state is not + * GXP_VD_OFF. + * + * Note for the case of the MCU firmware crahses: + * + * In the MCU mode, the `gxp_vd_suspend` function will redirect to this function, but it will not + * happen when the @vd->state is GXP_VD_UNAVAILABLE. 
Therefore, if the MCU firmware crashes, + * @vd->state will be changed to GXP_VD_UNAVAILABLE and this function will not be called even + * though the runtime is going to release the vd wakelock. + * + * It means @vd->state will not be changed to GXP_VD_OFF when the vd wkelock is released (i.e., the + * state will be kept as GXP_VD_UNAVAILABLE) and when the `gxp_vd_block_unready` function is called + * by releasing the block wakelock, it will not send `release_vmbox` and `unlink_offload_vmbox` KCI + * commands to the crashed MCU firmware. This function will be finally called when the runtime + * closes the fd of the device file. + */ void gxp_vd_stop(struct gxp_virtual_device *vd) { struct gxp_dev *gxp = vd->gxp; - uint core; + uint phys_core; + uint core_list = vd->core_list; uint lpm_state; lockdep_assert_held(&gxp->vd_semaphore); @@ -419,28 +934,68 @@ void gxp_vd_stop(struct gxp_virtual_device *vd) /* * Put all cores in the VD into reset so they can not wake each other up */ - for (core = 0; core < GXP_NUM_CORES; core++) { - if (gxp->core_to_vd[core] == vd) { - lpm_state = gxp_lpm_get_state(gxp, CORE_TO_PSM(core)); + for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) { + if (core_list & BIT(phys_core)) { + lpm_state = gxp_lpm_get_state( + gxp, CORE_TO_PSM(phys_core)); if (lpm_state != LPM_PG_STATE) - hold_core_in_reset(gxp, core); + hold_core_in_reset(gxp, phys_core); } } } - gxp_firmware_stop(gxp, vd, vd->core_list); - if (vd->state == GXP_VD_READY || vd->state == GXP_VD_RUNNING) + gxp_firmware_stop(gxp, vd, core_list); + if (vd->state == GXP_VD_READY || vd->state == GXP_VD_RUNNING || + vd->state == GXP_VD_UNAVAILABLE) gxp_dma_domain_detach_device(gxp, vd->domain); vd->state = GXP_VD_OFF; } +static inline uint select_core(struct gxp_virtual_device *vd, uint virt_core, + uint phys_core) +{ + return gxp_fw_data_use_per_vd_config(vd) ? 
virt_core : phys_core; +} + +static bool boot_state_is_suspend(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core, + u32 *boot_state) +{ + if (gxp_fw_data_use_per_vd_config(vd)) { + *boot_state = gxp_firmware_get_boot_status(gxp, vd, core); + return *boot_state == GXP_BOOT_STATUS_SUSPENDED; + } + + *boot_state = gxp_firmware_get_boot_mode(gxp, vd, core); + return *boot_state == GXP_BOOT_MODE_STATUS_SUSPEND_COMPLETED; +} + +static bool boot_state_is_active(struct gxp_dev *gxp, + struct gxp_virtual_device *vd, uint core, + u32 *boot_state) +{ + if (gxp_fw_data_use_per_vd_config(vd)) { + *boot_state = gxp_firmware_get_boot_status(gxp, vd, core); + return *boot_state == GXP_BOOT_STATUS_ACTIVE; + } + + *boot_state = gxp_firmware_get_boot_mode(gxp, vd, core); + return *boot_state == GXP_BOOT_MODE_STATUS_RESUME_COMPLETED; +} + /* * Caller must have locked `gxp->vd_semaphore` for writing. + * + * This function will be called from the `gxp_client_release_vd_wakelock` function when the runtime + * is going to release the vd wakelock only if the @vd->state is not GXP_VD_UNAVAILABLE. + * + * In the MCU mode, this function will redirect to the `gxp_vd_stop` function. */ void gxp_vd_suspend(struct gxp_virtual_device *vd) { - uint core; + uint virt_core, phys_core; struct gxp_dev *gxp = vd->gxp; + uint core_list = vd->core_list; u32 boot_state; uint failed_cores = 0; @@ -460,58 +1015,75 @@ void gxp_vd_suspend(struct gxp_virtual_device *vd) * Start the suspend process for all of this VD's cores without waiting * for completion. 
*/ - for (core = 0; core < GXP_NUM_CORES; core++) { - if (gxp->core_to_vd[core] == vd) { - if (!gxp_lpm_wait_state_ne(gxp, CORE_TO_PSM(core), LPM_ACTIVE_STATE)) { - vd->state = GXP_VD_UNAVAILABLE; - failed_cores |= BIT(core); - hold_core_in_reset(gxp, core); - dev_err(gxp->dev, "Core %u stuck at LPM_ACTIVE_STATE", core); - continue; - } - /* Mark the boot mode as a suspend event */ - gxp_firmware_set_boot_mode(gxp, core, - GXP_BOOT_MODE_REQUEST_SUSPEND); - /* - * Request a suspend event by sending a mailbox - * notification. - */ - gxp_notification_send(gxp, core, - CORE_NOTIF_SUSPEND_REQUEST); + virt_core = 0; + for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) { + uint core = select_core(vd, virt_core, phys_core); + + if (!(core_list & BIT(phys_core))) + continue; + if (!gxp_lpm_wait_state_ne(gxp, CORE_TO_PSM(phys_core), + LPM_ACTIVE_STATE)) { + vd->state = GXP_VD_UNAVAILABLE; + failed_cores |= BIT(phys_core); + hold_core_in_reset(gxp, phys_core); + dev_err(gxp->dev, "Core %u stuck at LPM_ACTIVE_STATE", + phys_core); + continue; } + /* Mark the boot mode as a suspend event */ + if (gxp_fw_data_use_per_vd_config(vd)) { + gxp_firmware_set_boot_status(gxp, vd, core, + GXP_BOOT_STATUS_NONE); + gxp_firmware_set_boot_mode(gxp, vd, core, + GXP_BOOT_MODE_SUSPEND); + } else { + gxp_firmware_set_boot_mode( + gxp, vd, core, GXP_BOOT_MODE_REQUEST_SUSPEND); + } + /* + * Request a suspend event by sending a mailbox + * notification. + */ + gxp_notification_send(gxp, phys_core, + CORE_NOTIF_SUSPEND_REQUEST); + virt_core++; } /* Wait for all cores to complete core suspension. 
*/ - for (core = 0; core < GXP_NUM_CORES; core++) { - if (gxp->core_to_vd[core] == vd) { - if (!(failed_cores & BIT(core))) { - if (!gxp_lpm_wait_state_eq(gxp, CORE_TO_PSM(core), - LPM_PG_STATE)) { - boot_state = gxp_firmware_get_boot_mode( - gxp, core); - if (boot_state != - GXP_BOOT_MODE_STATUS_SUSPEND_COMPLETED) { - dev_err(gxp->dev, - "Suspension request on core %u failed (status: %u)", - core, boot_state); - vd->state = GXP_VD_UNAVAILABLE; - failed_cores |= BIT(core); - hold_core_in_reset(gxp, core); - } - } else { - /* Re-set PS1 as the default low power state. */ - gxp_lpm_enable_state(gxp, CORE_TO_PSM(core), - LPM_CG_STATE); - } + virt_core = 0; + for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) { + uint core = select_core(vd, virt_core, phys_core); + + if (!(core_list & BIT(phys_core))) + continue; + virt_core++; + if (failed_cores & BIT(phys_core)) + continue; + if (!gxp_lpm_wait_state_eq(gxp, CORE_TO_PSM(phys_core), + LPM_PG_STATE)) { + if (!boot_state_is_suspend(gxp, vd, core, + &boot_state)) { + dev_err(gxp->dev, + "Suspension request on core %u failed (status: %u)", + phys_core, boot_state); + vd->state = GXP_VD_UNAVAILABLE; + failed_cores |= BIT(phys_core); + hold_core_in_reset(gxp, phys_core); } + } else { + /* Re-set PS1 as the default low power state. */ + gxp_lpm_enable_state(gxp, CORE_TO_PSM(phys_core), + LPM_CG_STATE); } } gxp_dma_domain_detach_device(gxp, vd->domain); if (vd->state == GXP_VD_UNAVAILABLE) { /* shutdown all cores if virtual device is unavailable */ - for (core = 0; core < GXP_NUM_CORES; core++) - if (gxp->core_to_vd[core] == vd) - gxp_pm_core_off(gxp, core); + for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) + if (core_list & BIT(phys_core)) + gxp_pm_core_off(gxp, phys_core); } else { + /* Save and clear all doorbells. 
*/ + vd_save_doorbells(vd); vd->blk_switch_count_when_suspended = gxp_pm_get_blk_switch_count(gxp); vd->state = GXP_VD_SUSPENDED; @@ -525,8 +1097,8 @@ void gxp_vd_suspend(struct gxp_virtual_device *vd) int gxp_vd_resume(struct gxp_virtual_device *vd) { int ret = 0; - uint core; - uint core_list = 0; + uint phys_core, virt_core; + uint core_list = vd->core_list; uint timeout; u32 boot_state; struct gxp_dev *gxp = vd->gxp; @@ -543,75 +1115,90 @@ int gxp_vd_resume(struct gxp_virtual_device *vd) gxp_pm_force_clkmux_normal(gxp); curr_blk_switch_count = gxp_pm_get_blk_switch_count(gxp); - for (core = 0; core < GXP_NUM_CORES; core++) { - if (gxp->core_to_vd[core] == vd) - core_list |= BIT(core); - } + /* Restore the doorbells state for this VD. */ + vd_restore_doorbells(vd); + gxp_dma_domain_attach_device(gxp, vd->domain, core_list); /* * Start the resume process for all of this VD's cores without waiting * for completion. */ - for (core = 0; core < GXP_NUM_CORES; core++) { - if (BIT(core) & core_list) { - /* - * The comparison is to check if blk_switch_count is - * changed. If it's changed, it means the block is rebooted and - * therefore we need to set up the hardware again. - */ - if (vd->blk_switch_count_when_suspended != curr_blk_switch_count) { - ret = gxp_firmware_setup_hw_after_block_off( - gxp, core, /*verbose=*/false); - if (ret) { - vd->state = GXP_VD_UNAVAILABLE; - failed_cores |= BIT(core); - dev_err(gxp->dev, "Failed to power up core %u\n", core); - continue; - } + virt_core = 0; + for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) { + uint core = select_core(vd, virt_core, phys_core); + + if (!(core_list & BIT(phys_core))) + continue; + /* + * The comparison is to check if blk_switch_count is + * changed. If it's changed, it means the block is rebooted and + * therefore we need to set up the hardware again. 
+ */ + if (vd->blk_switch_count_when_suspended != + curr_blk_switch_count) { + ret = gxp_firmware_setup_hw_after_block_off( + gxp, core, phys_core, + /*verbose=*/false); + if (ret) { + vd->state = GXP_VD_UNAVAILABLE; + failed_cores |= BIT(phys_core); + dev_err(gxp->dev, + "Failed to power up core %u\n", + phys_core); + continue; } - /* Mark this as a resume power-up event. */ - gxp_firmware_set_boot_mode(gxp, core, - GXP_BOOT_MODE_REQUEST_RESUME); - /* - * Power on the core by explicitly switching its PSM to - * PS0 (LPM_ACTIVE_STATE). - */ - gxp_lpm_set_state(gxp, CORE_TO_PSM(core), LPM_ACTIVE_STATE, - /*verbose=*/false); } + /* Mark this as a resume power-up event. */ + if (gxp_fw_data_use_per_vd_config(vd)) { + gxp_firmware_set_boot_status(gxp, vd, core, + GXP_BOOT_STATUS_NONE); + gxp_firmware_set_boot_mode(gxp, vd, core, + GXP_BOOT_MODE_RESUME); + } else { + gxp_firmware_set_boot_mode( + gxp, vd, core, GXP_BOOT_MODE_REQUEST_RESUME); + } + /* + * Power on the core by explicitly switching its PSM to + * PS0 (LPM_ACTIVE_STATE). + */ + gxp_lpm_set_state(gxp, CORE_TO_PSM(phys_core), LPM_ACTIVE_STATE, + /*verbose=*/false); + virt_core++; } /* Wait for all cores to complete core resumption. 
*/ - for (core = 0; core < GXP_NUM_CORES; core++) { - if (BIT(core) & core_list) { - if (!(failed_cores & BIT(core))) { - /* in microseconds */ - timeout = 1000000; - while (--timeout) { - boot_state = gxp_firmware_get_boot_mode( - gxp, core); - if (boot_state == - GXP_BOOT_MODE_STATUS_RESUME_COMPLETED) - break; - udelay(1 * GXP_TIME_DELAY_FACTOR); - } - if (timeout == 0 && - boot_state != - GXP_BOOT_MODE_STATUS_RESUME_COMPLETED) { - dev_err(gxp->dev, - "Resume request on core %u failed (status: %u)", - core, boot_state); - ret = -EBUSY; - vd->state = GXP_VD_UNAVAILABLE; - failed_cores |= BIT(core); - } + virt_core = 0; + for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) { + uint core = select_core(vd, virt_core, phys_core); + + if (!(core_list & BIT(phys_core))) + continue; + + if (!(failed_cores & BIT(phys_core))) { + /* in microseconds */ + timeout = 1000000; + while (--timeout) { + if (boot_state_is_active(gxp, vd, core, + &boot_state)) + break; + udelay(1 * GXP_TIME_DELAY_FACTOR); + } + if (timeout == 0) { + dev_err(gxp->dev, + "Resume request on core %u failed (status: %u)", + phys_core, boot_state); + ret = -EBUSY; + vd->state = GXP_VD_UNAVAILABLE; + failed_cores |= BIT(phys_core); } } + virt_core++; } if (vd->state == GXP_VD_UNAVAILABLE) { /* shutdown all cores if virtual device is unavailable */ - for (core = 0; core < GXP_NUM_CORES; core++) { - if (BIT(core) & core_list) - gxp_pm_core_off(gxp, core); + for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) { + if (core_list & BIT(phys_core)) + gxp_pm_core_off(gxp, phys_core); } gxp_dma_domain_detach_device(gxp, vd->domain); } else { @@ -629,11 +1216,9 @@ int gxp_vd_virt_core_to_phys_core(struct gxp_virtual_device *vd, u16 virt_core) uint virt_core_index = 0; for (phys_core = 0; phys_core < GXP_NUM_CORES; phys_core++) { - if (gxp->core_to_vd[phys_core] == vd) { - if (virt_core_index == virt_core) { - /* Found virtual core */ + if (vd->core_list & BIT(phys_core)) { + if (virt_core_index == 
virt_core) return phys_core; - } virt_core_index++; } @@ -643,22 +1228,7 @@ int gxp_vd_virt_core_to_phys_core(struct gxp_virtual_device *vd, u16 virt_core) return -EINVAL; } -uint gxp_vd_phys_core_list(struct gxp_virtual_device *vd) -{ - uint core_list = 0; - int core; - - lockdep_assert_held(&vd->gxp->vd_semaphore); - for (core = 0; core < GXP_NUM_CORES; core++) { - if (vd->gxp->core_to_vd[core] == vd) - core_list |= BIT(core); - } - - return core_list; -} - -int gxp_vd_mapping_store(struct gxp_virtual_device *vd, - struct gxp_mapping *map) +int gxp_vd_mapping_store(struct gxp_virtual_device *vd, struct gxp_mapping *map) { struct rb_node **link; struct rb_node *parent = NULL; @@ -824,3 +1394,97 @@ void gxp_vd_release_credit(struct gxp_virtual_device *vd) vd->credit++; spin_unlock_irqrestore(&vd->credit_lock, flags); } + +void gxp_vd_put(struct gxp_virtual_device *vd) +{ + if (!vd) + return; + if (refcount_dec_and_test(&vd->refcount)) + kfree(vd); +} + +void gxp_vd_invalidate(struct gxp_dev *gxp, int client_id, uint core_list) +{ + struct gxp_client *client = NULL, *c; + release_unconsumed_async_resps_t release_unconsumed_async_resps = + gxp->mailbox_mgr->release_unconsumed_async_resps; + int ret; + uint core; + + /* + * Prevent @gxp->client_list is being changed while handling the crash. + * The user cannot open or close an FD until this function releases the lock. + */ + mutex_lock(&gxp->client_list_lock); + + /* + * Find corresponding vd with client_id. + * If it holds a block wakelock, we should discard all pending/unconsumed UCI responses + * and change the state of the vd to GXP_VD_UNAVAILABLE. 
+ */ + list_for_each_entry (c, &gxp->client_list, list_entry) { + down_write(&c->semaphore); + down_write(&gxp->vd_semaphore); + if (c->vd && c->vd->client_id == client_id) { + client = c; + break; + } + up_write(&gxp->vd_semaphore); + up_write(&c->semaphore); + } + + mutex_unlock(&gxp->client_list_lock); + + if (!client) { + dev_err(gxp->dev, "Failed to find a VD, client_id=%d", + client_id); + /* + * Invalidate all debug dump segments if debug dump + * is enabled and core_list is not empty. + */ + if (!gxp_debug_dump_is_enabled() || !core_list) + return; + for (core = 0; core < GXP_NUM_CORES; core++) { + if (!(BIT(core) & core_list)) + continue; + mutex_lock(&gxp->debug_dump_mgr->debug_dump_lock); + gxp_debug_dump_invalidate_segments(gxp, core); + mutex_unlock(&gxp->debug_dump_mgr->debug_dump_lock); + } + return; + } + + dev_err(gxp->dev, "Invalidate a VD, VDID=%d, client_id=%d", + client->vd->vdid, client_id); + + if (client->vd->state != GXP_VD_UNAVAILABLE) { + if (client->has_block_wakelock) { + if (release_unconsumed_async_resps) + release_unconsumed_async_resps(client->vd); + gxp_vd_block_unready(client->vd); + } + + client->vd->state = GXP_VD_UNAVAILABLE; + if (client->vd_invalid_eventfd) + gxp_eventfd_signal(client->vd_invalid_eventfd); + } else { + dev_dbg(gxp->dev, "This VD is already invalidated"); + } + + up_write(&gxp->vd_semaphore); + /* + * Process debug dump if its enabled and core_list is not empty. + * Keep on hold the client lock while processing the dumps. vd + * lock would be taken and released inside the debug dump + * implementation logic ahead. 
+ */ + if (gxp_debug_dump_is_enabled() && core_list != 0) { + ret = gxp_debug_dump_process_dump_mcu_mode(gxp, core_list, + client->vd); + if (ret) + dev_err(gxp->dev, + "debug dump processing failed (ret=%d).\n", + ret); + } + up_write(&client->semaphore); +} @@ -11,12 +11,16 @@ #include <linux/iommu.h> #include <linux/list.h> #include <linux/rbtree.h> +#include <linux/refcount.h> #include <linux/rwsem.h> #include <linux/scatterlist.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/wait.h> +#include <gcip/gcip-image-config.h> + +#include "gxp-host-device-structs.h" #include "gxp-internal.h" #include "gxp-mapping.h" @@ -48,6 +52,12 @@ enum gxp_virtual_device_state { * Note: this state will only be set on suspend/resume failure. */ GXP_VD_UNAVAILABLE, + /* + * gxp_vd_release() has been called. VD with this state means it's + * waiting for the last reference to be put(). All fields in VD is + * invalid in this state. + */ + GXP_VD_RELEASED, }; struct gxp_virtual_device { @@ -58,6 +68,8 @@ struct gxp_virtual_device { struct mailbox_resp_queue *mailbox_resp_queues; struct rb_root mappings_root; struct rw_semaphore mappings_semaphore; + /* Used to save doorbell state on VD resume. */ + uint doorbells_state[GXP_NUM_DOORBELLS_PER_VD]; enum gxp_virtual_device_state state; /* * Record the gxp->power_mgr->blk_switch_count when the vd was @@ -72,9 +84,24 @@ struct gxp_virtual_device { */ int slice_index; /* - * The SG table that holds the firmware data region. + * The SG table that holds the firmware RW data region. + */ + struct sg_table *rwdata_sgt[GXP_NUM_CORES]; + /* + * The SG table that holds the regions specified in the image config's + * non-secure IOMMU mappings. + */ + struct { + dma_addr_t daddr; + struct sg_table *sgt; + } ns_regions[GCIP_IMG_CFG_MAX_NS_IOMMU_MAPPINGS]; + /* The firmware size specified in image config. */ + u32 fw_ro_size; + /* + * The config regions specified in image config. 
+ * core_cfg's size should be a multiple of GXP_NUM_CORES. */ - struct sg_table *fwdata_sgt; + struct gxp_mapped_resource core_cfg, vd_cfg, sys_cfg; uint core_list; /* * The ID of DSP client. -1 if it is not allocated. @@ -106,14 +133,15 @@ struct gxp_virtual_device { /* Whether it's the first time allocating a VMBox for this VD. */ bool first_open; bool is_secure; + refcount_t refcount; + /* A constant ID assigned after VD is allocated. For debug only. */ + int vdid; + struct gcip_image_config_parser cfg_parser; + /* The config version specified in firmware's image config. */ + u32 config_version; }; /* - * TODO(b/193180931) cleanup the relationship between the internal GXP modules. - * For example, whether or not gxp_vd owns the gxp_fw module, and if so, if - * other modules are expected to access the gxp_fw directly or only via gxp_vd. - */ -/* * Initializes the device management subsystem and allocates resources for it. * This is expected to be called once per driver lifecycle. */ @@ -145,12 +173,15 @@ struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp, u16 requested_cores); /** - * gxp_vd_release() - Cleanup and free a struct gxp_virtual_device + * gxp_vd_release() - Cleanup a struct gxp_virtual_device * @vd: The virtual device to be released * * The caller must have locked gxp->vd_semaphore for writing. * * A virtual device must be stopped before it can be released. + * + * If @vd's reference count is 1 before this call, this function frees @vd. + * Otherwise @vd's state is set to GXP_VD_RELEASED. */ void gxp_vd_release(struct gxp_virtual_device *vd); @@ -190,13 +221,6 @@ void gxp_vd_stop(struct gxp_virtual_device *vd); */ int gxp_vd_virt_core_to_phys_core(struct gxp_virtual_device *vd, u16 virt_core); -/* - * Acquires the physical core IDs assigned to the virtual device. - * - * The caller must have locked gxp->vd_semaphore for reading. 
- */ -uint gxp_vd_phys_core_list(struct gxp_virtual_device *vd); - /** * gxp_vd_mapping_store() - Store a mapping in a virtual device's records * @vd: The virtual device @map was created for and will be stored in @@ -330,4 +354,43 @@ bool gxp_vd_has_and_use_credit(struct gxp_virtual_device *vd); */ void gxp_vd_release_credit(struct gxp_virtual_device *vd); +/* Increases reference count of @vd by one and returns @vd. */ +static inline struct gxp_virtual_device * +gxp_vd_get(struct gxp_virtual_device *vd) +{ + WARN_ON_ONCE(!refcount_inc_not_zero(&vd->refcount)); + return vd; +} + +/* + * Decreases reference count of @vd by one. + * + * If @vd->refcount becomes 0, @vd will be freed. + */ +void gxp_vd_put(struct gxp_virtual_device *vd); + +/* + * Change the status of the vd of @client_id to GXP_VD_UNAVAILABLE. + * Internally, it will discard all pending/unconsumed user commands + * and call the `gxp_vd_block_unready` function. + * + * This function will be called when the `CLIENT_FATAL_ERROR_NOTIFY` + * RKCI has been sent from the firmware side. + * + * @gxp: The GXP device to obtain the handler for + * @client_id: client_id of the crashed vd. + * @core_list: A bitfield enumerating the physical cores on which + * crash is reported from firmware. + */ +void gxp_vd_invalidate(struct gxp_dev *gxp, int client_id, uint core_list); + +/* + * An ID between 0~GXP_NUM_CORES-1 and is unique to each VD. + * Only used in direct mode. 
+ */ +static inline uint gxp_vd_hw_slot_id(struct gxp_virtual_device *vd) +{ + return ffs(vd->core_list) - 1; +} + #endif /* __GXP_VD_H__ */ @@ -932,4 +932,88 @@ struct gxp_mailbox_uci_response_ioctl { #define GXP_MAILBOX_UCI_RESPONSE \ _IOR(GXP_IOCTL_BASE, 31, struct gxp_mailbox_uci_response_ioctl) +/* + * struct gxp_create_sync_fence_data + * @seqno: the seqno to initialize the fence with + * @timeline_name: the name of the timeline the fence belongs to + * @fence: returns the fd of the new sync_file with the new fence + * + * Timeline names can be up to 128 characters (including trailing NUL byte) + * for gxp debugfs and kernel debug logs. These names are truncated to 32 + * characters in the data returned by the standard SYNC_IOC_FILE_INFO + * ioctl. + */ +#define GXP_SYNC_TIMELINE_NAME_LEN 128 +struct gxp_create_sync_fence_data { + __u32 seqno; + char timeline_name[GXP_SYNC_TIMELINE_NAME_LEN]; + __s32 fence; +}; + +/* + * Create a DMA sync fence, return the sync_file fd for the new fence. + * + * The client must have allocated a virtual device. + */ +#define GXP_CREATE_SYNC_FENCE \ + _IOWR(GXP_IOCTL_BASE, 32, struct gxp_create_sync_fence_data) + +/* + * struct gxp_signal_sync_fence_data + * @fence: fd of the sync_file for the fence + * @error: error status errno value or zero for success + */ +struct gxp_signal_sync_fence_data { + __s32 fence; + __s32 error; +}; + +/* + * Signal a DMA sync fence with optional error status. + * Can pass a sync_file fd created by any driver. + * Signals the first DMA sync fence in the sync file. + */ +#define GXP_SIGNAL_SYNC_FENCE \ + _IOW(GXP_IOCTL_BASE, 33, struct gxp_signal_sync_fence_data) + +/* + * struct gxp_sync_fence_status + * @fence: fd of the sync_file for the fence + * @status: returns: + * 0 if active + * 1 if signaled with no error + * negative errno value if signaled with error + */ +struct gxp_sync_fence_status { + __s32 fence; + __s32 status; +}; + +/* + * Retrieve DMA sync fence status. 
+ * Can pass a sync_file fd created by any driver. + * Returns the status of the first DMA sync fence in the sync file. + */ +#define GXP_SYNC_FENCE_STATUS \ + _IOWR(GXP_IOCTL_BASE, 34, struct gxp_sync_fence_status) + +/* + * struct gxp_register_invalidated_eventfd_ioctl + * @eventfd: File-descriptor obtained via eventfd(). + * Not used during the unregister step. + */ +struct gxp_register_invalidated_eventfd_ioctl { + __u32 eventfd; +}; + +/* + * Registers an eventfd which will be triggered when the device crashes and + * the virtual device of the client is invalidated. + */ +#define GXP_REGISTER_INVALIDATED_EVENTFD \ + _IOW(GXP_IOCTL_BASE, 35, struct gxp_register_invalidated_eventfd_ioctl) + +#define GXP_UNREGISTER_INVALIDATED_EVENTFD \ + _IOW(GXP_IOCTL_BASE, 36, struct gxp_register_invalidated_eventfd_ioctl) + #endif /* __GXP_H__ */ |