diff options
author | Aurora pro automerger <aurora-pro-automerger@google.com> | 2022-05-14 14:55:22 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2022-05-16 12:19:42 -0700 |
commit | 35e3403a4d6660b3db2e434d5fa93e23961222ec (patch) | |
tree | 08ae62eb3b9613f11afaf8289651cd04f130c8fa | |
parent | fa5cf5721220d5b97544ea56b91bd9f2590debac (diff) | |
download | gs201-35e3403a4d6660b3db2e434d5fa93e23961222ec.tar.gz |
[Copybara Auto Merge] Merge branch 'gs201-release' into 'android13-gs-pixel-5.10'
gxp: Remove stale TODO comments
Bug: 232447048
gxp: heuristically guess the user buffer perm
Bug: 232529892
gxp: rearrange logs for BLK/VD suspend/resume
Bug: 209083969
gxp: check AUR_OFF while sending power requests
Bug: 231694821
gxp: rename NUM_CORES to MAX_NUM_CORES
Bug: 229961485
gxp: fix one more typo of "semphore"
Bug: 232447048 (repeat)
gxp: fix size check for telemetry mmap
Bug: 232447048 (repeat)
gxp: check VD wakelock for GXP_TRIGGER_DEBUG_DUMP
Bug: 231692562
gxp: Handle debug dump cases with invalid segment header
Bug: 218344866
gxp: Add header file version into debug dump struct
Bug: 202417979
gxp: Fix "semphore" typo
Bug: 232447048 (repeat)
gxp: Use DMA direction argument to determine read/write buffer map
Bug: 201243473
gxp: Update mailbox command codes and remove unused ones
Bug: 231328277
gxp: assert client->semaphore held without rw
Bug: 231692448
gxp: Add unittests for the TPU related ioctl
Bug: 227545695
gxp: refine power state transition interface
Bug: 231584263
gxp: Add check that virtual device is not NULL
Bug: 231271959
gxp: remove unused block wakelock functions
Bug: 232447048 (repeat)
gxp: Check memory-per-core arg to GXP_ALLOCATE_VIRTUAL_DEVICE
Bug: 231272386
gxp: Log changes to thermal limit on BLK frequency
Bug: 177217526
gxp: don't warn if all power votes are revoked
Bug: 232447048 (repeat)
gxp: Do not request firmware until first open()
Bug: 228377252
gxp: Add the ability to re-request firmware images
Bug: 228377252 (repeat)
gxp: Prepare fw images and buffers on probe
Bug: 228377252 (repeat)
gxp: Add IOCTL for triggering debug dump
Bug: 185262089
gxp: Update coredump debugfs node
Bug: 185262089 (repeat)
gxp: check valid dma direction
Bug: 232447048 (repeat)
gxp: fix probe cleanup sequences
Bug: 228920163
gxp: Check DMA direction before create mapping
Bug: 229578163
gxp: Use kvmalloc to allocate pages for mapping
Bug: 230312441
gxp: add uaccess.h header for access_ok
Bug: 201243473 (repeat)
gxp: Refactor mappings to be owned by VDs
Bug: 184572070
Bug: 220225771
Bug: 230291950
gxp: Add access_ok check to buffer map operation
Bug: 201243473 (repeat)
gxp: Add lock assertion for cmd_queue and resp_queue manipulation
Bug: 229919339
gxp: use realtime kthread for response handling
Bug: 229687028
gxp: Register IRQ handler after initializing the mailbox
Bug: 229912601
gxp: Update comment when scheduling power requests
Bug: 232447048 (repeat)
gxp: remove config GXP_CLOUDRIPPER
Bug: 229696441
gxp: add timeout to response ioctl
Bug: 229443637
gxp: fix broken Makefile for out-of-tree build
Bug: 232447048 (repeat)
gxp: fix GXP_NUM_CORES setting
Bug: 232447048 (repeat)
gxp: add NOWARN flag when pinning user pages
Bug: 229578166
gxp: create amalthea configurations
Bug: 227528380
gxp: add Kconfig for future gen
Bug: 227528380 (repeat)
gxp: check fw_app by IS_ERR_OR_NULL
Bug: 229433210
gxp: Add log for out of memory when pinning pages
Bug: 232447048 (repeat)
gxp: validate virtual core list on buffer map
Bug: 229172687
gxp: initialize specs ioctl output buf
Bug: 229470814
gxp: Update flow for kernel-initiated debug dumps
Bug: 185262089 (repeat)
gxp: Always return an error on gxp_vd_allocate failure
Bug: 229329108
gxp: refuse vd wakelock acquiring if not allocated
Bug: 229249566
gxp: add -Idrivers/gxp/include to ccflags
Bug: 205970684
gxp: check null pointer in eventfd unregistration
Bug: 229198626
gxp: Add support for dynamic callstack reconstruction in debug dump
Bug: 203441187
gxp: Add ability to map/unmap user buffers to kernel space
Bug: 203441187 (repeat)
gxp: move acpm_dvfs to include/
Bug: 228938583
gxp: not set doorbell mask twice if it's not GEM5
Bug: 232447048 (repeat)
gxp: add iommu domain pool
Bug: 209083969 (repeat)
gxp: Add flush_workqueue when pm destroy
Bug: 232447048 (repeat)
gxp: add include/ to inclusion path
Bug: 205970684 (repeat)
GitOrigin-RevId: ef68c0f9b9145e7ffbee141fa192335bf877e82d
Change-Id: Ide21a9ab84d480c018ae065868d8ee619df83bf0
-rw-r--r-- | Kconfig | 39 | ||||
-rw-r--r-- | Makefile | 12 | ||||
-rw-r--r-- | amalthea/config.h | 17 | ||||
-rw-r--r-- | amalthea/csrs.h (renamed from gxp-csrs.h) | 11 | ||||
-rw-r--r-- | gxp-client.c | 7 | ||||
-rw-r--r-- | gxp-config.h | 20 | ||||
-rw-r--r-- | gxp-debug-dump.c | 251 | ||||
-rw-r--r-- | gxp-debug-dump.h | 26 | ||||
-rw-r--r-- | gxp-debugfs.c | 108 | ||||
-rw-r--r-- | gxp-dma-iommu-gem5.c | 577 | ||||
-rw-r--r-- | gxp-dma-iommu.c | 4 | ||||
-rw-r--r-- | gxp-dma.h | 6 | ||||
-rw-r--r-- | gxp-dmabuf.c | 67 | ||||
-rw-r--r-- | gxp-dmabuf.h | 15 | ||||
-rw-r--r-- | gxp-domain-pool.c | 104 | ||||
-rw-r--r-- | gxp-domain-pool.h | 53 | ||||
-rw-r--r-- | gxp-firmware-data.c | 20 | ||||
-rw-r--r-- | gxp-firmware.c | 275 | ||||
-rw-r--r-- | gxp-firmware.h | 13 | ||||
-rw-r--r-- | gxp-host-device-structs.h | 10 | ||||
-rw-r--r-- | gxp-internal.h | 26 | ||||
-rw-r--r-- | gxp-lpm.c | 45 | ||||
-rw-r--r-- | gxp-lpm.h | 3 | ||||
-rw-r--r-- | gxp-mailbox.c | 123 | ||||
-rw-r--r-- | gxp-mailbox.h | 16 | ||||
-rw-r--r-- | gxp-mapping.c | 327 | ||||
-rw-r--r-- | gxp-mapping.h | 108 | ||||
-rw-r--r-- | gxp-platform.c | 365 | ||||
-rw-r--r-- | gxp-pm.c | 142 | ||||
-rw-r--r-- | gxp-pm.h | 76 | ||||
-rw-r--r-- | gxp-telemetry.c | 8 | ||||
-rw-r--r-- | gxp-telemetry.h | 2 | ||||
-rw-r--r-- | gxp-thermal.c | 16 | ||||
-rw-r--r-- | gxp-vd.c | 183 | ||||
-rw-r--r-- | gxp-vd.h | 71 | ||||
-rw-r--r-- | gxp-wakelock.h | 2 | ||||
-rw-r--r-- | gxp.h | 17 |
37 files changed, 1757 insertions, 1408 deletions
diff --git a/Kconfig b/Kconfig deleted file mode 100644 index 1673e66..0000000 --- a/Kconfig +++ /dev/null @@ -1,39 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -menu "GXP device" - -config GXP - tristate "Device driver for GXP" - default m - select GENERIC_ALLOCATOR - select DMA_SHARED_BUFFER - help - This driver supports the GXP device. Say Y if you want to - include this driver in the kernel. - - To compile this driver as a module, choose M here. The module will be - called "gxp". - -choice GXP_PLATFORM - bool "Target platform to build GXP driver for" - depends on GXP - default GXP_CLOUDRIPPER - -config GXP_CLOUDRIPPER - bool "Build for Cloudripper development board" - help - Select this to build for the Cloudripper development board. - -config GXP_ZEBU - bool "Build for ZeBu emulation system" - help - Select this to build for the full-SoC ZeBu emulation platform. - -config GXP_IP_ZEBU - bool "Build for an IP-ZeBu emulation system" - help - Select this to build for the Aurora IP-ZeBu emulation platform. - -endchoice - -endmenu @@ -11,6 +11,7 @@ gxp-objs += \ gxp-debug-dump.o \ gxp-debugfs.o \ gxp-dmabuf.o \ + gxp-domain-pool.o \ gxp-doorbell.o \ gxp-eventfd.o \ gxp-firmware.o \ @@ -48,11 +49,7 @@ endif # - IP_ZEBU # Defaults to building for CLOUDRIPPER if not otherwise specified. GXP_PLATFORM ?= CLOUDRIPPER - -# Test against the build closet to production mode, choose CLOUDRIPPER. -ifdef CONFIG_GXP_TEST - GXP_PLATFORM = CLOUDRIPPER -endif +GXP_CHIP ?= AMALTHEA # Setup which version of the gxp-dma interface is used. # For gem5, need to adopt dma interface without aux domain. 
@@ -62,9 +59,10 @@ else gxp-objs += gxp-dma-iommu.o endif -ccflags-y += -DCONFIG_GXP_$(GXP_PLATFORM) +ccflags-y += -DCONFIG_GXP_$(GXP_PLATFORM) -DCONFIG_$(GXP_CHIP)=1 \ + -I$(M)/include -I$(srctree)/drivers/gxp/include -KBUILD_OPTIONS += CONFIG_GXP=m +KBUILD_OPTIONS += CONFIG_GXP=m GXP_CHIP=AMALTHEA ifdef CONFIG_GXP_TEST subdir-ccflags-y += -Wall -Werror -I$(srctree)/drivers/gxp/include diff --git a/amalthea/config.h b/amalthea/config.h new file mode 100644 index 0000000..19afff6 --- /dev/null +++ b/amalthea/config.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Include all configuration files for Amalthea. + * + * Copyright (C) 2022 Google LLC + */ + +#ifndef __AMALTHEA_CONFIG_H__ +#define __AMALTHEA_CONFIG_H__ + +#define GXP_DRIVER_NAME "gxp_platform" + +#define GXP_NUM_CORES 4 + +#include "csrs.h" + +#endif /* __AMALTHEA_CONFIG_H__ */ diff --git a/gxp-csrs.h b/amalthea/csrs.h index d6a6e9f..1338121 100644 --- a/gxp-csrs.h +++ b/amalthea/csrs.h @@ -1,11 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * GXP CSR definitions. + * Amalthea CSR definitions. 
* - * Copyright (C) 2021 Google LLC + * Copyright (C) 2021-2022 Google LLC */ -#ifndef __GXP_CSRS_H__ -#define __GXP_CSRS_H__ + +#ifndef __AMALTHEA_CSRS_H__ +#define __AMALTHEA_CSRS_H__ #define GXP_REG_DOORBELLS_SET_WRITEMASK 0x1 #define GXP_REG_DOORBELLS_CLEAR_WRITEMASK 0x1 @@ -119,4 +120,4 @@ enum gxp_core_csrs { #define PSM_GPOUT_HI_RD_OFFSET 0x6B4 #define PSM_DEBUG_STATUS_OFFSET 0x6B8 -#endif /* __GXP_CSRS_H__ */ +#endif /* __AMALTHEA_CSRS_H__ */ diff --git a/gxp-client.c b/gxp-client.c index 5802287..87e911a 100644 --- a/gxp-client.c +++ b/gxp-client.c @@ -63,11 +63,10 @@ void gxp_client_destroy(struct gxp_client *client) if (client->has_block_wakelock) { gxp_wakelock_release(client->gxp); - gxp_pm_update_requested_power_state( + gxp_pm_update_requested_power_states( gxp, client->requested_power_state, - client->requested_aggressor, AUR_OFF, true); - gxp_pm_update_requested_memory_power_state( - gxp, client->requested_memory_power_state, + client->requested_aggressor, AUR_OFF, true, + client->requested_memory_power_state, AUR_MEM_UNDEFINED); } diff --git a/gxp-config.h b/gxp-config.h index 2899288..813cad7 100644 --- a/gxp-config.h +++ b/gxp-config.h @@ -8,13 +8,23 @@ #ifndef __GXP_CONFIG_H__ #define __GXP_CONFIG_H__ -#define GXP_DRIVER_NAME "gxp_platform" -#ifndef CONFIG_GXP_GEM5 -#define GXP_NUM_CORES 4 -#else +#if IS_ENABLED(CONFIG_AMALTHEA) + +#include "amalthea/config.h" + +#else /* unknown */ + +#error "Unknown GXP config" + +#endif /* unknown */ + +#ifdef CONFIG_GXP_GEM5 +#undef GXP_NUM_CORES #define GXP_NUM_CORES 1 #endif +#define GXP_NUM_PREALLOCATED_DOMAINS GXP_NUM_CORES + #if defined(CONFIG_GXP_ZEBU) || defined(CONFIG_GXP_IP_ZEBU) #define GXP_TIME_DELAY_FACTOR 20 #else @@ -25,8 +35,6 @@ #define SYNC_BARRIER_COUNT 16 -#include "gxp-csrs.h" - /* Core address space starts at Inst_BPM block */ #define GXP_CORE_0_BASE GXP_REG_CORE_0_INST_BPM #define GXP_CORE_SIZE (GXP_REG_CORE_1_INST_BPM - GXP_REG_CORE_0_INST_BPM) diff --git a/gxp-debug-dump.c 
b/gxp-debug-dump.c index d8fd973..322e1ca 100644 --- a/gxp-debug-dump.c +++ b/gxp-debug-dump.c @@ -21,9 +21,9 @@ #include "gxp-doorbell.h" #include "gxp-internal.h" #include "gxp-lpm.h" +#include "gxp-mapping.h" +#include "gxp-vd.h" -#define GXP_COREDUMP_PENDING 0xF -#define KERNEL_INIT_DUMP_TIMEOUT (10000 * GXP_TIME_DELAY_FACTOR) #define SSCD_MSG_LENGTH 64 #define SYNC_BARRIER_BLOCK 0x00100000 @@ -336,9 +336,13 @@ static void gxp_handle_debug_dump(struct gxp_dev *gxp, uint32_t core_id) for (i = 0; i < GXP_NUM_CORE_SEGMENTS - 1; i++) { mgr->segs[core_id][seg_idx].addr = data_addr; - mgr->segs[core_id][seg_idx].size = - core_dump_header->seg_header[i].size; - data_addr += mgr->segs[core_id][seg_idx].size; + mgr->segs[core_id][seg_idx].size = 0; + if (core_dump_header->seg_header[i].valid) { + mgr->segs[core_id][seg_idx].size = + core_dump_header->seg_header[i].size; + } + + data_addr += core_dump_header->seg_header[i].size; seg_idx++; } @@ -368,21 +372,39 @@ static void gxp_free_segments(struct gxp_dev *gxp) kfree(gxp->debug_dump_mgr->common_dump); } +#if IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP) +static int gxp_get_mapping_count(struct gxp_dev *gxp, int core_id) +{ + struct gxp_core_dump *core_dump = gxp->debug_dump_mgr->core_dump; + struct gxp_core_header *core_header = + &core_dump->core_dump_header[core_id].core_header; + int i, count = 0; + + for (i = 0; i < GXP_NUM_BUFFER_MAPPINGS; i++) { + if (core_header->user_bufs[i].size != 0) + count++; + } + + return count; +} +#endif + static int gxp_init_segments(struct gxp_dev *gxp) { #if !IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP) return 0; #else struct gxp_debug_dump_manager *mgr = gxp->debug_dump_mgr; - int segs_num = GXP_NUM_COMMON_SEGMENTS; - int core_id = 0; - /* * segs_num include the common segments, core segments for each core, * core header for each core */ - segs_num += GXP_NUM_CORE_SEGMENTS + 1; + int segs_min_num = GXP_NUM_COMMON_SEGMENTS + GXP_NUM_CORE_SEGMENTS + 1; + int segs_num; + int core_id = 0; + for 
(core_id = 0; core_id < GXP_NUM_CORES; core_id++) { + segs_num = segs_min_num + gxp_get_mapping_count(gxp, core_id); mgr->segs[core_id] = kmalloc_array(segs_num, sizeof(struct sscd_segment), GFP_KERNEL); @@ -402,6 +424,130 @@ err_out: #endif } +/* + * `user_bufs` is an input buffer containing up to GXP_NUM_BUFFER_MAPPINGS + * virtual addresses + */ +#if IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP) +static void gxp_add_user_buffer_to_segments(struct gxp_dev *gxp, + struct gxp_core_header *core_header, + int core_id, int seg_idx, + void *user_bufs[]) +{ + struct gxp_debug_dump_manager *mgr = gxp->debug_dump_mgr; + struct gxp_user_buffer user_buf; + int i; + + for (i = 0; i < GXP_NUM_BUFFER_MAPPINGS ; i++) { + user_buf = core_header->user_bufs[i]; + if (user_buf.size == 0) + continue; + mgr->segs[core_id][seg_idx].addr = user_bufs[i]; + mgr->segs[core_id][seg_idx].size = user_buf.size; + seg_idx++; + } +} + +static void gxp_user_buffers_vunmap(struct gxp_dev *gxp, + struct gxp_core_header *core_header) +{ + struct gxp_virtual_device *vd; + struct gxp_user_buffer user_buf; + int i; + struct gxp_mapping *mapping; + + down_read(&gxp->vd_semaphore); + + vd = gxp->core_to_vd[core_header->core_id]; + if (!vd) { + dev_err(gxp->dev, "Virtual device is not available for vunmap\n"); + goto out; + } + + for (i = 0; i < GXP_NUM_BUFFER_MAPPINGS; i++) { + user_buf = core_header->user_bufs[i]; + if (user_buf.size == 0) + continue; + + mapping = gxp_vd_mapping_search_in_range( + vd, (dma_addr_t)user_buf.device_addr); + if (!mapping) { + dev_err(gxp->dev, + "No mapping found for user buffer at device address %#llX\n", + user_buf.device_addr); + continue; + } + + gxp_mapping_vunmap(mapping); + gxp_mapping_put(mapping); + } + +out: + up_read(&gxp->vd_semaphore); +} + +static int gxp_user_buffers_vmap(struct gxp_dev *gxp, + struct gxp_core_header *core_header, + void *user_buf_vaddrs[]) +{ + struct gxp_virtual_device *vd; + struct gxp_user_buffer user_buf; + int i, cnt = 0; + dma_addr_t daddr; 
+ struct gxp_mapping *mapping; + void *vaddr; + + down_read(&gxp->vd_semaphore); + + vd = gxp->core_to_vd[core_header->core_id]; + if (!vd) { + dev_err(gxp->dev, "Virtual device is not available for vmap\n"); + goto out; + } + + for (i = 0; i < GXP_NUM_BUFFER_MAPPINGS; i++) { + user_buf = core_header->user_bufs[i]; + if (user_buf.size == 0) + continue; + + /* Get mapping */ + daddr = (dma_addr_t)user_buf.device_addr; + mapping = gxp_vd_mapping_search_in_range(vd, daddr); + if (!mapping) { + user_buf.size = 0; + continue; + } + + /* Map the mapping into kernel space */ + vaddr = gxp_mapping_vmap(mapping); + + /* + * Release the reference from searching for the mapping. + * Either vmapping was successful and obtained a new reference + * or vmapping failed, and the gxp_mapping is no longer needed. + */ + gxp_mapping_put(mapping); + + if (IS_ERR(vaddr)) { + up_read(&gxp->vd_semaphore); + gxp_user_buffers_vunmap(gxp, core_header); + return 0; + } + + /* Get kernel address of the user buffer inside the mapping */ + user_buf_vaddrs[i] = + vaddr + daddr - + (mapping->device_address & ~(PAGE_SIZE - 1)); + cnt++; + } + +out: + up_read(&gxp->vd_semaphore); + + return cnt; +} +#endif + static void gxp_handle_dram_dump(struct gxp_dev *gxp, uint32_t core_id) { struct gxp_debug_dump_manager *mgr = gxp->debug_dump_mgr; @@ -410,17 +556,29 @@ static void gxp_handle_dram_dump(struct gxp_dev *gxp, uint32_t core_id) struct gxp_seg_header *dram_seg_header = &core_dump_header->seg_header[GXP_CORE_DRAM_SEGMENT_IDX]; #if IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP) + struct gxp_core_header *core_header = &core_dump_header->core_header; struct sscd_segment *sscd_seg = &mgr->segs[core_id][GXP_DEBUG_DUMP_DRAM_SEGMENT_IDX]; char sscd_msg[SSCD_MSG_LENGTH]; + void *user_buf_vaddrs[GXP_NUM_BUFFER_MAPPINGS]; + int user_buf_cnt; sscd_seg->addr = gxp->fwbufs[core_id].vaddr; sscd_seg->size = gxp->fwbufs[core_id].size; + user_buf_cnt = gxp_user_buffers_vmap(gxp, core_header, user_buf_vaddrs); + if 
(user_buf_cnt > 0) { + gxp_add_user_buffer_to_segments( + gxp, core_header, core_id, + GXP_DEBUG_DUMP_DRAM_SEGMENT_IDX + 1, user_buf_vaddrs); + } + dev_dbg(gxp->dev, "Passing dram data to SSCD daemon\n"); snprintf(sscd_msg, SSCD_MSG_LENGTH - 1, "gxp debug dump - dram data (core %0x)", core_id); - gxp_send_to_sscd(gxp, sscd_seg, 1, sscd_msg); + gxp_send_to_sscd(gxp, sscd_seg, user_buf_cnt + 1, sscd_msg); + + gxp_user_buffers_vunmap(gxp, core_header); #endif dram_seg_header->valid = 1; } @@ -464,30 +622,6 @@ static int gxp_generate_coredump(struct gxp_dev *gxp, uint32_t core_id) return 0; } -static void gxp_wait_kernel_init_dump_work(struct work_struct *work) -{ - struct gxp_debug_dump_manager *mgr = - container_of(work, struct gxp_debug_dump_manager, - wait_kernel_init_dump_work); - u32 core_bits; - int i; - - wait_event_timeout(mgr->kernel_init_dump_waitq, - mgr->kernel_init_dump_pending == - GXP_COREDUMP_PENDING, - msecs_to_jiffies(KERNEL_INIT_DUMP_TIMEOUT)); - - mutex_lock(&mgr->lock); - core_bits = mgr->kernel_init_dump_pending; - for (i = 0; i < GXP_NUM_CORES; i++) { - if (!(core_bits & BIT(i))) - continue; - gxp_generate_coredump(mgr->gxp, i); - } - mgr->kernel_init_dump_pending = 0; - mutex_unlock(&mgr->lock); -} - void gxp_debug_dump_process_dump(struct work_struct *work) { struct gxp_debug_dump_work *debug_dump_work = @@ -495,43 +629,8 @@ void gxp_debug_dump_process_dump(struct work_struct *work) uint core_id = debug_dump_work->core_id; struct gxp_dev *gxp = debug_dump_work->gxp; - struct gxp_debug_dump_manager *mgr; - struct gxp_core_dump *core_dump; - struct gxp_core_dump_header *core_dump_header; - struct gxp_core_header *core_header; - int *kernel_init_dump_pending; - - mgr = gxp->debug_dump_mgr; - if (!mgr) { - dev_err(gxp->dev, - "gxp->debug_dump_mgr has not been initialized\n"); - return; - } - - core_dump = mgr->core_dump; - if (!core_dump) { - dev_err(gxp->dev, - "mgr->core_dump has not been initialized\n"); - return; - } - core_dump_header = 
&core_dump->core_dump_header[core_id]; - core_header = &core_dump_header->core_header; - kernel_init_dump_pending = &mgr->kernel_init_dump_pending; - - switch (core_header->dump_req_reason) { - case DEBUG_DUMP_FW_INIT: - gxp_generate_coredump(gxp, core_id); - break; - case DEBUG_DUMP_KERNEL_INIT: - mutex_lock(&mgr->lock); - if (*kernel_init_dump_pending == 0) - schedule_work(&mgr->wait_kernel_init_dump_work); - *kernel_init_dump_pending |= BIT(core_id); - wake_up(&mgr->kernel_init_dump_waitq); - mutex_unlock(&mgr->lock); - break; - } + gxp_generate_coredump(gxp, core_id); } struct work_struct *gxp_debug_dump_get_notification_handler(struct gxp_dev *gxp, @@ -587,7 +686,8 @@ int gxp_debug_dump_init(struct gxp_dev *gxp, void *sscd_dev, void *sscd_pdata) core_dump_header->core_header.dump_available = 0; for (i = 0; i < GXP_NUM_CORE_SEGMENTS; i++) core_dump_header->seg_header[i].valid = 0; - + for (i = 0; i < GXP_NUM_BUFFER_MAPPINGS; i++) + core_dump_header->core_header.user_bufs[i].size = 0; mgr->debug_dump_works[core].gxp = gxp; mgr->debug_dump_works[core].core_id = core; INIT_WORK(&mgr->debug_dump_works[core].work, @@ -598,17 +698,10 @@ int gxp_debug_dump_init(struct gxp_dev *gxp, void *sscd_dev, void *sscd_pdata) /* No need for a DMA handle since the carveout is coherent */ mgr->debug_dump_dma_handle = 0; - mgr->kernel_init_dump_pending = 0; mgr->sscd_dev = sscd_dev; mgr->sscd_pdata = sscd_pdata; - mutex_init(&mgr->lock); mutex_init(&mgr->debug_dump_lock); - INIT_WORK(&mgr->wait_kernel_init_dump_work, - gxp_wait_kernel_init_dump_work); - - init_waitqueue_head(&mgr->kernel_init_dump_waitq); - return 0; } @@ -621,12 +714,10 @@ void gxp_debug_dump_exit(struct gxp_dev *gxp) return; } - cancel_work_sync(&mgr->wait_kernel_init_dump_work); gxp_free_segments(gxp); /* TODO (b/200169232) Remove this once we're using devm_memremap */ memunmap(gxp->coredumpbuf.vaddr); - mutex_destroy(&mgr->lock); mutex_destroy(&mgr->debug_dump_lock); devm_kfree(mgr->gxp->dev, mgr); 
gxp->debug_dump_mgr = NULL; diff --git a/gxp-debug-dump.h b/gxp-debug-dump.h index b1905b7..9d80564 100644 --- a/gxp-debug-dump.h +++ b/gxp-debug-dump.h @@ -20,6 +20,7 @@ #define GXP_NUM_COMMON_SEGMENTS 2 #define GXP_NUM_CORE_SEGMENTS 8 #define GXP_CORE_DRAM_SEGMENT_IDX 7 +#define GXP_NUM_BUFFER_MAPPINGS 32 #define GXP_DEBUG_DUMP_CORE_SEGMENT_IDX_START (GXP_NUM_COMMON_SEGMENTS + 1) #define GXP_DEBUG_DUMP_DRAM_SEGMENT_IDX \ (GXP_DEBUG_DUMP_CORE_SEGMENT_IDX_START + GXP_CORE_DRAM_SEGMENT_IDX) @@ -45,6 +46,17 @@ #define GXP_DEBUG_DUMP_INT_MASK BIT(GXP_DEBUG_DUMP_INT) #define GXP_DEBUG_DUMP_RETRY_NUM 5 +/* + * For debug dump, the kernel driver header file version must be the same as + * the firmware header file version. In other words, + * GXP_DEBUG_DUMP_HEADER_VERSION must be the same value as the value of + * kGxpDebugDumpHeaderVersion in firmware. + * Note: This needs to be updated when there are updates to gxp_core_dump and + * gxp_core_dump_header (or anything within the struct that may cause a mismatch + * with the firmware version of the debug dump header file). 
+ */ +#define GXP_DEBUG_DUMP_HEADER_VERSION 0 + struct gxp_timer_registers { u32 comparator; u32 control; @@ -110,13 +122,19 @@ struct gxp_lpm_registers { struct gxp_lpm_psm_registers psm_regs[PSM_COUNT]; }; +struct gxp_user_buffer { + u64 device_addr; /* Device address of user buffer */ + u32 size; /* Size of user buffer */ +}; + struct gxp_core_header { u32 core_id; /* Aurora core ID */ u32 dump_available; /* Dump data is available for core*/ u32 dump_req_reason; /* Code indicating reason for debug dump request */ - u32 crash_reason; /* Error code identifying crash reason */ + u32 header_version; /* Header file version */ u32 fw_version; /* Firmware version */ u32 core_dump_size; /* Size of core dump */ + struct gxp_user_buffer user_bufs[GXP_NUM_BUFFER_MAPPINGS]; }; struct gxp_seg_header { @@ -163,12 +181,6 @@ struct gxp_debug_dump_manager { void *sscd_dev; void *sscd_pdata; dma_addr_t debug_dump_dma_handle; /* dma handle for debug dump */ - /* Lock protects kernel_init_dump_pending and kernel_init_dump_waitq */ - struct mutex lock; - /* Keep track of which cores have kernel-initiated core dump ready */ - int kernel_init_dump_pending; - wait_queue_head_t kernel_init_dump_waitq; - struct work_struct wait_kernel_init_dump_work; /* * Debug dump lock to ensure only one debug dump is being processed at a * time diff --git a/gxp-debugfs.c b/gxp-debugfs.c index 0d9dae6..8853c3b 100644 --- a/gxp-debugfs.c +++ b/gxp-debugfs.c @@ -5,23 +5,22 @@ * Copyright (C) 2021 Google LLC */ -#ifdef CONFIG_GXP_CLOUDRIPPER #include <linux/acpm_dvfs.h> -#endif -#include "gxp.h" #include "gxp-client.h" #include "gxp-debug-dump.h" #include "gxp-debugfs.h" -#include "gxp-firmware.h" #include "gxp-firmware-data.h" +#include "gxp-firmware.h" #include "gxp-internal.h" -#include "gxp-pm.h" +#include "gxp-notification.h" +#include "gxp-lpm.h" #include "gxp-mailbox.h" +#include "gxp-pm.h" #include "gxp-telemetry.h" -#include "gxp-lpm.h" #include "gxp-vd.h" #include "gxp-wakelock.h" +#include 
"gxp.h" static int gxp_debugfs_lpm_test(void *data, u64 val) { @@ -73,54 +72,18 @@ static int gxp_debugfs_mailbox(void *data, u64 val) } DEFINE_DEBUGFS_ATTRIBUTE(gxp_mailbox_fops, NULL, gxp_debugfs_mailbox, "%llu\n"); -static int gxp_debugfs_pingpong(void *data, u64 val) -{ - int core; - struct gxp_command cmd; - struct gxp_response resp; - struct gxp_dev *gxp = (struct gxp_dev *)data; - - core = val / 1000; - if (core >= GXP_NUM_CORES) { - dev_notice(gxp->dev, - "Mailbox for core %d doesn't exist.\n", core); - return -EINVAL; - } - - if (gxp->mailbox_mgr == NULL || - gxp->mailbox_mgr->mailboxes[core] == NULL) { - dev_notice( - gxp->dev, - "Unable to send mailbox pingpong -- mailbox %d not ready\n", - core); - return -EINVAL; - } - - cmd.code = GXP_MBOX_CODE_PINGPONG; - cmd.priority = 0; - cmd.buffer_descriptor.address = 0; - cmd.buffer_descriptor.size = 0; - cmd.buffer_descriptor.flags = (u32) val; - - down_read(&gxp->vd_semaphore); - gxp_mailbox_execute_cmd(gxp->mailbox_mgr->mailboxes[core], &cmd, &resp); - up_read(&gxp->vd_semaphore); - - dev_info( - gxp->dev, - "Mailbox Pingpong Sent to core %d: val=%d, resp.status=%d, resp.retval=%d\n", - core, cmd.buffer_descriptor.flags, resp.status, resp.retval); - return 0; -} -DEFINE_DEBUGFS_ATTRIBUTE(gxp_pingpong_fops, NULL, gxp_debugfs_pingpong, - "%llu\n"); - static int gxp_firmware_run_set(void *data, u64 val) { struct gxp_dev *gxp = (struct gxp_dev *) data; struct gxp_client *client; int ret = 0; + ret = gxp_firmware_request_if_needed(gxp); + if (ret) { + dev_err(gxp->dev, "Unable to request dsp firmware files\n"); + return ret; + } + mutex_lock(&gxp->debugfs_client_lock); if (val) { @@ -157,8 +120,10 @@ static int gxp_firmware_run_set(void *data, u64 val) goto err_wakelock; } gxp->debugfs_client->has_block_wakelock = true; - gxp_pm_update_requested_power_state(gxp, AUR_OFF, true, AUR_UUD, - true); + gxp_pm_update_requested_power_states(gxp, AUR_OFF, true, + AUR_UUD, true, + AUR_MEM_UNDEFINED, + AUR_MEM_UNDEFINED); 
down_write(&gxp->vd_semaphore); ret = gxp_vd_start(gxp->debugfs_client->vd); @@ -181,8 +146,10 @@ static int gxp_firmware_run_set(void *data, u64 val) */ gxp_client_destroy(gxp->debugfs_client); gxp->debugfs_client = NULL; - gxp_pm_update_requested_power_state(gxp, AUR_UUD, true, AUR_OFF, - true); + gxp_pm_update_requested_power_states(gxp, AUR_UUD, true, + AUR_OFF, true, + AUR_MEM_UNDEFINED, + AUR_MEM_UNDEFINED); } out: @@ -192,7 +159,9 @@ out: err_start: gxp_wakelock_release(gxp); - gxp_pm_update_requested_power_state(gxp, AUR_UUD, true, AUR_OFF, true); + gxp_pm_update_requested_power_states(gxp, AUR_UUD, true, AUR_OFF, true, + AUR_MEM_UNDEFINED, + AUR_MEM_UNDEFINED); err_wakelock: /* Destroying a client cleans up any VDss or wakelocks it held. */ gxp_client_destroy(gxp->debugfs_client); @@ -205,7 +174,10 @@ static int gxp_firmware_run_get(void *data, u64 *val) { struct gxp_dev *gxp = (struct gxp_dev *) data; + down_read(&gxp->vd_semaphore); *val = gxp->firmware_running; + up_read(&gxp->vd_semaphore); + return 0; } @@ -236,8 +208,10 @@ static int gxp_wakelock_set(void *data, u64 val) goto out; } gxp->debugfs_wakelock_held = true; - gxp_pm_update_requested_power_state(gxp, AUR_OFF, true, AUR_UUD, - true); + gxp_pm_update_requested_power_states(gxp, AUR_OFF, true, + AUR_UUD, true, + AUR_MEM_UNDEFINED, + AUR_MEM_UNDEFINED); } else { /* Wakelock Release */ if (!gxp->debugfs_wakelock_held) { @@ -248,8 +222,10 @@ static int gxp_wakelock_set(void *data, u64 val) gxp_wakelock_release(gxp); gxp->debugfs_wakelock_held = false; - gxp_pm_update_requested_power_state(gxp, AUR_UUD, true, AUR_OFF, - true); + gxp_pm_update_requested_power_states(gxp, AUR_UUD, true, + AUR_OFF, true, + AUR_MEM_UNDEFINED, + AUR_MEM_UNDEFINED); } out: @@ -301,8 +277,22 @@ DEFINE_DEBUGFS_ATTRIBUTE(gxp_blk_powerstate_fops, gxp_blk_powerstate_get, static int gxp_debugfs_coredump(void *data, u64 val) { - return gxp_debugfs_mailbox(data, GXP_MBOX_CODE_COREDUMP); + struct gxp_dev *gxp = (struct gxp_dev 
*)data; + int core; + + down_read(&gxp->vd_semaphore); + + for (core = 0; core < GXP_NUM_CORES; core++) { + if (gxp_is_fw_running(gxp, core)) + gxp_notification_send(gxp, core, + CORE_NOTIF_GENERATE_DEBUG_DUMP); + } + + up_read(&gxp->vd_semaphore); + + return 0; } + DEFINE_DEBUGFS_ATTRIBUTE(gxp_coredump_fops, NULL, gxp_debugfs_coredump, "%llu\n"); @@ -475,8 +465,6 @@ void gxp_create_debugfs(struct gxp_dev *gxp) &gxp_lpm_test_fops); debugfs_create_file("mailbox", 0200, gxp->d_entry, gxp, &gxp_mailbox_fops); - debugfs_create_file("pingpong", 0200, gxp->d_entry, gxp, - &gxp_pingpong_fops); debugfs_create_file("firmware_run", 0600, gxp->d_entry, gxp, &gxp_firmware_run_fops); debugfs_create_file("wakelock", 0200, gxp->d_entry, gxp, diff --git a/gxp-dma-iommu-gem5.c b/gxp-dma-iommu-gem5.c deleted file mode 100644 index 8368dcb..0000000 --- a/gxp-dma-iommu-gem5.c +++ /dev/null @@ -1,577 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GXP DMA implemented via IOMMU without AUX domain and SSMT support. 
- * - * Copyright (C) 2021 Google LLC - */ - -#include <linux/dma-mapping.h> -#include <linux/iommu.h> -#include <linux/platform_device.h> -#include <linux/scatterlist.h> -#include <linux/slab.h> - -#include "gxp-config.h" -#include "gxp-dma.h" -#include "gxp-iova.h" -#include "gxp-mapping.h" - -struct gxp_dma_iommu_manager { - struct gxp_dma_manager dma_mgr; - struct iommu_domain *default_domain; -}; - -/* Fault handler */ - -static int sysmmu_fault_handler(struct iommu_fault *fault, void *token) -{ - struct gxp_dev *gxp = (struct gxp_dev *)token; - - switch (fault->type) { - case IOMMU_FAULT_DMA_UNRECOV: - dev_err(gxp->dev, "Unrecoverable IOMMU fault!\n"); - break; - case IOMMU_FAULT_PAGE_REQ: - dev_err(gxp->dev, "IOMMU page request fault!\n"); - break; - default: - dev_err(gxp->dev, "Unexpected IOMMU fault type (%d)\n", - fault->type); - return -EAGAIN; - } - - /* - * Normally the iommu driver should fill out the `event` struct for - * unrecoverable errors, and the `prm` struct for page request faults. - * The SysMMU driver, instead, always fills out the `event` struct. - * - * Note that the `fetch_addr` and `perm` fields are never filled out, - * so we skip printing them. 
- */ - dev_err(gxp->dev, "reason = %08X\n", fault->event.reason); - dev_err(gxp->dev, "flags = %08X\n", fault->event.flags); - dev_err(gxp->dev, "pasid = %08X\n", fault->event.pasid); - dev_err(gxp->dev, "addr = %llX\n", fault->event.addr); - - // Tell the IOMMU driver to carry on - return -EAGAIN; -} - -/* gxp-dma.h Interface */ - -int gxp_dma_init(struct gxp_dev *gxp) -{ - struct gxp_dma_iommu_manager *mgr; - int ret; - - /* GXP can only address 32-bit IOVAs */ - ret = dma_set_mask_and_coherent(gxp->dev, DMA_BIT_MASK(32)); - if (ret) { - dev_err(gxp->dev, "Failed to set DMA mask\n"); - return ret; - } - - mgr = devm_kzalloc(gxp->dev, sizeof(*mgr), GFP_KERNEL); - if (!mgr) - return -ENOMEM; - - mgr->default_domain = iommu_get_domain_for_dev(gxp->dev); - if (!mgr->default_domain) { - dev_err(gxp->dev, "Failed to find default IOMMU domain\n"); - return -EIO; - } - - if (iommu_register_device_fault_handler(gxp->dev, sysmmu_fault_handler, - gxp)) { - dev_err(gxp->dev, "Failed to register iommu fault handler\n"); - return -EIO; - } - - gxp->dma_mgr = &(mgr->dma_mgr); - - return 0; -} - -void gxp_dma_exit(struct gxp_dev *gxp) -{ - if (iommu_unregister_device_fault_handler(gxp->dev)) - dev_err(gxp->dev, - "Failed to unregister SysMMU fault handler\n"); -} - -#define SYNC_BARRIERS_SIZE 0x100000 -#define SYNC_BARRIERS_TOP_OFFSET 0x100000 -#define EXT_TPU_MBX_SIZE 0x2000 - -/* Offset from mailbox base to the device interface that needs to be mapped */ -#define MAILBOX_DEVICE_INTERFACE_OFFSET 0x10000 - -void gxp_dma_init_default_resources(struct gxp_dev *gxp) -{ - unsigned int core; - - for (core = 0; core < GXP_NUM_CORES; core++) { - gxp->mbx[core].daddr = GXP_IOVA_MAILBOX(core); - gxp->fwbufs[core].daddr = GXP_IOVA_FIRMWARE(core); - } - gxp->regs.daddr = GXP_IOVA_AURORA_TOP; - gxp->coredumpbuf.daddr = GXP_IOVA_CORE_DUMP; - gxp->fwdatabuf.daddr = GXP_IOVA_FW_DATA; -} - -int gxp_dma_map_core_resources(struct gxp_dev *gxp, - struct gxp_virtual_device *vd, uint virt_core, - 
uint core) -{ - struct gxp_dma_iommu_manager *mgr = container_of( - gxp->dma_mgr, struct gxp_dma_iommu_manager, dma_mgr); - int ret = 0; - - ret = iommu_map(mgr->default_domain, GXP_IOVA_AURORA_TOP, - gxp->regs.paddr, gxp->regs.size, - IOMMU_READ | IOMMU_WRITE); - if (ret) - goto err; - /* - * Firmware expects to access the sync barriers at a separate - * address, lower than the rest of the AURORA_TOP registers. - */ - ret = iommu_map(mgr->default_domain, GXP_IOVA_SYNC_BARRIERS, - gxp->regs.paddr + SYNC_BARRIERS_TOP_OFFSET, - SYNC_BARRIERS_SIZE, IOMMU_READ | IOMMU_WRITE); - if (ret) - goto err; - /* - * TODO(b/202213606): Map FW regions of all cores in a VD for - * each other at VD creation. - */ - ret = iommu_map(mgr->default_domain, GXP_IOVA_FIRMWARE(0), - gxp->fwbufs[0].paddr, - gxp->fwbufs[0].size * GXP_NUM_CORES, - IOMMU_READ | IOMMU_WRITE); - if (ret) - goto err; - ret = iommu_map(mgr->default_domain, GXP_IOVA_CORE_DUMP, - gxp->coredumpbuf.paddr, gxp->coredumpbuf.size, - IOMMU_READ | IOMMU_WRITE); - if (ret) - goto err; - ret = iommu_map(mgr->default_domain, GXP_IOVA_FW_DATA, - gxp->fwdatabuf.paddr, gxp->fwdatabuf.size, - IOMMU_READ | IOMMU_WRITE); - if (ret) - goto err; - ret = iommu_map(mgr->default_domain, GXP_IOVA_MAILBOX(core), - gxp->mbx[core].paddr + - MAILBOX_DEVICE_INTERFACE_OFFSET, - gxp->mbx[core].size, IOMMU_READ | IOMMU_WRITE); - if (ret) - goto err; - /* Only map the TPU mailboxes if they were found on probe */ - if (gxp->tpu_dev.mbx_paddr) { - ret = iommu_map( - mgr->default_domain, - GXP_IOVA_EXT_TPU_MBX + core * EXT_TPU_MBX_SIZE, - gxp->tpu_dev.mbx_paddr + - core * EXT_TPU_MBX_SIZE, - EXT_TPU_MBX_SIZE, IOMMU_READ | IOMMU_WRITE); - if (ret) - goto err; - } - - return ret; - -err: - /* - * Attempt to unmap all resources. - * Any resource that hadn't been mapped yet will cause `iommu_unmap()` - * to return immediately, so its safe to try to unmap everything. 
- */ - gxp_dma_unmap_core_resources(gxp, vd, virt_core, core); - return ret; -} - -void gxp_dma_unmap_core_resources(struct gxp_dev *gxp, - struct gxp_virtual_device *vd, uint virt_core, - uint core) -{ - struct gxp_dma_iommu_manager *mgr = container_of( - gxp->dma_mgr, struct gxp_dma_iommu_manager, dma_mgr); - - iommu_unmap(mgr->default_domain, GXP_IOVA_AURORA_TOP, gxp->regs.size); - iommu_unmap(mgr->default_domain, GXP_IOVA_SYNC_BARRIERS, - SYNC_BARRIERS_SIZE); - /* - * TODO(b/202213606): A core should only have access to the FW - * of other cores if they're in the same VD, and have the FW - * region unmapped on VD destruction. - */ - iommu_unmap(mgr->default_domain, GXP_IOVA_FIRMWARE(0), - gxp->fwbufs[0].size * GXP_NUM_CORES); - iommu_unmap(mgr->default_domain, GXP_IOVA_CORE_DUMP, - gxp->coredumpbuf.size); - iommu_unmap(mgr->default_domain, GXP_IOVA_FW_DATA, gxp->fwdatabuf.size); - iommu_unmap(mgr->default_domain, GXP_IOVA_MAILBOX(core), - gxp->mbx[core].size); - /* Only unmap the TPU mailboxes if they were found on probe */ - if (gxp->tpu_dev.mbx_paddr) { - iommu_unmap(mgr->default_domain, - GXP_IOVA_EXT_TPU_MBX + - core * EXT_TPU_MBX_SIZE, - EXT_TPU_MBX_SIZE); - } -} - -static inline struct sg_table *alloc_sgt_for_buffer(void *ptr, size_t size, - struct iommu_domain *domain, - dma_addr_t daddr) -{ - struct sg_table *sgt; - ulong offset; - uint num_ents; - int ret; - struct scatterlist *next; - size_t size_in_page; - struct page *page; - void *va_base = ptr; - - /* Calculate the number of entries needed in the table */ - offset = offset_in_page(va_base); - if (unlikely((size + offset) / PAGE_SIZE >= UINT_MAX - 1 || - size + offset < size)) - return ERR_PTR(-EINVAL); - num_ents = (size + offset) / PAGE_SIZE; - if ((size + offset) % PAGE_SIZE) - num_ents++; - - /* Allocate and setup the table for filling out */ - sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); - if (!sgt) - return ERR_PTR(-ENOMEM); - - ret = sg_alloc_table(sgt, num_ents, GFP_KERNEL); - if (ret) { - 
kfree(sgt); - return ERR_PTR(ret); - } - next = sgt->sgl; - - /* - * Fill in the first scatterlist entry. - * This is the only one which may start at a non-page-aligned address. - */ - size_in_page = size > (PAGE_SIZE - offset_in_page(ptr)) ? - PAGE_SIZE - offset_in_page(ptr) : - size; - page = phys_to_page(iommu_iova_to_phys(domain, daddr)); - sg_set_page(next, page, size_in_page, offset_in_page(ptr)); - size -= size_in_page; - ptr += size_in_page; - next = sg_next(next); - - while (size > 0) { - /* - * Fill in and link the next scatterlist entry. - * `ptr` is now page-aligned, so it is only necessary to check - * if this entire page is part of the buffer, or if the buffer - * ends part way through the page (which means this is the last - * entry in the list). - */ - size_in_page = size > PAGE_SIZE ? PAGE_SIZE : size; - page = phys_to_page(iommu_iova_to_phys( - domain, daddr + (unsigned long long)(ptr - va_base))); - sg_set_page(next, page, size_in_page, 0); - - size -= size_in_page; - ptr += size_in_page; - next = sg_next(next); - } - - return sgt; -} - -#if IS_ENABLED(CONFIG_ANDROID) && !IS_ENABLED(CONFIG_GXP_GEM5) -int gxp_dma_map_tpu_buffer(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - uint virt_core_list, uint core_list - struct edgetpu_ext_mailbox_info *mbx_info) -{ - struct gxp_dma_iommu_manager *mgr = container_of( - gxp->dma_mgr, struct gxp_dma_iommu_manager, dma_mgr); - uint orig_virt_core_list = virt_core_list; - u64 queue_iova; - int core; - int ret; - int i = 0; - - while (virt_core_list) { - phys_addr_t cmdq_pa = mbx_info->mailboxes[i].cmdq_pa; - phys_addr_t respq_pa = mbx_info->mailboxes[i++].respq_pa; - - virt_core = ffs(virt_core_list) - 1; - virt_core_list &= ~BIT(virt_core); - core = ffs(core_list) - 1; - core_list &= ~BIT(core); - queue_iova = GXP_IOVA_TPU_MBX_BUFFER(core); - ret = iommu_map(mgr->default_domain, queue_iova, cmdq_pa, - mbx_info->cmdq_size, IOMMU_WRITE); - if (ret) - goto error; - ret = iommu_map(mgr->default_domain, - 
queue_iova + mbx_info->cmdq_size, respq_pa, - mbx_info->respq_size, IOMMU_READ); - if (ret) { - iommu_unmap(mgr->default_domain, queue_iova, - mbx_info->cmdq_size); - goto error; - } - } - return 0; - -error: - virt_core_list ^= orig_virt_core_list; - while (virt_core_list) { - virt_core = ffs(virt_core_list) - 1; - virt_core_list &= ~BIT(virt_core); - core = ffs(core_list) - 1; - core_list &= ~BIT(core); - queue_iova = GXP_IOVA_TPU_MBX_BUFFER(core); - iommu_unmap(mgr->default_domain, queue_iova, - mbx_info->cmdq_size); - iommu_unmap(mgr->default_domain, - queue_iova + mbx_info->cmdq_size, - mbx_info->respq_size); - } - return ret; -} - -void gxp_dma_unmap_tpu_buffer(struct gxp_dev *gxp, - struct gxp_virtual_device *vd, - struct gxp_tpu_mbx_desc mbx_desc) -{ - struct gxp_dma_iommu_manager *mgr = container_of( - gxp->dma_mgr, struct gxp_dma_iommu_manager, dma_mgr); - uint virt_core_list = mbx_desc.virt_core_list; - uint core_list = mbx_desc.phys_core_list; - u64 queue_iova; - int core; - uint virt_core; - - while (virt_core_list) { - virt_core = ffs(virt_core_list) - 1; - virt_core_list &= ~BIT(virt_core); - core = ffs(core_list) - 1; - core_list &= ~BIT(core); - queue_iova = GXP_IOVA_TPU_MBX_BUFFER(core); - iommu_unmap(mgr->default_domain, queue_iova, - mbx_desc.cmdq_size); - iommu_unmap(mgr->default_domain, - queue_iova + mbx_desc.cmdq_size, - mbx_desc.respq_size); - } -} -#endif // CONFIG_ANDROID && !CONFIG_GXP_GEM5 - -int gxp_dma_ssmt_program(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - uint virt_core, uint core) -{ - /* NO-OP when aux domains are not supported */ - return 0; -} -int gxp_dma_domain_attach_device(struct gxp_dev *gxp, - struct gxp_virtual_device *vd, uint virt_core, - uint core) -{ - /* NO-OP when aux domains are not supported */ - return 0; -} - -void gxp_dma_domain_detach_device(struct gxp_dev *gxp, - struct gxp_virtual_device *vd, uint virt_core) -{ - /* NO-OP when aux domains are not supported */ -} - -int 
gxp_dma_map_allocated_coherent_buffer(struct gxp_dev *gxp, void *buf, - struct gxp_virtual_device *vd, - uint virt_core_list, size_t size, - dma_addr_t dma_handle, - uint gxp_dma_flags) -{ - /* NO-OP when aux domains are not supported */ - return 0; -} - -void gxp_dma_unmap_allocated_coherent_buffer(struct gxp_dev *gxp, - struct gxp_virtual_device *vd, - uint virt_core_list, size_t size, - dma_addr_t dma_handle) -{ - /* NO-OP when aux domains are not supported */ -} - -void *gxp_dma_alloc_coherent(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - uint virt_core_list, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - uint gxp_dma_flags) -{ - struct gxp_dma_iommu_manager *mgr = container_of( - gxp->dma_mgr, struct gxp_dma_iommu_manager, dma_mgr); - void *buf; - struct sg_table *sgt; - dma_addr_t daddr; - - size = size < PAGE_SIZE ? PAGE_SIZE : size; - - /* Allocate a coherent buffer in the default domain */ - buf = dma_alloc_coherent(gxp->dev, size, &daddr, flag); - if (!buf) { - dev_err(gxp->dev, "Failed to allocate coherent buffer\n"); - return NULL; - } - - if (dma_handle) - *dma_handle = daddr; - - return buf; -} - -void gxp_dma_free_coherent(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - uint virt_core_list, size_t size, void *cpu_addr, - dma_addr_t dma_handle) -{ - size = size < PAGE_SIZE ? 
PAGE_SIZE : size; - - dma_free_coherent(gxp->dev, size, cpu_addr, dma_handle); -} - -dma_addr_t gxp_dma_map_single(struct gxp_dev *gxp, - struct gxp_virtual_device *vd, - uint virt_core_list, void *cpu_addr, size_t size, - enum dma_data_direction direction, - unsigned long attrs, uint gxp_dma_flags) -{ - dma_addr_t daddr; - - daddr = dma_map_single_attrs(gxp->dev, cpu_addr, size, direction, - attrs); - if (dma_mapping_error(gxp->dev, daddr)) - return DMA_MAPPING_ERROR; - - return daddr; -} - -void gxp_dma_unmap_single(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - uint virt_core_list, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - dma_unmap_single_attrs(gxp->dev, dma_addr, size, direction, attrs); -} - -dma_addr_t gxp_dma_map_page(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - uint virt_core_list, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - unsigned long attrs, uint gxp_dma_flags) -{ - dma_addr_t daddr; - - daddr = dma_map_page_attrs(gxp->dev, page, offset, size, direction, - attrs); - if (dma_mapping_error(gxp->dev, daddr)) - return DMA_MAPPING_ERROR; - - return daddr; -} - -void gxp_dma_unmap_page(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - uint virt_core_list, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction, unsigned long attrs) -{ - dma_unmap_page_attrs(gxp->dev, dma_addr, size, direction, attrs); -} - -dma_addr_t gxp_dma_map_resource(struct gxp_dev *gxp, - struct gxp_virtual_device *vd, - uint virt_core_list, phys_addr_t phys_addr, - size_t size, enum dma_data_direction direction, - unsigned long attrs, uint gxp_dma_flags) -{ - dma_addr_t daddr; - - daddr = dma_map_resource(gxp->dev, phys_addr, size, direction, attrs); - if (dma_mapping_error(gxp->dev, daddr)) - return DMA_MAPPING_ERROR; - - return daddr; -} - -void gxp_dma_unmap_resource(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - uint virt_core_list, 
dma_addr_t dma_addr, - size_t size, enum dma_data_direction direction, - unsigned long attrs) -{ - dma_unmap_resource(gxp->dev, dma_addr, size, direction, attrs); -} - -int gxp_dma_map_sg(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - int virt_core_list, struct scatterlist *sg, int nents, - enum dma_data_direction direction, unsigned long attrs, - uint gxp_dma_flags) -{ - return dma_map_sg_attrs(gxp->dev, sg, nents, direction, attrs); -} - -void gxp_dma_unmap_sg(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - uint virt_core_list, struct scatterlist *sg, int nents, - enum dma_data_direction direction, unsigned long attrs) -{ - dma_unmap_sg_attrs(gxp->dev, sg, nents, direction, attrs); -} - -void gxp_dma_sync_single_for_cpu(struct gxp_dev *gxp, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) -{ - /* Syncing is not domain specific. Just call through to DMA API */ - dma_sync_single_for_cpu(gxp->dev, dma_handle, size, direction); -} - -void gxp_dma_sync_single_for_device(struct gxp_dev *gxp, dma_addr_t dma_handle, - size_t size, - enum dma_data_direction direction) -{ - /* Syncing is not domain specific. Just call through to DMA API */ - dma_sync_single_for_device(gxp->dev, dma_handle, size, direction); -} - -void gxp_dma_sync_sg_for_cpu(struct gxp_dev *gxp, struct scatterlist *sg, - int nents, enum dma_data_direction direction) -{ - /* Syncing is not domain specific. Just call through to DMA API */ - dma_sync_sg_for_cpu(gxp->dev, sg, nents, direction); -} - -void gxp_dma_sync_sg_for_device(struct gxp_dev *gxp, struct scatterlist *sg, - int nents, enum dma_data_direction direction) -{ - /* Syncing is not domain specific. 
Just call through to DMA API */ - dma_sync_sg_for_device(gxp->dev, sg, nents, direction); -} - -struct sg_table *gxp_dma_map_dmabuf_attachment( - struct gxp_dev *gxp, struct gxp_virtual_device *vd, uint virt_core_list, - struct dma_buf_attachment *attachment, - enum dma_data_direction direction) -{ - return dma_buf_map_attachment(attachment, direction); -} - -void gxp_dma_unmap_dmabuf_attachment(struct gxp_dev *gxp, - struct gxp_virtual_device *vd, - uint virt_core_list, - struct dma_buf_attachment *attachment, - struct sg_table *sgt, - enum dma_data_direction direction) -{ - dma_buf_unmap_attachment(attachment, sgt, direction); -} diff --git a/gxp-dma-iommu.c b/gxp-dma-iommu.c index caedac3..3e0fc6c 100644 --- a/gxp-dma-iommu.c +++ b/gxp-dma-iommu.c @@ -441,7 +441,7 @@ alloc_sgt_for_buffer(void *ptr, size_t size, return sgt; } -#if IS_ENABLED(CONFIG_ANDROID) && !IS_ENABLED(CONFIG_GXP_GEM5) +#if (IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_ANDROID)) && !IS_ENABLED(CONFIG_GXP_GEM5) int gxp_dma_map_tpu_buffer(struct gxp_dev *gxp, struct gxp_virtual_device *vd, uint virt_core_list, uint core_list, struct edgetpu_ext_mailbox_info *mbx_info) @@ -515,7 +515,7 @@ void gxp_dma_unmap_tpu_buffer(struct gxp_dev *gxp, mbx_desc.cmdq_size, mbx_desc.respq_size); } } -#endif // CONFIG_ANDROID && !CONFIG_GXP_GEM5 +#endif // (CONFIG_GXP_TEST || CONFIG_ANDROID) && !CONFIG_GXP_GEM5 int gxp_dma_map_allocated_coherent_buffer(struct gxp_dev *gxp, void *buf, struct gxp_virtual_device *vd, @@ -11,7 +11,7 @@ #include <linux/dma-direction.h> #include <linux/dma-mapping.h> #include <linux/types.h> -#if IS_ENABLED(CONFIG_ANDROID) && !IS_ENABLED(CONFIG_GXP_GEM5) +#if (IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_ANDROID)) && !IS_ENABLED(CONFIG_GXP_GEM5) #include <soc/google/tpu-ext.h> #endif @@ -136,7 +136,7 @@ void gxp_dma_unmap_core_resources(struct gxp_dev *gxp, struct gxp_virtual_device *vd, uint virt_core, uint core); -#if IS_ENABLED(CONFIG_ANDROID) && !IS_ENABLED(CONFIG_GXP_GEM5) 
+#if (IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_ANDROID)) && !IS_ENABLED(CONFIG_GXP_GEM5) /** * gxp_dma_map_tpu_buffer() - Map the tpu mbx queue buffers with fixed IOVAs * @gxp: The GXP device to set up the mappings for @@ -169,7 +169,7 @@ int gxp_dma_map_tpu_buffer(struct gxp_dev *gxp, struct gxp_virtual_device *vd, void gxp_dma_unmap_tpu_buffer(struct gxp_dev *gxp, struct gxp_virtual_device *vd, struct gxp_tpu_mbx_desc mbx_desc); -#endif // CONFIG_ANDROID && !CONFIG_GXP_GEM5 +#endif // (CONFIG_GXP_TEST || CONFIG_ANDROID) && !CONFIG_GXP_GEM5 /** * gxp_dma_map_allocated_coherent_buffer() - Map a coherent buffer diff --git a/gxp-dmabuf.c b/gxp-dmabuf.c index c3ef1eb..789efeb 100644 --- a/gxp-dmabuf.c +++ b/gxp-dmabuf.c @@ -29,6 +29,26 @@ struct gxp_dmabuf_mapping { struct sg_table *sgt; }; +/* Mapping destructor for gxp_mapping_put() to call */ +static void destroy_dmabuf_mapping(struct gxp_mapping *mapping) +{ + struct gxp_dmabuf_mapping *dmabuf_mapping; + struct gxp_dev *gxp = mapping->gxp; + struct gxp_virtual_device *vd = mapping->vd; + + /* Unmap and detach the dma-buf */ + dmabuf_mapping = + container_of(mapping, struct gxp_dmabuf_mapping, mapping); + + gxp_dma_unmap_dmabuf_attachment(gxp, vd, mapping->virt_core_list, + dmabuf_mapping->attachment, + dmabuf_mapping->sgt, mapping->dir); + dma_buf_detach(dmabuf_mapping->dmabuf, dmabuf_mapping->attachment); + dma_buf_put(dmabuf_mapping->dmabuf); + + kfree(dmabuf_mapping); +} + struct gxp_mapping *gxp_dmabuf_map(struct gxp_dev *gxp, struct gxp_virtual_device *vd, uint virt_core_list, int fd, u32 flags, @@ -40,6 +60,9 @@ struct gxp_mapping *gxp_dmabuf_map(struct gxp_dev *gxp, struct gxp_dmabuf_mapping *dmabuf_mapping; int ret = 0; + if (!valid_dma_direction(dir)) + return ERR_PTR(-EINVAL); + dmabuf = dma_buf_get(fd); if (IS_ERR(dmabuf)) { dev_err(gxp->dev, "Failed to get dma-buf to map (ret=%ld)\n", @@ -71,24 +94,20 @@ struct gxp_mapping *gxp_dmabuf_map(struct gxp_dev *gxp, } /* dma-buf mappings are indicated 
by a host_address of 0 */ + refcount_set(&dmabuf_mapping->mapping.refcount, 1); + dmabuf_mapping->mapping.destructor = destroy_dmabuf_mapping; dmabuf_mapping->mapping.host_address = 0; + dmabuf_mapping->mapping.gxp = gxp; dmabuf_mapping->mapping.virt_core_list = virt_core_list; + dmabuf_mapping->mapping.vd = vd; dmabuf_mapping->mapping.device_address = sg_dma_address(sgt->sgl); dmabuf_mapping->mapping.dir = dir; dmabuf_mapping->dmabuf = dmabuf; dmabuf_mapping->attachment = attachment; dmabuf_mapping->sgt = sgt; - ret = gxp_mapping_put(gxp, &dmabuf_mapping->mapping); - if (ret) { - dev_err(gxp->dev, - "Failed to store mapping for dma-buf (ret=%d)\n", ret); - goto err_put_mapping; - } return &dmabuf_mapping->mapping; -err_put_mapping: - kfree(dmabuf_mapping); err_alloc_mapping: gxp_dma_unmap_dmabuf_attachment(gxp, vd, virt_core_list, attachment, sgt, dir); err_map_attachment: @@ -97,35 +116,3 @@ err_attach: dma_buf_put(dmabuf); return ERR_PTR(ret); } - -void gxp_dmabuf_unmap(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - dma_addr_t device_address) -{ - struct gxp_dmabuf_mapping *dmabuf_mapping; - struct gxp_mapping *mapping; - - /* - * Fetch and remove the internal mapping records. - * If host_address is not 0, the provided device_address belongs to a - * non-dma-buf mapping. 
- */ - mapping = gxp_mapping_get(gxp, device_address); - if (IS_ERR_OR_NULL(mapping) || mapping->host_address) { - dev_warn(gxp->dev, "No dma-buf mapped for given IOVA\n"); - return; - } - - gxp_mapping_remove(gxp, mapping); - - /* Unmap and detach the dma-buf */ - dmabuf_mapping = - container_of(mapping, struct gxp_dmabuf_mapping, mapping); - - gxp_dma_unmap_dmabuf_attachment(gxp, vd, mapping->virt_core_list, - dmabuf_mapping->attachment, - dmabuf_mapping->sgt, mapping->dir); - dma_buf_detach(dmabuf_mapping->dmabuf, dmabuf_mapping->attachment); - dma_buf_put(dmabuf_mapping->dmabuf); - - kfree(dmabuf_mapping); -} diff --git a/gxp-dmabuf.h b/gxp-dmabuf.h index bff95ea..5803841 100644 --- a/gxp-dmabuf.h +++ b/gxp-dmabuf.h @@ -14,7 +14,7 @@ #include "gxp-mapping.h" /** - * gxp_dmabuf_map() - Map a dma-buf for access by the specified physical cores + * gxp_dmabuf_map() - Map a dma-buf for access by the specified virtual device * @gxp: The GXP device to map the dma-buf for * @vd: The virtual device includes the virtual cores the dma-buf is mapped for * @virt_core_list: A bitfield enumerating the virtual cores the mapping is for @@ -22,6 +22,8 @@ * @flags: The type of mapping to create; Currently unused * @direction: DMA direction * + * If successful, the mapping will be initialized with a reference count of 1 + * * Return: The structure that was created and is being tracked to describe the * mapping of the dma-buf. Returns ERR_PTR on failure. */ @@ -30,15 +32,4 @@ struct gxp_mapping *gxp_dmabuf_map(struct gxp_dev *gxp, uint virt_core_list, int fd, u32 flags, enum dma_data_direction dir); -/** - * gxp_dmabuf_unmap - Unmap a dma-buf previously mapped with `gxp_dmabuf_map()` - * @gxp: The GXP device the dma-buf was mapped for. - * @vd: The virtual device includes the virtual cores the dma-buf was mapped for - * @device_address: The IOVA the dma-buf was mapped to. 
Should be obtained from - * the `device_address` field of the `struct gxp_mapping` - * returned by `gxp_dmabuf_map()` - */ -void gxp_dmabuf_unmap(struct gxp_dev *gxp, struct gxp_virtual_device *vd, - dma_addr_t device_address); - #endif /* __GXP_DMABUF_H__ */ diff --git a/gxp-domain-pool.c b/gxp-domain-pool.c new file mode 100644 index 0000000..53a5b38 --- /dev/null +++ b/gxp-domain-pool.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GXP IOMMU domain allocator. + * + * Copyright (C) 2022 Google LLC + */ + +#include <linux/idr.h> +#include <linux/iommu.h> +#include <linux/slab.h> + +#include "gxp-domain-pool.h" +#include "gxp-internal.h" + +int gxp_domain_pool_init(struct gxp_dev *gxp, struct gxp_domain_pool *pool, + unsigned int size) +{ + unsigned int i; + struct iommu_domain *domain; + + pool->size = size; + pool->gxp = gxp; + + if (!size) + return 0; + + dev_dbg(pool->gxp->dev, "Initializing domain pool with %u domains\n", size); + + ida_init(&pool->idp); + pool->array = vzalloc(sizeof(*pool->array) * size); + if (!pool->array) { + dev_err(gxp->dev, "Failed to allocate memory for domain pool array\n"); + return -ENOMEM; + } + for (i = 0; i < size; i++) { + domain = iommu_domain_alloc(pool->gxp->dev->bus); + if (!domain) { + dev_err(pool->gxp->dev, + "Failed to allocate iommu domain %d of %u\n", + i + 1, size); + gxp_domain_pool_destroy(pool); + return -ENOMEM; + } + pool->array[i] = domain; + } + return 0; +} + +struct iommu_domain *gxp_domain_pool_alloc(struct gxp_domain_pool *pool) +{ + int id; + + if (!pool->size) + return iommu_domain_alloc(pool->gxp->dev->bus); + + id = ida_alloc_max(&pool->idp, pool->size - 1, GFP_KERNEL); + + if (id < 0) { + dev_err(pool->gxp->dev, + "No more domains available from pool of size %u\n", + pool->size); + return NULL; + } + + dev_dbg(pool->gxp->dev, "Allocated domain from pool with id = %d\n", id); + + return pool->array[id]; +} + +void gxp_domain_pool_free(struct gxp_domain_pool *pool, struct iommu_domain 
*domain) +{ + int id; + + if (!pool->size) { + iommu_domain_free(domain); + return; + } + for (id = 0; id < pool->size; id++) { + if (pool->array[id] == domain) { + dev_dbg(pool->gxp->dev, "Released domain from pool with id = %d\n", id); + ida_free(&pool->idp, id); + return; + } + } + dev_err(pool->gxp->dev, "%s: domain not found in pool", __func__); +} + +void gxp_domain_pool_destroy(struct gxp_domain_pool *pool) +{ + int i; + + if (!pool->size) + return; + + dev_dbg(pool->gxp->dev, "Destroying domain pool with %u domains\n", pool->size); + + for (i = 0; i < pool->size; i++) { + if (pool->array[i]) + iommu_domain_free(pool->array[i]); + } + + ida_destroy(&pool->idp); + vfree(pool->array); +} diff --git a/gxp-domain-pool.h b/gxp-domain-pool.h new file mode 100644 index 0000000..ee95155 --- /dev/null +++ b/gxp-domain-pool.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * IOMMU domain allocator for gxp + * + * Copyright (C) 2022 Google LLC + */ + +#ifndef __GXP_DOMAIN_POOL_H__ +#define __GXP_DOMAIN_POOL_H__ + +#include <linux/idr.h> +#include <linux/iommu.h> + +#include "gxp-internal.h" + +struct gxp_domain_pool { + struct ida idp; /* ID allocator to keep track of used domains. */ + /* + * Size of the pool. Can be set to 0, in which case the implementation will fall back to + * dynamic domain allocation using the IOMMU API directly. + */ + unsigned int size; + struct iommu_domain **array; /* Array holding the pointers to pre-allocated domains. */ + struct gxp_dev *gxp; /* The gxp device used for logging warnings/errors. */ +}; + + +/* + * Initializes a domain pool. + * + * @gxp: pointer to gxp device. + * @pool: caller-allocated pool structure. + * @size: size of the pre-allocated domains pool. + * Set to zero to fall back to dynamically allocated domains. + * + * returns 0 on success or negative error value. 
+ */ +int gxp_domain_pool_init(struct gxp_dev *gxp, struct gxp_domain_pool *pool, + unsigned int size); + +/* + * Allocates a domain from the pool + * returns NULL on error. + */ +struct iommu_domain *gxp_domain_pool_alloc(struct gxp_domain_pool *pool); + +/* Releases a domain from the pool. */ +void gxp_domain_pool_free(struct gxp_domain_pool *pool, struct iommu_domain *domain); + +/* Cleans up all resources used by the domain pool. */ +void gxp_domain_pool_destroy(struct gxp_domain_pool *pool); + +#endif /* __GXP_DOMAIN_POOL_H__ */ diff --git a/gxp-firmware-data.c b/gxp-firmware-data.c index 0cfc7b4..ea00dd9 100644 --- a/gxp-firmware-data.c +++ b/gxp-firmware-data.c @@ -70,7 +70,7 @@ struct gxp_fw_data_manager { /* Doorbells allocator and reserved doorbell IDs */ struct range_alloc *doorbell_allocator; int cores_wakeup_doorbell; - int semaphore_doorbells[NUM_CORES]; + int semaphore_doorbells[GXP_NUM_CORES]; /* Sync barriers allocator and reserved sync barrier IDs */ struct range_alloc *sync_barrier_allocator; @@ -110,8 +110,8 @@ struct app_metadata { struct fw_memory sync_barriers_mem; struct fw_memory semaphores_mem; struct fw_memory cores_mem; - struct fw_memory core_cmd_queues_mem[NUM_CORES]; - struct fw_memory core_rsp_queues_mem[NUM_CORES]; + struct fw_memory core_cmd_queues_mem[GXP_NUM_CORES]; + struct fw_memory core_rsp_queues_mem[GXP_NUM_CORES]; struct fw_memory app_mem; }; @@ -316,7 +316,7 @@ static struct fw_memory init_app_semaphores(struct app_metadata *app) sm_region->protection_barrier = app->mgr->semaphores_regions_barrier; core = 0; - for (i = 0; i < NUM_CORES; i++) { + for (i = 0; i < GXP_NUM_CORES; i++) { if (app->core_list & BIT(i)) sm_region->wakeup_doorbells[core++] = app->mgr->semaphore_doorbells[i]; @@ -498,7 +498,7 @@ int gxp_fw_data_init(struct gxp_dev *gxp) goto err; /* Semaphores operation doorbells */ - for (i = 0; i < NUM_CORES; i++) { + for (i = 0; i < GXP_NUM_CORES; i++) { range_alloc_get_any(mgr->doorbell_allocator, 
&mgr->semaphore_doorbells[i]); } @@ -538,7 +538,6 @@ int gxp_fw_data_init(struct gxp_dev *gxp) goto err; /* Semaphore regions for all apps */ - // TODO: make this per-app to improve performance? res = range_alloc_get_any(mgr->sync_barrier_allocator, &mgr->semaphores_regions_barrier); if (res) @@ -618,7 +617,7 @@ void *gxp_fw_data_create_app(struct gxp_dev *gxp, uint core_list) /* Application region. */ app->app_mem = init_application(app); - for (i = 0; i < NUM_CORES; i++) { + for (i = 0; i < GXP_NUM_CORES; i++) { if (core_list & BIT(i)) { mgr->system_desc->app_descriptor_dev_addr[i] = app->app_mem.device_addr; @@ -662,6 +661,9 @@ void gxp_fw_data_destroy(struct gxp_dev *gxp) { struct gxp_fw_data_manager *mgr = gxp->data_mgr; + if (!mgr) + return; + mem_alloc_free(mgr->allocator, &mgr->telemetry_mem); mem_alloc_free(mgr->allocator, &mgr->wdog_mem); mem_alloc_free(mgr->allocator, &mgr->sys_desc_mem); @@ -700,12 +702,12 @@ int gxp_fw_data_set_telemetry_descriptors(struct gxp_dev *gxp, u8 type, return -EINVAL; /* Validate that the provided IOVAs are addressable (i.e. 
32-bit) */ - for (core = 0; core < NUM_CORES; core++) { + for (core = 0; core < GXP_NUM_CORES; core++) { if (buffer_addrs[core] > U32_MAX) return -EINVAL; } - for (core = 0; core < NUM_CORES; core++) { + for (core = 0; core < GXP_NUM_CORES; core++) { core_descriptors[core].host_status = host_status; core_descriptors[core].buffer_addr = (u32)buffer_addrs[core]; core_descriptors[core].buffer_size = per_buffer_size; diff --git a/gxp-firmware.c b/gxp-firmware.c index 6e079fb..ba3a784 100644 --- a/gxp-firmware.c +++ b/gxp-firmware.c @@ -9,7 +9,6 @@ #include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/elf.h> -#include <linux/firmware.h> #include <linux/gsa/gsa_image_auth.h> #include <linux/io.h> #include <linux/kernel.h> @@ -29,21 +28,54 @@ #include "gxp-telemetry.h" #include "gxp-vd.h" -/* TODO (b/176984045): Clean up gxp-firmware.c */ - /* Files need to be copied to /lib/firmware */ -#define Q7_ELF_FILE0 "gxp_fw_core0" -#define Q7_ELF_FILE1 "gxp_fw_core1" -#define Q7_ELF_FILE2 "gxp_fw_core2" -#define Q7_ELF_FILE3 "gxp_fw_core3" +#define DSP_FIRMWARE_DEFAULT_PREFIX "gxp_fw_core" #define FW_HEADER_SIZE (0x1000) #define FW_IMAGE_TYPE_OFFSET (0x400) -static const struct firmware *fw[GXP_NUM_CORES]; +static int +request_dsp_firmware(struct gxp_dev *gxp, char *name_prefix, + const struct firmware *out_firmwares[GXP_NUM_CORES]) +{ + char *name_buf; + /* 1 for NULL-terminator and up to 4 for core number */ + size_t name_len = strlen(name_prefix) + 5; + int core; + int ret = 0; -static char *fw_elf[] = {Q7_ELF_FILE0, Q7_ELF_FILE1, Q7_ELF_FILE2, - Q7_ELF_FILE3}; + name_buf = kzalloc(name_len, GFP_KERNEL); + if (!name_buf) + return -ENOMEM; + + for (core = 0; core < GXP_NUM_CORES; core++) { + ret = snprintf(name_buf, name_len, "%s%d", name_prefix, core); + if (ret <= 0 || ret >= name_len) { + ret = -EINVAL; + goto err; + } + + dev_notice(gxp->dev, "Requesting dsp core %d firmware file: %s\n", + core, name_buf); + ret = request_firmware(&out_firmwares[core], 
name_buf, NULL); + if (ret < 0) { + dev_err(gxp->dev, + "Requesting dsp core %d firmware failed (ret=%d)\n", + core, ret); + goto err; + } + dev_dbg(gxp->dev, "dsp core %d firmware file obtained\n", core); + } + + kfree(name_buf); + return ret; + +err: + for (core -= 1; core >= 0; core--) + release_firmware(out_firmwares[core]); + kfree(name_buf); + return ret; +} static int elf_load_segments(struct gxp_dev *gxp, const u8 *elf_data, size_t size, @@ -240,44 +272,9 @@ static int gxp_firmware_load(struct gxp_dev *gxp, uint core) void __iomem *core_scratchpad_base; int ret; - dev_notice(gxp->dev, "Loading Q7 ELF file %s\n", fw_elf[core]); - ret = request_firmware(&fw[core], fw_elf[core], NULL); - if (ret < 0) { - dev_err(gxp->dev, "Loading ELF failed (ret=%d)\n", ret); - return ret; - } - dev_notice(gxp->dev, "Q7 ELF file loaded\n"); - - /* - * Currently, the Q7 FW needs to be statically linked to a base - * address where it would be loaded in memory. This requires the - * address (where the FW is to be loaded in DRAM) to be - * pre-defined, and hence not allocate-able dynamically (using - * the kernel's memory management system). Therefore, we are - * memremapping a static address and loading the FW there, while - * also having compiled the FW with this as the base address - * (used by the linker). - * - * FIXME: This should be fixed by compiling the FW in a - * re-locateable way so that it is independent of the load - * address, and then using the standard kernel APIs - * (kmalloc/dma_alloc_coherrent) to allocate memory and load the - * FW. 
- */ - /* - * TODO (b/193069216) allocate a dynamic buffer and let - * `gxp_dma_map_resources()` map it to the expected paddr - */ - gxp->fwbufs[core].vaddr = memremap(gxp->fwbufs[core].paddr, - gxp->fwbufs[core].size, MEMREMAP_WC); - if (!(gxp->fwbufs[core].vaddr)) { - dev_err(gxp->dev, "FW buf memremap failed\n"); - ret = -EINVAL; - goto out_firmware_unload; - } - /* Authenticate and load firmware to System RAM */ - ret = gxp_firmware_load_authenticated(gxp, fw[core], &gxp->fwbufs[core]); + ret = gxp_firmware_load_authenticated(gxp, gxp->firmwares[core], + &gxp->fwbufs[core]); if (ret) { dev_err(gxp->dev, "Unable to load elf file\n"); goto out_firmware_unload; @@ -319,7 +316,9 @@ static int gxp_firmware_handshake(struct gxp_dev *gxp, uint core) /* Raise wakeup doorbell */ dev_notice(gxp->dev, "Raising doorbell %d interrupt\n", CORE_WAKEUP_DOORBELL); +#ifndef CONFIG_GXP_GEM5 gxp_doorbell_set_listening_core(gxp, CORE_WAKEUP_DOORBELL, core); +#endif gxp_doorbell_set(gxp, CORE_WAKEUP_DOORBELL); /* Wait for core to come up */ @@ -376,7 +375,6 @@ static int gxp_firmware_handshake(struct gxp_dev *gxp, uint core) * affect the order of reading/writing INT_MASK0, so ignore this * handshaking in Gem5. */ - /* TODO (b/182528386): Fix handshake for verifying TOP access */ ctr = 1000; offset = SCRATCHPAD_MSG_OFFSET(MSG_TOP_ACCESS_OK); expected_top_value = BIT(0); @@ -404,17 +402,109 @@ static int gxp_firmware_handshake(struct gxp_dev *gxp, uint core) static void gxp_firmware_unload(struct gxp_dev *gxp, uint core) { - if (gxp->fwbufs[core].vaddr) { - memunmap(gxp->fwbufs[core].vaddr); - gxp->fwbufs[core].vaddr = NULL; + /* NO-OP for now. 
*/ +} + +/* Helper function to parse name written to sysfs "load_dsp_firmware" node */ +static char *fw_name_from_attr_buf(const char *buf) +{ + size_t len; + char *name; + + len = strlen(buf); + if (len == 0 || buf[len - 1] != '\n') + return ERR_PTR(-EINVAL); + + name = kstrdup(buf, GFP_KERNEL); + if (!name) + return ERR_PTR(-ENOMEM); + + name[len - 1] = '\0'; + return name; +} + +/* sysfs node for loading custom firmware */ + +static ssize_t load_dsp_firmware_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gxp_dev *gxp = dev_get_drvdata(dev); + ssize_t ret; + + mutex_lock(&gxp->dsp_firmware_lock); + + ret = scnprintf(buf, PAGE_SIZE, "%s\n", + gxp->firmware_name ? gxp->firmware_name : + DSP_FIRMWARE_DEFAULT_PREFIX); + + mutex_unlock(&gxp->dsp_firmware_lock); + + return ret; +} + +static ssize_t load_dsp_firmware_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct gxp_dev *gxp = dev_get_drvdata(dev); + const struct firmware *firmwares[GXP_NUM_CORES]; + char *name_buf = NULL; + int ret; + int core; + + name_buf = fw_name_from_attr_buf(buf); + if (IS_ERR(name_buf)) { + dev_err(gxp->dev, "Invalid firmware prefix requested: %s\n", + buf); + return PTR_ERR(name_buf); + } + + mutex_lock(&gxp->dsp_firmware_lock); + + dev_notice(gxp->dev, "Requesting firmware be reloaded: %s\n", name_buf); + + ret = request_dsp_firmware(gxp, name_buf, firmwares); + if (ret) { + mutex_unlock(&gxp->dsp_firmware_lock); + dev_err(gxp->dev, + "Failed to request firmwares with names \"%sX\" (ret=%d)\n", + name_buf, ret); + kfree(name_buf); + return ret; } - if (fw[core]) { - release_firmware(fw[core]); - fw[core] = NULL; + /* + * Lock the VD semaphore to make sure no new firmware is started while + * changing out the images in `gxp->firmwares` + */ + down_read(&gxp->vd_semaphore); + + for (core = 0; core < GXP_NUM_CORES; core++) { + if (gxp->firmwares[core]) + release_firmware(gxp->firmwares[core]); + 
gxp->firmwares[core] = firmwares[core]; } + + kfree(gxp->firmware_name); + gxp->firmware_name = name_buf; + + up_read(&gxp->vd_semaphore); + mutex_unlock(&gxp->dsp_firmware_lock); + + return count; } +static DEVICE_ATTR_RW(load_dsp_firmware); + +static struct attribute *dev_attrs[] = { + &dev_attr_load_dsp_firmware.attr, + NULL, +}; + +static const struct attribute_group gxp_firmware_attr_group = { + .attrs = dev_attrs, +}; + int gxp_fw_init(struct gxp_dev *gxp) { u32 ver, proc_id; @@ -468,18 +558,76 @@ int gxp_fw_init(struct gxp_dev *gxp) * initialized. */ + for (core = 0; core < GXP_NUM_CORES; core++) { + /* + * Currently, the Q7 FW needs to be statically linked to a base + * address where it would be loaded in memory. This requires the + * address (where the FW is to be loaded in DRAM) to be + * pre-defined, and hence not allocate-able dynamically (using + * the kernel's memory management system). Therefore, we are + * memremapping a static address and loading the FW there, while + * also having compiled the FW with this as the base address + * (used by the linker). + */ + gxp->fwbufs[core].vaddr = + memremap(gxp->fwbufs[core].paddr, + gxp->fwbufs[core].size, MEMREMAP_WC); + if (!(gxp->fwbufs[core].vaddr)) { + dev_err(gxp->dev, "FW buf %d memremap failed\n", core); + ret = -EINVAL; + goto out_fw_destroy; + } + } + + ret = device_add_group(gxp->dev, &gxp_firmware_attr_group); + if (ret) + goto out_fw_destroy; + gxp->firmware_running = 0; return 0; + +out_fw_destroy: + gxp_fw_destroy(gxp); + return ret; } void gxp_fw_destroy(struct gxp_dev *gxp) { - /* NO-OP for now. */ - /* - * TODO(b/214124218): Revisit if the firmware subsystem still needs a - * "destroy" method now that power management is decoupled from the - * firmware subsystem's lifecycle. 
- */ + uint core; + + device_remove_group(gxp->dev, &gxp_firmware_attr_group); + + for (core = 0; core < GXP_NUM_CORES; core++) { + if (gxp->fwbufs[core].vaddr) { + memunmap(gxp->fwbufs[core].vaddr); + gxp->fwbufs[core].vaddr = NULL; + } + + if (gxp->firmwares[core]) { + release_firmware(gxp->firmwares[core]); + gxp->firmwares[core] = NULL; + } + } + + kfree(gxp->firmware_name); +} + +int gxp_firmware_request_if_needed(struct gxp_dev *gxp) +{ + int ret = 0; + + mutex_lock(&gxp->dsp_firmware_lock); + + if (!gxp->is_firmware_requested) { + ret = request_dsp_firmware(gxp, DSP_FIRMWARE_DEFAULT_PREFIX, + gxp->firmwares); + if (!ret) + gxp->is_firmware_requested = true; + } + + mutex_unlock(&gxp->dsp_firmware_lock); + + return ret; } int gxp_firmware_run(struct gxp_dev *gxp, struct gxp_virtual_device *vd, @@ -504,7 +652,14 @@ int gxp_firmware_run(struct gxp_dev *gxp, struct gxp_virtual_device *vd, gxp_write_32_core(gxp, core, GXP_REG_BOOT_MODE, GXP_BOOT_MODE_REQUEST_COLD_BOOT); +#ifdef CONFIG_GXP_GEM5 + /* + * GEM5 starts firmware after LPM is programmed, so we need to call + * gxp_doorbell_set_listening_core here to set GXP_REG_COMMON_INT_MASK_0 + * first to enable the firmware hadnshaking. + */ gxp_doorbell_set_listening_core(gxp, CORE_WAKEUP_DOORBELL, core); +#endif ret = gxp_firmware_setup_hw_after_block_off(gxp, core, /*verbose=*/true); if (ret) { diff --git a/gxp-firmware.h b/gxp-firmware.h index 775e83f..f945d3e 100644 --- a/gxp-firmware.h +++ b/gxp-firmware.h @@ -34,6 +34,7 @@ enum aurora_msg { MSG_SCRATCHPAD_MAX, }; +/* The caller must have locked gxp->vd_semaphore for reading. */ static inline bool gxp_is_fw_running(struct gxp_dev *gxp, uint core) { return (gxp->firmware_running & BIT(core)) != 0; @@ -45,11 +46,23 @@ static inline bool gxp_is_fw_running(struct gxp_dev *gxp, uint core) * The function needs to be called once after a block power up event. 
*/ int gxp_fw_init(struct gxp_dev *gxp); + /* * Tears down the firmware loading/unloading subsystem in preparation for a * block-level shutdown event. To be called once before a block shutdown. */ void gxp_fw_destroy(struct gxp_dev *gxp); + +/* + * Check if the DSP firmware files have been requested yet, and if not, request + * them. + * + * Returns 0 if the files have already been requested or were successfully + * requested by this call; Returns an errno if this call attempted to request + * the files and it failed. + */ +int gxp_firmware_request_if_needed(struct gxp_dev *gxp); + /* * Loads the firmware for the specified core in system memory and powers up the * core to start FW execution. diff --git a/gxp-host-device-structs.h b/gxp-host-device-structs.h index 8182138..57dc673 100644 --- a/gxp-host-device-structs.h +++ b/gxp-host-device-structs.h @@ -14,7 +14,7 @@ #ifndef __GXP_HOST_DEVICE_STRUCTURES_H__ #define __GXP_HOST_DEVICE_STRUCTURES_H__ -#define NUM_CORES 4 +#define MAX_NUM_CORES 4 #define NUM_SYSTEM_SEMAPHORES 64 /* Bit masks for the status fields in the telemetry structures. */ @@ -156,7 +156,7 @@ struct gxp_telemetry_descriptor { uint32_t buffer_size; /* The watermark interrupt threshold (in bytes) */ uint32_t watermark_level; - } per_core_loggers[NUM_CORES], per_core_tracers[NUM_CORES]; + } per_core_loggers[MAX_NUM_CORES], per_core_tracers[MAX_NUM_CORES]; }; /* @@ -173,12 +173,12 @@ struct gxp_semaphores_descriptor { * bit map describing of all the semaphores in the list below that have * been unlocked but haven't been processed yet by the receiptient core. */ - uint64_t woken_pending_semaphores[NUM_CORES]; + uint64_t woken_pending_semaphores[MAX_NUM_CORES]; /* * A mapping of which doorbells to use as a wakeup signal source per * core. */ - uint32_t wakeup_doorbells[NUM_CORES]; + uint32_t wakeup_doorbells[MAX_NUM_CORES]; /* The number of items described in this region. */ uint32_t num_items; /* The list of semaphores available for usage. 
*/ @@ -260,7 +260,7 @@ struct gxp_cores_descriptor { */ struct gxp_system_descriptor { /* A device address for the application data descriptor. */ - uint32_t app_descriptor_dev_addr[NUM_CORES]; + uint32_t app_descriptor_dev_addr[MAX_NUM_CORES]; /* A device address for the watchdog descriptor. */ uint32_t watchdog_dev_addr; /* A device address for the telemetry descriptor */ diff --git a/gxp-internal.h b/gxp-internal.h index 01df49e..e456668 100644 --- a/gxp-internal.h +++ b/gxp-internal.h @@ -9,6 +9,7 @@ #include <linux/debugfs.h> #include <linux/delay.h> +#include <linux/firmware.h> #include <linux/io.h> #include <linux/iommu.h> #include <linux/list.h> @@ -46,7 +47,7 @@ struct gxp_tpu_dev { struct gxp_client; struct gxp_mailbox_manager; struct gxp_debug_dump_manager; -struct gxp_mapping_root; +struct gxp_domain_pool; struct gxp_dma_manager; struct gxp_fw_data_manager; struct gxp_power_manager; @@ -67,8 +68,18 @@ struct gxp_dev { struct gxp_mailbox_manager *mailbox_mgr; struct gxp_power_manager *power_mgr; struct gxp_debug_dump_manager *debug_dump_mgr; - struct gxp_mapping_root *mappings; /* tree of user mappings */ - u32 firmware_running; /* firmware status bitmap */ + const struct firmware *firmwares[GXP_NUM_CORES]; + char *firmware_name; + bool is_firmware_requested; + /* Protects `firmwares` and `firmware_name` */ + struct mutex dsp_firmware_lock; + /* Firmware status bitmap. Accessors must hold `vd_semaphore` */ + u32 firmware_running; + /* + * Lock to ensure only one thread at a time is ever calling + * `pin_user_pages_fast()` during mapping, otherwise it will fail. + */ + struct mutex pin_user_pages_lock; /* * Reader/writer lock protecting usage of virtual cores assigned to * physical cores. @@ -76,12 +87,8 @@ struct gxp_dev { * running or stopping one on a physical core. * A reader is any function making use of or interacting with a virtual * core without starting or stopping it on a physical core. 
- */ - /* - * TODO(b/216862052) vd_semaphore also currently protects client state. - * A separate per-client lock should be introduced - * instead, as part of support for creating VDs - * without running them on physical cores. + * The fields `core_to_vd[]` and `firmware_running` are also protected + * by this lock. */ struct rw_semaphore vd_semaphore; struct gxp_virtual_device *core_to_vd[GXP_NUM_CORES]; @@ -100,6 +107,7 @@ struct gxp_dev { */ struct device *gsa_dev; u32 memory_per_core; + struct gxp_domain_pool *domain_pool; struct list_head client_list; struct mutex client_list_lock; }; @@ -5,14 +5,11 @@ * Copyright (C) 2021 Google LLC */ +#include <linux/acpm_dvfs.h> #include <linux/bitops.h> #include <linux/io.h> -#include <linux/types.h> #include <linux/pm_runtime.h> - -#ifdef CONFIG_GXP_CLOUDRIPPER -#include <linux/acpm_dvfs.h> -#endif +#include <linux/types.h> #include "gxp-bpm.h" #include "gxp-doorbell.h" @@ -111,16 +108,19 @@ static int set_state_internal(struct gxp_dev *gxp, uint psm, uint target_state) return 0; } -int gxp_lpm_set_state(struct gxp_dev *gxp, uint psm, uint target_state) +int gxp_lpm_set_state(struct gxp_dev *gxp, uint psm, uint target_state, + bool verbose) { uint curr_state = gxp_lpm_get_state(gxp, psm); if (curr_state == target_state) return 0; - dev_warn(gxp->dev, "Forcing a transition to PS%u on core%u, status: %x\n", - target_state, psm, - lpm_read_32_psm(gxp, psm, PSM_STATUS_OFFSET)); + if (verbose) + dev_warn(gxp->dev, + "Forcing a transition to PS%u on core%u, status: %x\n", + target_state, psm, + lpm_read_32_psm(gxp, psm, PSM_STATUS_OFFSET)); gxp_lpm_enable_state(gxp, psm, target_state); @@ -132,9 +132,12 @@ int gxp_lpm_set_state(struct gxp_dev *gxp, uint psm, uint target_state) set_state_internal(gxp, psm, target_state); - dev_warn(gxp->dev, "Finished forced transition on core %u. 
target: PS%u, actual: PS%u, status: %x\n", - psm, target_state, gxp_lpm_get_state(gxp, psm), - lpm_read_32_psm(gxp, psm, PSM_STATUS_OFFSET)); + if (verbose) + dev_warn( + gxp->dev, + "Finished forced transition on core %u. target: PS%u, actual: PS%u, status: %x\n", + psm, target_state, gxp_lpm_get_state(gxp, psm), + lpm_read_32_psm(gxp, psm, PSM_STATUS_OFFSET)); /* Set HW sequencing mode */ lpm_write_32_psm(gxp, psm, PSM_CFG_OFFSET, LPM_HW_MODE); @@ -150,7 +153,8 @@ static int psm_enable(struct gxp_dev *gxp, uint psm) if (gxp_lpm_is_initialized(gxp, psm)) { if (psm != LPM_TOP_PSM) { /* Ensure core is in PS3 */ - return gxp_lpm_set_state(gxp, psm, LPM_PG_STATE); + return gxp_lpm_set_state(gxp, psm, LPM_PG_STATE, + /*verbose=*/true); } return 0; @@ -178,12 +182,8 @@ static int psm_enable(struct gxp_dev *gxp, uint psm) void gxp_lpm_init(struct gxp_dev *gxp) { /* Enable Top PSM */ - dev_notice(gxp->dev, "Enabling Top PSM...\n"); - if (psm_enable(gxp, LPM_TOP_PSM)) { - dev_notice(gxp->dev, "Timed out!\n"); - return; - } - dev_notice(gxp->dev, "Enabled\n"); + if (psm_enable(gxp, LPM_TOP_PSM)) + dev_err(gxp->dev, "Timed out when enabling Top PSM!\n"); } void gxp_lpm_destroy(struct gxp_dev *gxp) @@ -202,12 +202,11 @@ int gxp_lpm_up(struct gxp_dev *gxp, uint core) gxp_doorbell_clear(gxp, CORE_WAKEUP_DOORBELL); /* Enable core PSM */ - dev_notice(gxp->dev, "Enabling Core%u PSM...\n", core); if (psm_enable(gxp, core)) { - dev_notice(gxp->dev, "Timed out!\n"); + dev_err(gxp->dev, "Timed out when enabling Core%u PSM!\n", + core); return -ETIMEDOUT; } - dev_notice(gxp->dev, "Enabled\n"); /* Enable PS1 (Clk Gated) */ gxp_lpm_enable_state(gxp, core, LPM_CG_STATE); @@ -237,7 +236,7 @@ void gxp_lpm_down(struct gxp_dev *gxp, uint core) gxp_doorbell_clear(gxp, CORE_WAKEUP_DOORBELL); /* Ensure core is in PS3 */ - gxp_lpm_set_state(gxp, core, LPM_PG_STATE); + gxp_lpm_set_state(gxp, core, LPM_PG_STATE, /*verbose=*/true); } bool gxp_lpm_wait_state_ne(struct gxp_dev *gxp, uint psm, uint 
state) @@ -97,7 +97,8 @@ bool gxp_lpm_wait_state_eq(struct gxp_dev *gxp, uint psm, uint state); /* * Force a state transition on the specified PSM. */ -int gxp_lpm_set_state(struct gxp_dev *gxp, uint psm, uint target_state); +int gxp_lpm_set_state(struct gxp_dev *gxp, uint psm, uint target_state, + bool verbose); /* * Get current LPM state of the specified PSM. diff --git a/gxp-mailbox.c b/gxp-mailbox.c index b9c9c7e..dcb0b2a 100644 --- a/gxp-mailbox.c +++ b/gxp-mailbox.c @@ -12,6 +12,7 @@ #include <linux/kthread.h> #include <linux/moduleparam.h> #include <linux/slab.h> +#include <uapi/linux/sched/types.h> #include "gxp-dma.h" #include "gxp-internal.h" @@ -20,10 +21,8 @@ #include "gxp-pm.h" /* Timeout of 8s by default to account for slower emulation platforms */ -static int mbx_timeout = 8000; -module_param(mbx_timeout, int, 0660); - -#define MAILBOX_TIMEOUT (mbx_timeout * GXP_TIME_DELAY_FACTOR) +int gxp_mbx_timeout = 8000; +module_param_named(mbx_timeout, gxp_mbx_timeout, int, 0660); /* Utilities of circular queue operations */ @@ -95,6 +94,8 @@ static void gxp_mailbox_set_resp_queue_head(struct gxp_mailbox *mailbox, * -EBUSY is returned and all fields remain unchanged. The caller should * handle this case and implement a mechanism to wait until the consumer * consumes commands. + * + * Caller must hold cmd_queue_lock. */ static int gxp_mailbox_inc_cmd_queue_tail(struct gxp_mailbox *mailbox, u32 inc) { @@ -102,6 +103,8 @@ static int gxp_mailbox_inc_cmd_queue_tail(struct gxp_mailbox *mailbox, u32 inc) u32 remain_size; u32 new_tail; + lockdep_assert_held(&mailbox->cmd_queue_lock); + if (inc > mailbox->cmd_queue_size) return -EINVAL; @@ -132,6 +135,8 @@ static int gxp_mailbox_inc_cmd_queue_tail(struct gxp_mailbox *mailbox, u32 inc) * Returns 0 on success. * -EINVAL is returned if the queue head will exceed tail of queue, and no * fields or CSR is updated in this case. + * + * Caller must hold resp_queue_lock. 
*/ static int gxp_mailbox_inc_resp_queue_head(struct gxp_mailbox *mailbox, u32 inc) { @@ -139,6 +144,8 @@ static int gxp_mailbox_inc_resp_queue_head(struct gxp_mailbox *mailbox, u32 inc) u32 size; u32 new_head; + lockdep_assert_held(&mailbox->resp_queue_lock); + if (inc > mailbox->resp_queue_size) return -EINVAL; @@ -224,18 +231,13 @@ static void gxp_mailbox_handle_response(struct gxp_mailbox *mailbox, resp); cancel_delayed_work(&async_resp->timeout_work); - if (async_resp->memory_power_state != - AUR_MEM_UNDEFINED) - gxp_pm_update_requested_memory_power_state( - async_resp->mailbox->gxp, - async_resp->memory_power_state, - AUR_MEM_UNDEFINED); - if (async_resp->gxp_power_state != AUR_OFF) - gxp_pm_update_requested_power_state( - async_resp->mailbox->gxp, - async_resp->gxp_power_state, - async_resp->requested_aggressor, - AUR_OFF, true); + gxp_pm_update_requested_power_states( + async_resp->mailbox->gxp, + async_resp->gxp_power_state, + async_resp->requested_aggressor, + AUR_OFF, true, + async_resp->memory_power_state, + AUR_MEM_UNDEFINED); spin_lock_irqsave(async_resp->dest_queue_lock, flags); @@ -358,7 +360,7 @@ gxp_mailbox_fetch_responses(struct gxp_mailbox *mailbox, u32 *total_ptr) * or race-condition bugs, gxp_mailbox_release() must be called before free the * mailbox. */ -static void gxp_mailbox_consume_responses_work(struct work_struct *work) +static void gxp_mailbox_consume_responses_work(struct kthread_work *work) { struct gxp_mailbox *mailbox = container_of(work, struct gxp_mailbox, response_work); @@ -366,10 +368,6 @@ static void gxp_mailbox_consume_responses_work(struct work_struct *work) u32 i; u32 count = 0; - /* - * TODO(b/177692488) Review if changes in edgetpu's consume response - * logic should be ported to the GXP driver as well. 
- */ /* fetch responses and bump RESP_QUEUE_HEAD */ responses = gxp_mailbox_fetch_responses(mailbox, &count); if (IS_ERR(responses)) { @@ -395,11 +393,33 @@ static void gxp_mailbox_consume_responses_work(struct work_struct *work) */ static inline void gxp_mailbox_handle_irq(struct gxp_mailbox *mailbox) { - queue_work(mailbox->response_wq, &mailbox->response_work); + kthread_queue_work(&mailbox->response_worker, &mailbox->response_work); +} + +/* Priority level for realtime worker threads */ +#define GXP_RT_THREAD_PRIORITY 2 +static struct task_struct * +create_response_rt_thread(struct device *dev, void *data, int core_id) +{ + static const struct sched_param param = { + .sched_priority = GXP_RT_THREAD_PRIORITY, + }; + struct task_struct *task = kthread_create(kthread_worker_fn, data, + "gxp_response_%d", core_id); + + if (!IS_ERR(task)) { + wake_up_process(task); + if (sched_setscheduler(task, SCHED_FIFO, ¶m)) + dev_warn(dev, "response task %d not set to RT prio", + core_id); + else + dev_dbg(dev, "response task %d set to RT prio: %i", + core_id, param.sched_priority); + } + + return task; } -#define _RESPONSE_WORKQUEUE_NAME(_x_) "gxp_responses_" #_x_ -#define RESPONSE_WORKQUEUE_NAME(_x_) _RESPONSE_WORKQUEUE_NAME(_x_) static struct gxp_mailbox *create_mailbox(struct gxp_mailbox_manager *mgr, struct gxp_virtual_device *vd, uint virt_core, u8 core_id) @@ -455,17 +475,18 @@ static struct gxp_mailbox *create_mailbox(struct gxp_mailbox_manager *mgr, mailbox->descriptor->cmd_queue_size = mailbox->cmd_queue_size; mailbox->descriptor->resp_queue_size = mailbox->resp_queue_size; - mailbox->response_wq = - create_singlethread_workqueue(RESPONSE_WORKQUEUE_NAME(i)); - if (!mailbox->response_wq) - goto err_workqueue; + kthread_init_worker(&mailbox->response_worker); + mailbox->response_thread = create_response_rt_thread( + mailbox->gxp->dev, &mailbox->response_worker, core_id); + if (IS_ERR(mailbox->response_thread)) + goto err_thread; /* Initialize driver before interacting 
with its registers */ gxp_mailbox_driver_init(mailbox); return mailbox; -err_workqueue: +err_thread: gxp_dma_free_coherent(mailbox->gxp, vd, BIT(virt_core), sizeof(struct gxp_mailbox_descriptor), mailbox->descriptor, @@ -488,7 +509,6 @@ err_mailbox: static void enable_mailbox(struct gxp_mailbox *mailbox) { - gxp_mailbox_driver_init(mailbox); gxp_mailbox_write_descriptor(mailbox, mailbox->descriptor_device_addr); gxp_mailbox_write_cmd_queue_head(mailbox, 0); gxp_mailbox_write_cmd_queue_tail(mailbox, 0); @@ -500,7 +520,7 @@ static void enable_mailbox(struct gxp_mailbox *mailbox) init_waitqueue_head(&mailbox->wait_list_waitq); INIT_LIST_HEAD(&mailbox->wait_list); mutex_init(&mailbox->wait_list_lock); - INIT_WORK(&mailbox->response_work, gxp_mailbox_consume_responses_work); + kthread_init_work(&mailbox->response_work, gxp_mailbox_consume_responses_work); /* Only enable interrupts once everything has been setup */ gxp_mailbox_driver_enable_interrupts(mailbox); @@ -550,7 +570,7 @@ void gxp_mailbox_release(struct gxp_mailbox_manager *mgr, gxp_mailbox_driver_disable_interrupts(mailbox); /* Halt and flush any traffic */ - cancel_work_sync(&mailbox->response_work); + kthread_cancel_work_sync(&mailbox->response_work); for (i = 0; i < GXP_MAILBOX_INT_BIT_COUNT; i++) { if (mailbox->interrupt_handlers[i]) cancel_work_sync(mailbox->interrupt_handlers[i]); @@ -629,7 +649,8 @@ void gxp_mailbox_release(struct gxp_mailbox_manager *mgr, sizeof(struct gxp_mailbox_descriptor), mailbox->descriptor, mailbox->descriptor_device_addr); - destroy_workqueue(mailbox->response_wq); + kthread_flush_worker(&mailbox->response_worker); + kthread_stop(mailbox->response_thread); kfree(mailbox); } @@ -805,16 +826,10 @@ static void async_cmd_timeout_work(struct work_struct *work) list_add_tail(&async_resp->list_entry, async_resp->dest_queue); spin_unlock_irqrestore(async_resp->dest_queue_lock, flags); - if (async_resp->memory_power_state != AUR_MEM_UNDEFINED) - 
gxp_pm_update_requested_memory_power_state( - async_resp->mailbox->gxp, - async_resp->memory_power_state, - AUR_MEM_UNDEFINED); - if (async_resp->gxp_power_state != AUR_OFF) - gxp_pm_update_requested_power_state( - async_resp->mailbox->gxp, - async_resp->gxp_power_state, - async_resp->requested_aggressor, AUR_OFF, true); + gxp_pm_update_requested_power_states( + async_resp->mailbox->gxp, async_resp->gxp_power_state, + async_resp->requested_aggressor, AUR_OFF, true, + async_resp->memory_power_state, AUR_MEM_UNDEFINED); if (async_resp->eventfd) { gxp_eventfd_signal(async_resp->eventfd); @@ -859,13 +874,9 @@ int gxp_mailbox_execute_cmd_async(struct gxp_mailbox *mailbox, schedule_delayed_work(&async_resp->timeout_work, msecs_to_jiffies(MAILBOX_TIMEOUT)); - if (gxp_power_state != AUR_OFF) - gxp_pm_update_requested_power_state(mailbox->gxp, AUR_OFF, true, - gxp_power_state, - requested_aggressor); - if (memory_power_state != AUR_MEM_UNDEFINED) - gxp_pm_update_requested_memory_power_state( - mailbox->gxp, AUR_MEM_UNDEFINED, memory_power_state); + gxp_pm_update_requested_power_states( + mailbox->gxp, AUR_OFF, true, gxp_power_state, + requested_aggressor, AUR_MEM_UNDEFINED, memory_power_state); ret = gxp_mailbox_enqueue_cmd(mailbox, cmd, &async_resp->resp, /* resp_is_async = */ true); if (ret) @@ -874,14 +885,10 @@ int gxp_mailbox_execute_cmd_async(struct gxp_mailbox *mailbox, return 0; err_free_resp: - if (memory_power_state != AUR_MEM_UNDEFINED) - gxp_pm_update_requested_memory_power_state( - mailbox->gxp, memory_power_state, AUR_MEM_UNDEFINED); - if (gxp_power_state != AUR_OFF) - gxp_pm_update_requested_power_state(mailbox->gxp, - gxp_power_state, - requested_aggressor, - AUR_OFF, true); + gxp_pm_update_requested_power_states(mailbox->gxp, gxp_power_state, + requested_aggressor, AUR_OFF, true, + memory_power_state, + AUR_MEM_UNDEFINED); cancel_delayed_work_sync(&async_resp->timeout_work); kfree(async_resp); return ret; diff --git a/gxp-mailbox.h b/gxp-mailbox.h index 
486264f..986620b 100644 --- a/gxp-mailbox.h +++ b/gxp-mailbox.h @@ -7,15 +7,18 @@ #ifndef __GXP_MAILBOX_H__ #define __GXP_MAILBOX_H__ +#include <linux/kthread.h> + #include "gxp-client.h" #include "gxp-internal.h" /* Command/Response Structures */ enum gxp_mailbox_command_code { + /* A user-space initiated dispatch message. */ GXP_MBOX_CODE_DISPATCH = 0, - GXP_MBOX_CODE_COREDUMP = 1, - GXP_MBOX_CODE_PINGPONG = 2, + /* A kernel initiated core suspend request. */ + GXP_MBOX_CODE_SUSPEND_REQUEST = 1, }; /* Basic Buffer descriptor struct for message payloads. */ @@ -156,8 +159,10 @@ struct gxp_mailbox { struct mutex wait_list_lock; /* protects wait_list */ /* queue for waiting for the wait_list to be consumed */ wait_queue_head_t wait_list_waitq; - struct workqueue_struct *response_wq; - struct work_struct response_work; + /* to create our own realtime worker for handling responses */ + struct kthread_worker response_worker; + struct task_struct *response_thread; + struct kthread_work response_work; }; typedef void __iomem *(*get_mailbox_base_t)(struct gxp_dev *gxp, uint index); @@ -172,6 +177,9 @@ struct gxp_mailbox_manager { /* Mailbox APIs */ +extern int gxp_mbx_timeout; +#define MAILBOX_TIMEOUT (gxp_mbx_timeout * GXP_TIME_DELAY_FACTOR) + struct gxp_mailbox_manager *gxp_mailbox_create_manager(struct gxp_dev *gxp, uint num_cores); diff --git a/gxp-mapping.c b/gxp-mapping.c index 8f9359e..6bdd707 100644 --- a/gxp-mapping.c +++ b/gxp-mapping.c @@ -8,23 +8,51 @@ #include <linux/dma-mapping.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/uaccess.h> +#include "gxp-debug-dump.h" #include "gxp-dma.h" #include "gxp-internal.h" #include "gxp-mapping.h" #include "mm-backport.h" -int gxp_mapping_init(struct gxp_dev *gxp) +/* Destructor for a mapping created with `gxp_mapping_create()` */ +static void destroy_mapping(struct gxp_mapping *mapping) { - gxp->mappings = - devm_kzalloc(gxp->dev, sizeof(*gxp->mappings), GFP_KERNEL); - if (!gxp->mappings) - return 
-ENOMEM; + struct sg_page_iter sg_iter; + struct page *page; - gxp->mappings->rb = RB_ROOT; - mutex_init(&gxp->mappings->lock); + mutex_destroy(&mapping->vlock); + mutex_destroy(&mapping->sync_lock); + + /* + * Unmap the user pages + * + * Normally on unmap, the entire mapping is synced back to the CPU. + * Since mappings are made at a page granularity regardless of the + * underlying buffer's size, they can cover other data as well. If a + * user requires a mapping be synced before unmapping, they are + * responsible for calling `gxp_mapping_sync()` before hand. + */ + gxp_dma_unmap_sg(mapping->gxp, mapping->vd, mapping->virt_core_list, + mapping->sgt.sgl, mapping->sgt.orig_nents, + mapping->dir, DMA_ATTR_SKIP_CPU_SYNC); - return 0; + /* Unpin the user pages */ + for_each_sg_page(mapping->sgt.sgl, &sg_iter, mapping->sgt.orig_nents, + 0) { + page = sg_page_iter_page(&sg_iter); + if (mapping->dir == DMA_FROM_DEVICE || + mapping->dir == DMA_BIDIRECTIONAL) { + set_page_dirty(page); + } + + unpin_user_page(page); + } + + /* Free the mapping book-keeping */ + sg_free_table(&mapping->sgt); + kfree(mapping); } struct gxp_mapping *gxp_mapping_create(struct gxp_dev *gxp, @@ -41,41 +69,69 @@ struct gxp_mapping *gxp_mapping_create(struct gxp_dev *gxp, struct vm_area_struct *vma; unsigned int foll_flags = FOLL_LONGTERM | FOLL_WRITE; + /* Check whether dir is valid or not */ + if (!valid_dma_direction(dir)) + return ERR_PTR(-EINVAL); + + if (!access_ok((const void *)user_address, size)) { + dev_err(gxp->dev, "invalid address range in buffer map request"); + return ERR_PTR(-EFAULT); + } + /* * The host pages might be read-only and could fail if we attempt to pin * it with FOLL_WRITE. 
* default to read/write if find_extend_vma returns NULL */ vma = find_extend_vma(current->mm, user_address & PAGE_MASK); - if (vma && !(vma->vm_flags & VM_WRITE)) { - foll_flags &= ~FOLL_WRITE; - if (dir != DMA_TO_DEVICE) { - dev_err(gxp->dev, - "Unable to map read-only pages as anything but DMA_TO_DEVICE\n"); - return ERR_PTR(-EINVAL); - } + if (vma) { + if (!(vma->vm_flags & VM_WRITE)) + foll_flags &= ~FOLL_WRITE; + } else { + dev_dbg(gxp->dev, + "unable to find address in VMA, assuming buffer writable"); } /* Pin the user pages */ offset = user_address & (PAGE_SIZE - 1); if (unlikely((size + offset) / PAGE_SIZE >= UINT_MAX - 1 || size + offset < size)) - return ERR_PTR(-ENOMEM); + return ERR_PTR(-EFAULT); num_pages = (size + offset) / PAGE_SIZE; if ((size + offset) % PAGE_SIZE) num_pages++; - pages = kcalloc(num_pages, sizeof(*pages), GFP_KERNEL); - if (!pages) + /* + * "num_pages" is decided from user-space arguments, don't show warnings + * when facing malicious input. + */ + pages = kvmalloc((num_pages * sizeof(*pages)), GFP_KERNEL | __GFP_NOWARN); + if (!pages) { + dev_err(gxp->dev, "Failed to alloc pages for mapping: num_pages=%u", + num_pages); return ERR_PTR(-ENOMEM); + } - /* Provide protection around `pin_user_pages_fast` since it fails if + /* + * Provide protection around `pin_user_pages_fast` since it fails if * called by more than one thread simultaneously. 
*/ - mutex_lock(&gxp->mappings->lock); + mutex_lock(&gxp->pin_user_pages_lock); ret = pin_user_pages_fast(user_address & PAGE_MASK, num_pages, foll_flags, pages); - mutex_unlock(&gxp->mappings->lock); + if (ret == -EFAULT && !vma) { + dev_warn(gxp->dev, + "pin failed with fault, assuming buffer is read-only"); + ret = pin_user_pages_fast(user_address & PAGE_MASK, num_pages, + foll_flags & ~FOLL_WRITE, pages); + } + mutex_unlock(&gxp->pin_user_pages_lock); + if (ret == -ENOMEM) + dev_err(gxp->dev, "system out of memory locking %u pages", + num_pages); + if (ret == -EFAULT) + dev_err(gxp->dev, "address fault mapping %s buffer", + dir == DMA_TO_DEVICE ? "read-only" : "writeable"); if (ret < 0 || ret < num_pages) { dev_dbg(gxp->dev, "Get user pages failed: user_add=%pK, num_pages=%u, ret=%d\n", @@ -91,17 +147,19 @@ struct gxp_mapping *gxp_mapping_create(struct gxp_dev *gxp, ret = -ENOMEM; goto error_unpin_pages; } + refcount_set(&mapping->refcount, 1); + mapping->destructor = destroy_mapping; mapping->host_address = user_address; + mapping->gxp = gxp; mapping->virt_core_list = virt_core_list; mapping->vd = vd; mapping->size = size; - mapping->map_count = 1; mapping->gxp_dma_flags = flags; mapping->dir = dir; ret = sg_alloc_table_from_pages(&mapping->sgt, pages, num_pages, 0, num_pages * PAGE_SIZE, GFP_KERNEL); if (ret) { - dev_dbg(gxp->dev, "Failed to alloc sgt for mapping (ret=%d)\n", + dev_err(gxp->dev, "Failed to alloc sgt for mapping (ret=%d)\n", ret); goto error_free_sgt; } @@ -111,7 +169,7 @@ struct gxp_mapping *gxp_mapping_create(struct gxp_dev *gxp, mapping->sgt.sgl, mapping->sgt.nents, mapping->dir, DMA_ATTR_SKIP_CPU_SYNC, mapping->gxp_dma_flags); if (!ret) { - dev_dbg(gxp->dev, "Failed to map sgt (ret=%d)\n", ret); + dev_err(gxp->dev, "Failed to map sgt (ret=%d)\n", ret); ret = -EINVAL; goto error_free_sgt; } @@ -119,7 +177,10 @@ struct gxp_mapping *gxp_mapping_create(struct gxp_dev *gxp, mapping->device_address = sg_dma_address(mapping->sgt.sgl) + offset; - 
kfree(pages); + mutex_init(&mapping->sync_lock); + mutex_init(&mapping->vlock); + + kvfree(pages); return mapping; error_free_sgt: @@ -128,54 +189,41 @@ error_free_sgt: error_unpin_pages: for (i = 0; i < num_pages; i++) unpin_user_page(pages[i]); - kfree(pages); + kvfree(pages); return ERR_PTR(ret); } -void gxp_mapping_destroy(struct gxp_dev *gxp, struct gxp_mapping *mapping) +bool gxp_mapping_get(struct gxp_mapping *mapping) { - struct sg_page_iter sg_iter; - struct page *page; - - /* - * Unmap the user pages - * - * Normally on unmap, the entire mapping is synced back to the CPU. - * Since mappings are made at a page granularity regardless of the - * underlying buffer's size, they can cover other data as well. If a - * user requires a mapping be synced before unmapping, they are - * responsible for calling `gxp_mapping_sync()` before hand. - */ - gxp_dma_unmap_sg(gxp, mapping->vd, mapping->virt_core_list, - mapping->sgt.sgl, mapping->sgt.orig_nents, - mapping->dir, DMA_ATTR_SKIP_CPU_SYNC); - - /* Unpin the user pages */ - for_each_sg_page(mapping->sgt.sgl, &sg_iter, mapping->sgt.orig_nents, - 0) { - page = sg_page_iter_page(&sg_iter); - if (mapping->dir == DMA_FROM_DEVICE || - mapping->dir == DMA_BIDIRECTIONAL) { - set_page_dirty(page); - } - - unpin_user_page(page); - } + return refcount_inc_not_zero(&mapping->refcount); +} - /* Free the mapping book-keeping */ - sg_free_table(&mapping->sgt); - kfree(mapping); +void gxp_mapping_put(struct gxp_mapping *mapping) +{ + /* `refcount_dec_and_test()` returns true if the refcount drops to 0 */ + if (refcount_dec_and_test(&mapping->refcount)) + mapping->destructor(mapping); } -int gxp_mapping_sync(struct gxp_dev *gxp, struct gxp_mapping *mapping, - u32 offset, u32 size, bool for_cpu) +int gxp_mapping_sync(struct gxp_mapping *mapping, u32 offset, u32 size, + bool for_cpu) { + struct gxp_dev *gxp = mapping->gxp; struct scatterlist *sg, *start_sg = NULL, *end_sg = NULL; int nelems = 0, cur_offset = 0, ret = 0, i; u64 start, 
end; unsigned int start_diff = 0, end_diff = 0; + if (!gxp_mapping_get(mapping)) + return -ENODEV; + + /* Only mappings with valid `host_address`es can be synced */ + if (!mapping->host_address) { + ret = -EINVAL; + goto out; + } + /* * Valid input requires * - size > 0 (offset + size != offset) @@ -183,8 +231,10 @@ int gxp_mapping_sync(struct gxp_dev *gxp, struct gxp_mapping *mapping, * - the mapped range falls within [0 : mapping->size] */ if (offset + size <= offset || - offset + size > mapping->size) - return -EINVAL; + offset + size > mapping->size) { + ret = -EINVAL; + goto out; + } /* * Mappings are created at a PAGE_SIZE granularity, however other data @@ -215,8 +265,10 @@ int gxp_mapping_sync(struct gxp_dev *gxp, struct gxp_mapping *mapping, end_diff = cur_offset - end; /* Make sure a valid starting scatterlist was found for the start */ - if (!start_sg) - return -EINVAL; + if (!start_sg) { + ret = -EINVAL; + goto out; + } /* * Since the scatter-gather list of the mapping is modified while it is @@ -224,7 +276,7 @@ int gxp_mapping_sync(struct gxp_dev *gxp, struct gxp_mapping *mapping, * Rather than maintain a mutex for every mapping, lock the mapping list * mutex, making all syncs mutually exclusive. 
*/ - mutex_lock(&gxp->mappings->lock); + mutex_lock(&mapping->sync_lock); start_sg->offset += start_diff; start_sg->dma_address += start_diff; @@ -249,110 +301,95 @@ int gxp_mapping_sync(struct gxp_dev *gxp, struct gxp_mapping *mapping, start_sg->length += start_diff; start_sg->dma_length += start_diff; - mutex_unlock(&gxp->mappings->lock); + mutex_unlock(&mapping->sync_lock); + +out: + gxp_mapping_put(mapping); return ret; } -int gxp_mapping_put(struct gxp_dev *gxp, struct gxp_mapping *map) +void *gxp_mapping_vmap(struct gxp_mapping *mapping) { - struct rb_node **link; - struct rb_node *parent = NULL; - dma_addr_t device_address = map->device_address; - struct gxp_mapping *mapping; - - link = &gxp->mappings->rb.rb_node; - - mutex_lock(&gxp->mappings->lock); - - /* Figure out where to put new node */ - while (*link) { - parent = *link; - mapping = rb_entry(parent, struct gxp_mapping, node); - - if (mapping->device_address > device_address) - link = &(*link)->rb_left; - else if (mapping->device_address < device_address) - link = &(*link)->rb_right; - else - goto out; - } + struct sg_table *sgt; + struct sg_page_iter sg_iter; + struct page **pages; + void *vaddr; + int i = 0; + u32 page_count = 0; - /* Add new node and rebalance tree. 
*/ - rb_link_node(&map->node, parent, link); - rb_insert_color(&map->node, &gxp->mappings->rb); + if (!gxp_mapping_get(mapping)) + return ERR_PTR(-ENODEV); - mutex_unlock(&gxp->mappings->lock); + mutex_lock(&mapping->vlock); - return 0; + /* Check if user buffer has already been mapped to kernel */ + if (mapping->vmap_count) { + vaddr = mapping->virtual_address; + mapping->vmap_count++; + goto out; + } -out: - mutex_unlock(&gxp->mappings->lock); - dev_err(gxp->dev, "Duplicate mapping: %pad", &map->device_address); - return -EINVAL; -} + sgt = &mapping->sgt; + for_each_sg_page(sgt->sgl, &sg_iter, sgt->orig_nents, 0) + page_count++; -struct gxp_mapping *gxp_mapping_get(struct gxp_dev *gxp, - dma_addr_t device_address) -{ - struct rb_node *node; - struct gxp_mapping *mapping; + pages = kvmalloc((page_count * sizeof(*pages)), GFP_KERNEL); + if (!pages) { + vaddr = ERR_PTR(-ENOMEM); + goto out; + } - mutex_lock(&gxp->mappings->lock); + for_each_sg_page(sgt->sgl, &sg_iter, sgt->orig_nents, 0) + pages[i++] = sg_page_iter_page(&sg_iter); - node = gxp->mappings->rb.rb_node; + vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL); + kvfree(pages); + if (vaddr == NULL) { + dev_err(mapping->gxp->dev, + "Failed to map user buffer to kernel"); + vaddr = ERR_PTR(-ENOMEM); + goto out; + } - while (node) { - mapping = rb_entry(node, struct gxp_mapping, node); + mapping->virtual_address = vaddr; + mapping->page_count = page_count; + mapping->vmap_count = 1; - if (mapping->device_address > device_address) { - node = node->rb_left; - } else if (mapping->device_address < device_address) { - node = node->rb_right; - } else { - mutex_unlock(&gxp->mappings->lock); - return mapping; /* Found it */ - } - } + /* Hold a reference to the mapping so long as it is vmapped */ + gxp_mapping_get(mapping); - mutex_unlock(&gxp->mappings->lock); +out: + mutex_unlock(&mapping->vlock); + + gxp_mapping_put(mapping); - dev_err(gxp->dev, "Mapping not found: %pad", &device_address); - return NULL; + return 
vaddr; } -struct gxp_mapping *gxp_mapping_get_host(struct gxp_dev *gxp, u64 host_address) +void gxp_mapping_vunmap(struct gxp_mapping *mapping) { - struct rb_node *node; - struct gxp_mapping *mapping; + if (!gxp_mapping_get(mapping)) + return; - mutex_lock(&gxp->mappings->lock); + mutex_lock(&mapping->vlock); - if (!host_address) { - dev_warn(gxp->dev, - "Unable to get dma-buf mapping by host address\n"); - return NULL; - } - - /* Iterate through the elements in the rbtree */ - for (node = rb_first(&gxp->mappings->rb); node; node = rb_next(node)) { - mapping = rb_entry(node, struct gxp_mapping, node); - if (mapping->host_address == host_address) { - mutex_unlock(&gxp->mappings->lock); - return mapping; - } - } - - mutex_unlock(&gxp->mappings->lock); + /* + * Exit immediately if the mapping was never vmapped, or still has + * other users expecting it to be vmapped. + */ + if (!mapping->vmap_count || --mapping->vmap_count) + goto out; - return NULL; -} + vunmap(mapping->virtual_address); + mapping->virtual_address = 0; + mapping->page_count = 0; -void gxp_mapping_remove(struct gxp_dev *gxp, struct gxp_mapping *map) -{ - mutex_lock(&gxp->mappings->lock); + /* Release the reference from gxp_mapping_vmap() */ + gxp_mapping_put(mapping); - rb_erase(&map->node, &gxp->mappings->rb); +out: + mutex_unlock(&mapping->vlock); - mutex_unlock(&gxp->mappings->lock); + gxp_mapping_put(mapping); } diff --git a/gxp-mapping.h b/gxp-mapping.h index 0e5c869..dbb80d9 100644 --- a/gxp-mapping.h +++ b/gxp-mapping.h @@ -10,24 +10,23 @@ #include <linux/dma-direction.h> #include <linux/mutex.h> #include <linux/rbtree.h> +#include <linux/refcount.h> #include <linux/scatterlist.h> #include <linux/types.h> #include "gxp-internal.h" -struct gxp_mapping_root { - struct rb_root rb; - struct mutex lock; -}; - struct gxp_mapping { struct rb_node node; + refcount_t refcount; + void (*destructor)(struct gxp_mapping *mapping); /* * User-space address of the mapped buffer. 
* If this value is 0, it indicates this mapping is for a dma-buf and * should not be used if a regular buffer mapping was expected. */ u64 host_address; + struct gxp_dev *gxp; uint virt_core_list; struct gxp_virtual_device *vd; /* @@ -43,22 +42,101 @@ struct gxp_mapping { uint gxp_dma_flags; enum dma_data_direction dir; struct sg_table sgt; - u32 map_count; + /* A mapping can only be synced by one thread at a time */ + struct mutex sync_lock; + /* + * `virtual_address` and `page_count` are set when gxp_mapping_vmap(..) + * is called, and unset when gxp_mapping_vunmap(..) is called + */ + void *virtual_address; + u32 page_count; + uint vmap_count; + /* Protects `virtual_address`, `page_count`, and `vmap_count` */ + struct mutex vlock; }; -int gxp_mapping_init(struct gxp_dev *gxp); +/** + * gxp_mapping_create() - Create a mapping for a user buffer + * @gxp: The GXP device to create the mapping for + * @vd: The virtual device to create the mapping for + * @virt_core_list: A bitfield indicating the cores in @vd to map the buffer to + * @user_address: The user-space address of the buffer to map + * @size: The size of the buffer to be mapped + * @flags: Flags describing the type of mapping to create; currently unused + * @dir: DMA direction + * + * Upon successful creation, the mapping will be created with a reference count + * of 1. + * + * Return: A pointer to the newly created mapping on success; otherwise an + * ERR_PTR: + * * -ENOMEM: Insufficient memory to create the mapping + * * -EFAULT: Unable to pin the user pages + * * -EINVAL: Attempting to map read-only pages for writing by device or failed + * to map the buffer for the device. 
+ */ struct gxp_mapping *gxp_mapping_create(struct gxp_dev *gxp, struct gxp_virtual_device *vd, uint virt_core_list, u64 user_address, size_t size, u32 flags, enum dma_data_direction dir); -void gxp_mapping_destroy(struct gxp_dev *gxp, struct gxp_mapping *mapping); -int gxp_mapping_sync(struct gxp_dev *gxp, struct gxp_mapping *mapping, - u32 offset, u32 size, bool for_cpu); -int gxp_mapping_put(struct gxp_dev *gxp, struct gxp_mapping *map); -struct gxp_mapping *gxp_mapping_get(struct gxp_dev *gxp, - dma_addr_t device_address); -struct gxp_mapping *gxp_mapping_get_host(struct gxp_dev *gxp, u64 host_address); -void gxp_mapping_remove(struct gxp_dev *gxp, struct gxp_mapping *map); + +/** + * gxp_mapping_get() - Increment a mapping's reference count + * @map: The mapping to obtain a reference to + * + * Return: True if the mapping's reference count was non-zero and incremented + * successfully; false otherwise. + */ +bool gxp_mapping_get(struct gxp_mapping *mapping); + +/** + * gxp_mapping_put() - Decrement a mapping's reference + * @mapping: The mapping to release a reference to + */ +void gxp_mapping_put(struct gxp_mapping *mapping); + +/** + * gxp_mapping_sync() - Sync a mapped buffer for either CPU or device + * @mapping: The mapping to sync + * @offset: The offset, in bytes, into the mapped buffer where the region to + * be synced begins + * @size: The size, in bytes, of the region to be synced + * @for_cpu: True to sync for CPU access (cache invalidate), false to sync for + * device access (cache flush) + * + * Return: + * * 0: Success + * * -ENODEV: A reference to the mapping could not be obtained + * * -EINVAL: The specified @offset and @size were not valid + */ +int gxp_mapping_sync(struct gxp_mapping *mapping, u32 offset, u32 size, + bool for_cpu); + +/** + * gxp_mapping_vmap() - Map a mapping's buffer into kernel address space + * @mapping: Tha mapping to map into kernel space + * + * If the buffer is already mapped, increments a reference count and returns 
+ * the existing virtual address instead. + * + * Obtains a reference to @mapping if the buffer had not been mapped yet. + * + * Return: A pointer to the mapped buffer if successful; otherwise an ERR_PTR: + * * -ENODEV: A reference to the mapping could not be obtained + * * -ENOMEM: Insufficient memory to map the buffer + */ +void *gxp_mapping_vmap(struct gxp_mapping *mapping); + +/** + * gxp_mapping_vunmap() - Unmap a mapping from kernel address space + * @mapping: The mapping to unmap from kernel space + * + * Decrements the mapping's vmap reference count, and unmaps the buffer if that + * count drops to zero. + * + * Releases a reference to @mapping if the buffer is unmapped + */ +void gxp_mapping_vunmap(struct gxp_mapping *mapping); #endif /* __GXP_MAPPING_H__ */ diff --git a/gxp-platform.c b/gxp-platform.c index 2bfdb28..36ae93d 100644 --- a/gxp-platform.c +++ b/gxp-platform.c @@ -10,6 +10,7 @@ #endif #include <linux/acpi.h> +#include <linux/cred.h> #include <linux/device.h> #include <linux/dma-mapping.h> #include <linux/fs.h> @@ -22,35 +23,59 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/uaccess.h> -#if IS_ENABLED(CONFIG_ANDROID) && !IS_ENABLED(CONFIG_GXP_GEM5) +#include <linux/uidgid.h> +#if (IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_ANDROID)) && !IS_ENABLED(CONFIG_GXP_GEM5) #include <soc/google/tpu-ext.h> #endif -#include "gxp.h" #include "gxp-client.h" +#include "gxp-config.h" #include "gxp-debug-dump.h" #include "gxp-debugfs.h" #include "gxp-dma.h" #include "gxp-dmabuf.h" +#include "gxp-domain-pool.h" #include "gxp-firmware.h" #include "gxp-firmware-data.h" #include "gxp-internal.h" #include "gxp-mailbox.h" #include "gxp-mailbox-driver.h" #include "gxp-mapping.h" +#include "gxp-notification.h" #include "gxp-pm.h" #include "gxp-telemetry.h" #include "gxp-thermal.h" #include "gxp-vd.h" #include "gxp-wakelock.h" +#include "gxp.h" -/* Caller needs to hold client->semaphore for reading */ +#define 
__wait_event_lock_irq_timeout_exclusive(wq_head, condition, lock, \ + timeout, state) \ + ___wait_event(wq_head, ___wait_cond_timeout(condition), state, 1, \ + timeout, spin_unlock_irq(&lock); \ + __ret = schedule_timeout(__ret); spin_lock_irq(&lock)) + +/* + * wait_event_interruptible_lock_irq_timeout() but set the exclusive flag. + */ +#define wait_event_interruptible_lock_irq_timeout_exclusive( \ + wq_head, condition, lock, timeout) \ + ({ \ + long __ret = timeout; \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_lock_irq_timeout_exclusive( \ + wq_head, condition, lock, timeout, \ + TASK_INTERRUPTIBLE); \ + __ret; \ + }) + +/* Caller needs to hold client->semaphore */ static bool check_client_has_available_vd(struct gxp_client *client, char *ioctl_name) { struct gxp_dev *gxp = client->gxp; - lockdep_assert_held_read(&client->semaphore); + lockdep_assert_held(&client->semaphore); if (!client->vd) { dev_err(gxp->dev, "%s requires the client allocate a VIRTUAL_DEVICE\n", @@ -120,6 +145,15 @@ static int gxp_open(struct inode *inode, struct file *file) struct gxp_client *client; struct gxp_dev *gxp = container_of(file->private_data, struct gxp_dev, misc_dev); + int ret = 0; + + /* If this is the first call to open(), request the firmware files */ + ret = gxp_firmware_request_if_needed(gxp); + if (ret) { + dev_err(gxp->dev, + "Failed to request dsp firmware files (ret=%d)\n", ret); + return ret; + } client = gxp_client_create(gxp); if (IS_ERR(client)) @@ -133,7 +167,7 @@ static int gxp_open(struct inode *inode, struct file *file) list_add(&client->list_entry, &gxp->client_list); mutex_unlock(&gxp->client_list_lock); - return 0; + return ret; } static int gxp_release(struct inode *inode, struct file *file) @@ -150,10 +184,6 @@ static int gxp_release(struct inode *inode, struct file *file) list_del(&client->list_entry); mutex_unlock(&client->gxp->client_list_lock); - /* - * TODO (b/184572070): Unmap buffers and drop mailbox responses - * belonging to 
the client - */ gxp_client_destroy(client); return 0; @@ -184,7 +214,7 @@ static int gxp_map_buffer(struct gxp_client *client, if (copy_from_user(&ibuf, argp, sizeof(ibuf))) return -EFAULT; - if (ibuf.size == 0) + if (ibuf.size == 0 || ibuf.virtual_core_list == 0) return -EINVAL; if (ibuf.host_address % L1_CACHE_BYTES || ibuf.size % L1_CACHE_BYTES) { @@ -192,8 +222,6 @@ static int gxp_map_buffer(struct gxp_client *client, "Mapped buffers must be cache line aligned and padded.\n"); return -EINVAL; } - if (ibuf.virtual_core_list == 0) - return -EINVAL; down_read(&client->semaphore); @@ -202,18 +230,27 @@ static int gxp_map_buffer(struct gxp_client *client, goto out; } + /* the list contains un-allocated core bits */ + if (ibuf.virtual_core_list & ~(BIT(client->vd->num_cores) - 1)) { + ret = -EINVAL; + goto out; + } + map = gxp_mapping_create(gxp, client->vd, ibuf.virtual_core_list, ibuf.host_address, ibuf.size, /*gxp_dma_flags=*/0, mapping_flags_to_dma_dir(ibuf.flags)); if (IS_ERR(map)) { ret = PTR_ERR(map); + dev_err(gxp->dev, "Failed to create mapping (ret=%d)\n", ret); goto out; } - ret = gxp_mapping_put(gxp, map); - if (ret) + ret = gxp_vd_mapping_store(client->vd, map); + if (ret) { + dev_err(gxp->dev, "Failed to store mapping (ret=%d)\n", ret); goto error_destroy; + } ibuf.device_address = map->device_address; @@ -222,17 +259,23 @@ static int gxp_map_buffer(struct gxp_client *client, goto error_remove; } + /* + * The virtual device acquired its own reference to the mapping when + * it was stored in the VD's records. Release the reference from + * creating the mapping since this function is done using it. 
+ */ + gxp_mapping_put(map); + out: up_read(&client->semaphore); return ret; error_remove: - gxp_mapping_remove(gxp, map); + gxp_vd_mapping_remove(client->vd, map); error_destroy: - gxp_mapping_destroy(gxp, map); + gxp_mapping_put(map); up_read(&client->semaphore); - devm_kfree(gxp->dev, (void *)map); return ret; } @@ -256,8 +299,12 @@ static int gxp_unmap_buffer(struct gxp_client *client, goto out; } - map = gxp_mapping_get(gxp, (dma_addr_t)ibuf.device_address); + map = gxp_vd_mapping_search(client->vd, + (dma_addr_t)ibuf.device_address); if (!map) { + dev_err(gxp->dev, + "Mapping not found for provided device address %#llX\n", + ibuf.device_address); ret = -EINVAL; goto out; } else if (!map->host_address) { @@ -267,11 +314,11 @@ static int gxp_unmap_buffer(struct gxp_client *client, } WARN_ON(map->host_address != ibuf.host_address); - if (--(map->map_count)) - goto out; - gxp_mapping_remove(gxp, map); - gxp_mapping_destroy(gxp, map); + gxp_vd_mapping_remove(client->vd, map); + + /* Release the reference from gxp_vd_mapping_search() */ + gxp_mapping_put(map); out: up_read(&client->semaphore); @@ -299,15 +346,22 @@ static int gxp_sync_buffer(struct gxp_client *client, goto out; } - map = gxp_mapping_get(gxp, (dma_addr_t)ibuf.device_address); + map = gxp_vd_mapping_search(client->vd, + (dma_addr_t)ibuf.device_address); if (!map) { + dev_err(gxp->dev, + "Mapping not found for provided device address %#llX\n", + ibuf.device_address); ret = -EINVAL; goto out; } - ret = gxp_mapping_sync(gxp, map, ibuf.offset, ibuf.size, + ret = gxp_mapping_sync(map, ibuf.offset, ibuf.size, ibuf.flags == GXP_SYNC_FOR_CPU); + /* Release the reference from gxp_vd_mapping_search() */ + gxp_mapping_put(map); + out: up_read(&client->semaphore); @@ -338,7 +392,7 @@ gxp_mailbox_command_compat(struct gxp_client *client, if (!check_client_has_available_vd_wakelock(client, "GXP_MAILBOX_COMMAND")) { ret = -ENODEV; - goto out_unlock_client_semphore; + goto out_unlock_client_semaphore; } 
down_read(&gxp->vd_semaphore); @@ -402,7 +456,7 @@ gxp_mailbox_command_compat(struct gxp_client *client, out: up_read(&gxp->vd_semaphore); -out_unlock_client_semphore: +out_unlock_client_semaphore: up_read(&client->semaphore); return ret; @@ -447,7 +501,7 @@ static int gxp_mailbox_command(struct gxp_client *client, if (!check_client_has_available_vd_wakelock(client, "GXP_MAILBOX_COMMAND")) { ret = -ENODEV; - goto out_unlock_client_semphore; + goto out_unlock_client_semaphore; } down_read(&gxp->vd_semaphore); @@ -512,7 +566,7 @@ static int gxp_mailbox_command(struct gxp_client *client, out: up_read(&gxp->vd_semaphore); -out_unlock_client_semphore: +out_unlock_client_semaphore: up_read(&client->semaphore); return ret; @@ -525,8 +579,8 @@ static int gxp_mailbox_response(struct gxp_client *client, struct gxp_mailbox_response_ioctl ibuf; struct gxp_async_response *resp_ptr; int virt_core; - unsigned long flags; int ret = 0; + long timeout; if (copy_from_user(&ibuf, argp, sizeof(ibuf))) return -EFAULT; @@ -548,31 +602,26 @@ static int gxp_mailbox_response(struct gxp_client *client, goto out; } - spin_lock_irqsave(&client->vd->mailbox_resp_queues[virt_core].lock, - flags); + spin_lock_irq(&client->vd->mailbox_resp_queues[virt_core].lock); /* - * No timeout is required since commands have a hard timeout after - * which the command is abandoned and a response with a failure - * status is added to the mailbox_resps queue. - * * The "exclusive" version of wait_event is used since each wake * corresponds to the addition of exactly one new response to be * consumed. Therefore, only one waiting response ioctl can ever * proceed per wake event. 
*/ - wait_event_exclusive_cmd( + timeout = wait_event_interruptible_lock_irq_timeout_exclusive( client->vd->mailbox_resp_queues[virt_core].waitq, !list_empty(&client->vd->mailbox_resp_queues[virt_core].queue), - /* Release the lock before sleeping */ - spin_unlock_irqrestore( - &client->vd->mailbox_resp_queues[virt_core].lock, - flags), - /* Reacquire the lock after waking */ - spin_lock_irqsave( - &client->vd->mailbox_resp_queues[virt_core].lock, - flags)); - + client->vd->mailbox_resp_queues[virt_core].lock, + msecs_to_jiffies(MAILBOX_TIMEOUT)); + if (timeout <= 0) { + spin_unlock_irq( + &client->vd->mailbox_resp_queues[virt_core].lock); + /* unusual case - this only happens when there is no command pushed */ + ret = timeout ? -ETIMEDOUT : timeout; + goto out; + } resp_ptr = list_first_entry( &client->vd->mailbox_resp_queues[virt_core].queue, struct gxp_async_response, list_entry); @@ -580,8 +629,7 @@ static int gxp_mailbox_response(struct gxp_client *client, /* Pop the front of the response list */ list_del(&(resp_ptr->list_entry)); - spin_unlock_irqrestore(&client->vd->mailbox_resp_queues[virt_core].lock, - flags); + spin_unlock_irq(&client->vd->mailbox_resp_queues[virt_core].lock); ibuf.sequence_number = resp_ptr->resp.seq; switch (resp_ptr->resp.status) { @@ -630,10 +678,10 @@ out: static int gxp_get_specs(struct gxp_client *client, struct gxp_specs_ioctl __user *argp) { - struct gxp_specs_ioctl ibuf; - - ibuf.core_count = GXP_NUM_CORES; - ibuf.memory_per_core = client->gxp->memory_per_core; + struct gxp_specs_ioctl ibuf = { + .core_count = GXP_NUM_CORES, + .memory_per_core = client->gxp->memory_per_core, + }; if (copy_to_user(argp, &ibuf, sizeof(ibuf))) return -EFAULT; @@ -657,6 +705,12 @@ static int gxp_allocate_vd(struct gxp_client *client, return -EINVAL; } + if (ibuf.memory_per_core > gxp->memory_per_core) { + dev_err(gxp->dev, "Invalid memory-per-core (%u)\n", + ibuf.memory_per_core); + return -EINVAL; + } + down_write(&client->semaphore); if 
(client->vd) { @@ -715,7 +769,7 @@ gxp_etm_trace_start_command(struct gxp_client *client, if (!check_client_has_available_vd_wakelock( client, "GXP_ETM_TRACE_START_COMMAND")) { ret = -ENODEV; - goto out_unlock_client_semphore; + goto out_unlock_client_semaphore; } down_read(&gxp->vd_semaphore); @@ -737,7 +791,7 @@ gxp_etm_trace_start_command(struct gxp_client *client, out: up_read(&gxp->vd_semaphore); -out_unlock_client_semphore: +out_unlock_client_semaphore: up_read(&client->semaphore); return ret; @@ -760,7 +814,7 @@ static int gxp_etm_trace_sw_stop_command(struct gxp_client *client, if (!check_client_has_available_vd_wakelock( client, "GXP_ETM_TRACE_SW_STOP_COMMAND")) { ret = -ENODEV; - goto out_unlock_client_semphore; + goto out_unlock_client_semaphore; } down_read(&gxp->vd_semaphore); @@ -781,7 +835,7 @@ static int gxp_etm_trace_sw_stop_command(struct gxp_client *client, out: up_read(&gxp->vd_semaphore); -out_unlock_client_semphore: +out_unlock_client_semaphore: up_read(&client->semaphore); return ret; @@ -804,7 +858,7 @@ static int gxp_etm_trace_cleanup_command(struct gxp_client *client, if (!check_client_has_available_vd_wakelock( client, "GXP_ETM_TRACE_CLEANUP_COMMAND")) { ret = -ENODEV; - goto out_unlock_client_semphore; + goto out_unlock_client_semaphore; } down_read(&gxp->vd_semaphore); @@ -825,7 +879,7 @@ static int gxp_etm_trace_cleanup_command(struct gxp_client *client, out: up_read(&gxp->vd_semaphore); -out_unlock_client_semphore: +out_unlock_client_semaphore: up_read(&client->semaphore); return ret; @@ -854,7 +908,7 @@ gxp_etm_get_trace_info_command(struct gxp_client *client, if (!check_client_has_available_vd_wakelock( client, "GXP_ETM_GET_TRACE_INFO_COMMAND")) { ret = -ENODEV; - goto out_unlock_client_semphore; + goto out_unlock_client_semaphore; } down_read(&gxp->vd_semaphore); @@ -906,7 +960,7 @@ out_free_header: out: up_read(&gxp->vd_semaphore); -out_unlock_client_semphore: +out_unlock_client_semaphore: up_read(&client->semaphore); return ret; 
@@ -946,7 +1000,7 @@ static int gxp_disable_telemetry(struct gxp_client *client, __u8 __user *argp) static int gxp_map_tpu_mbx_queue(struct gxp_client *client, struct gxp_tpu_mbx_queue_ioctl __user *argp) { -#if IS_ENABLED(CONFIG_ANDROID) && !IS_ENABLED(CONFIG_GXP_GEM5) +#if (IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_ANDROID)) && !IS_ENABLED(CONFIG_GXP_GEM5) struct gxp_dev *gxp = client->gxp; struct edgetpu_ext_mailbox_info *mbx_info; struct gxp_tpu_mbx_queue_ioctl ibuf; @@ -969,7 +1023,7 @@ static int gxp_map_tpu_mbx_queue(struct gxp_client *client, if (!check_client_has_available_vd(client, "GXP_MAP_TPU_MBX_QUEUE")) { ret = -ENODEV; - goto out_unlock_client_semphore; + goto out_unlock_client_semaphore; } down_read(&gxp->vd_semaphore); @@ -1039,7 +1093,7 @@ out_free: out: up_read(&gxp->vd_semaphore); -out_unlock_client_semphore: +out_unlock_client_semaphore: up_write(&client->semaphore); return ret; @@ -1051,7 +1105,7 @@ out_unlock_client_semphore: static int gxp_unmap_tpu_mbx_queue(struct gxp_client *client, struct gxp_tpu_mbx_queue_ioctl __user *argp) { -#if IS_ENABLED(CONFIG_ANDROID) && !IS_ENABLED(CONFIG_GXP_GEM5) +#if (IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_ANDROID)) && !IS_ENABLED(CONFIG_GXP_GEM5) struct gxp_dev *gxp = client->gxp; struct gxp_tpu_mbx_queue_ioctl ibuf; struct edgetpu_ext_client_info gxp_tpu_info; @@ -1198,6 +1252,13 @@ static int gxp_acquire_wake_lock_compat( } down_write(&client->semaphore); + if ((ibuf.components_to_wake & WAKELOCK_VIRTUAL_DEVICE) && + (!client->vd)) { + dev_err(gxp->dev, + "Must allocate a virtual device to acquire VIRTUAL_DEVICE wakelock\n"); + ret = -EINVAL; + goto out; + } /* Acquire a BLOCK wakelock if requested */ if (ibuf.components_to_wake & WAKELOCK_BLOCK) { @@ -1258,14 +1319,13 @@ static int gxp_acquire_wake_lock_compat( client->has_vd_wakelock = true; } - gxp_pm_update_requested_power_state( + gxp_pm_update_requested_power_states( gxp, client->requested_power_state, client->requested_aggressor, - 
aur_state_array[ibuf.gxp_power_state], true); + aur_state_array[ibuf.gxp_power_state], true, + client->requested_memory_power_state, + aur_memory_state_array[ibuf.memory_power_state]); client->requested_power_state = aur_state_array[ibuf.gxp_power_state]; client->requested_aggressor = true; - gxp_pm_update_requested_memory_power_state( - gxp, client->requested_memory_power_state, - aur_memory_state_array[ibuf.memory_power_state]); client->requested_memory_power_state = aur_memory_state_array[ibuf.memory_power_state]; out: @@ -1322,6 +1382,13 @@ static int gxp_acquire_wake_lock(struct gxp_client *client, } down_write(&client->semaphore); + if ((ibuf.components_to_wake & WAKELOCK_VIRTUAL_DEVICE) && + (!client->vd)) { + dev_err(gxp->dev, + "Must allocate a virtual device to acquire VIRTUAL_DEVICE wakelock\n"); + ret = -EINVAL; + goto out; + } /* Acquire a BLOCK wakelock if requested */ if (ibuf.components_to_wake & WAKELOCK_BLOCK) { @@ -1377,14 +1444,13 @@ static int gxp_acquire_wake_lock(struct gxp_client *client, } requested_aggressor = (ibuf.flags & GXP_POWER_NON_AGGRESSOR) == 0; - gxp_pm_update_requested_power_state( + gxp_pm_update_requested_power_states( gxp, client->requested_power_state, client->requested_aggressor, - aur_state_array[ibuf.gxp_power_state], requested_aggressor); + aur_state_array[ibuf.gxp_power_state], requested_aggressor, + client->requested_memory_power_state, + aur_memory_state_array[ibuf.memory_power_state]); client->requested_power_state = aur_state_array[ibuf.gxp_power_state]; client->requested_aggressor = requested_aggressor; - gxp_pm_update_requested_memory_power_state( - gxp, client->requested_memory_power_state, - aur_memory_state_array[ibuf.memory_power_state]); client->requested_memory_power_state = aur_memory_state_array[ibuf.memory_power_state]; out: @@ -1464,13 +1530,12 @@ static int gxp_release_wake_lock(struct gxp_client *client, __u32 __user *argp) * Other clients may still be using the BLK_AUR, check if we need * to change 
the power state. */ - gxp_pm_update_requested_power_state( + gxp_pm_update_requested_power_states( gxp, client->requested_power_state, - client->requested_aggressor, AUR_OFF, true); - client->requested_power_state = AUR_OFF; - gxp_pm_update_requested_memory_power_state( - gxp, client->requested_memory_power_state, + client->requested_aggressor, AUR_OFF, true, + client->requested_memory_power_state, AUR_MEM_UNDEFINED); + client->requested_power_state = AUR_OFF; client->requested_memory_power_state = AUR_MEM_UNDEFINED; client->has_block_wakelock = false; @@ -1500,7 +1565,7 @@ static int gxp_map_dmabuf(struct gxp_client *client, if (!check_client_has_available_vd(client, "GXP_MAP_DMABUF")) { ret = -ENODEV; - goto out; + goto out_unlock; } mapping = gxp_dmabuf_map(gxp, client->vd, ibuf.virtual_core_list, @@ -1510,18 +1575,33 @@ static int gxp_map_dmabuf(struct gxp_client *client, if (IS_ERR(mapping)) { ret = PTR_ERR(mapping); dev_err(gxp->dev, "Failed to map dma-buf (ret=%d)\n", ret); - goto out; + goto out_unlock; + } + + ret = gxp_vd_mapping_store(client->vd, mapping); + if (ret) { + dev_err(gxp->dev, + "Failed to store mapping for dma-buf (ret=%d)\n", ret); + goto out_put; } ibuf.device_address = mapping->device_address; if (copy_to_user(argp, &ibuf, sizeof(ibuf))) { /* If the IOCTL fails, the dma-buf must be unmapped */ - gxp_dmabuf_unmap(gxp, client->vd, ibuf.device_address); + gxp_vd_mapping_remove(client->vd, mapping); ret = -EFAULT; } -out: +out_put: + /* + * Release the reference from creating the dmabuf mapping + * If the mapping was not successfully stored in the owning virtual + * device, this will unmap and cleanup the dmabuf. 
+ */ + gxp_mapping_put(mapping); + +out_unlock: up_read(&client->semaphore); return ret; @@ -1532,6 +1612,7 @@ static int gxp_unmap_dmabuf(struct gxp_client *client, { struct gxp_dev *gxp = client->gxp; struct gxp_map_dmabuf_ioctl ibuf; + struct gxp_mapping *mapping; int ret = 0; if (copy_from_user(&ibuf, argp, sizeof(ibuf))) @@ -1546,7 +1627,29 @@ static int gxp_unmap_dmabuf(struct gxp_client *client, goto out; } - gxp_dmabuf_unmap(gxp, client->vd, ibuf.device_address); + /* + * Fetch and remove the internal mapping records. + * If host_address is not 0, the provided device_address belongs to a + * non-dma-buf mapping. + */ + mapping = gxp_vd_mapping_search(client->vd, ibuf.device_address); + if (IS_ERR_OR_NULL(mapping) || mapping->host_address) { + dev_warn(gxp->dev, "No dma-buf mapped for given IOVA\n"); + /* + * If the device address belongs to a non-dma-buf mapping, + * release the reference to it obtained via the search. + */ + if (!IS_ERR_OR_NULL(mapping)) + gxp_mapping_put(mapping); + ret = -EINVAL; + goto out; + } + + /* Remove the mapping from its VD, releasing the VD's reference */ + gxp_vd_mapping_remove(client->vd, mapping); + + /* Release the reference from gxp_vd_mapping_search() */ + gxp_mapping_put(mapping); out: up_read(&client->semaphore); @@ -1658,6 +1761,56 @@ gxp_get_interface_version(struct gxp_client *client, return 0; } +static int gxp_trigger_debug_dump(struct gxp_client *client, + __u32 __user *argp) +{ + struct gxp_dev *gxp = client->gxp; + int phys_core, i; + u32 core_bits; + int ret = 0; + + if (!uid_eq(current_euid(), GLOBAL_ROOT_UID)) + return -EPERM; + + if (copy_from_user(&core_bits, argp, sizeof(core_bits))) + return -EFAULT; + + /* Caller must hold VIRTUAL_DEVICE wakelock */ + down_read(&client->semaphore); + + if (!check_client_has_available_vd_wakelock(client, + "GXP_TRIGGER_DEBUG_DUMP")) { + ret = -ENODEV; + goto out_unlock_client_semaphore; + } + + down_read(&gxp->vd_semaphore); + + for (i = 0; i < GXP_NUM_CORES; i++) { + if 
(!(core_bits & BIT(i))) + continue; + phys_core = gxp_vd_virt_core_to_phys_core(client->vd, i); + if (phys_core < 0) { + dev_err(gxp->dev, + "Trigger debug dump failed: Invalid virtual core id (%u)\n", + i); + ret = -EINVAL; + continue; + } + + if (gxp_is_fw_running(gxp, phys_core)) { + gxp_notification_send(gxp, phys_core, + CORE_NOTIF_GENERATE_DEBUG_DUMP); + } + } + + up_read(&gxp->vd_semaphore); +out_unlock_client_semaphore: + up_read(&client->semaphore); + + return ret; +} + static long gxp_ioctl(struct file *file, uint cmd, ulong arg) { struct gxp_client *client = file->private_data; @@ -1746,6 +1899,9 @@ static long gxp_ioctl(struct file *file, uint cmd, ulong arg) case GXP_GET_INTERFACE_VERSION: ret = gxp_get_interface_version(client, argp); break; + case GXP_TRIGGER_DEBUG_DUMP: + ret = gxp_trigger_debug_dump(client, argp); + break; default: ret = -ENOTTY; /* unknown command */ } @@ -1865,7 +2021,7 @@ static int gxp_platform_probe(struct platform_device *pdev) if (IS_ERR_OR_NULL(r)) { dev_err(dev, "Failed to get mailbox%d resource\n", i); ret = -ENODEV; - goto err; + goto err_pm_destroy; } gxp->mbx[i].paddr = r->start; @@ -1874,7 +2030,7 @@ static int gxp_platform_probe(struct platform_device *pdev) if (IS_ERR_OR_NULL(gxp->mbx[i].vaddr)) { dev_err(dev, "Failed to map mailbox%d registers\n", i); ret = -ENODEV; - goto err; + goto err_pm_destroy; } } @@ -1936,20 +2092,29 @@ static int gxp_platform_probe(struct platform_device *pdev) gxp_debug_dump_exit(gxp); } - ret = gxp_mapping_init(gxp); - if (ret) { - dev_err(dev, "Failed to initialize mapping (ret=%d)\n", ret); + mutex_init(&gxp->dsp_firmware_lock); + mutex_init(&gxp->pin_user_pages_lock); + + gxp->domain_pool = kmalloc(sizeof(*gxp->domain_pool), GFP_KERNEL); + if (!gxp->domain_pool) { + ret = -ENOMEM; goto err_debug_dump_exit; } - + ret = gxp_domain_pool_init(gxp, gxp->domain_pool, + GXP_NUM_PREALLOCATED_DOMAINS); + if (ret) { + dev_err(dev, + "Failed to initialize IOMMU domain pool (ret=%d)\n", + ret); + 
goto err_free_domain_pool; + } ret = gxp_vd_init(gxp); if (ret) { dev_err(dev, "Failed to initialize virtual device manager (ret=%d)\n", ret); - goto err_debug_dump_exit; + goto err_domain_pool_destroy; } - gxp_dma_init_default_resources(gxp); /* Get GSA device from device tree */ @@ -1994,15 +2159,20 @@ static int gxp_platform_probe(struct platform_device *pdev) mutex_init(&gxp->client_list_lock); return 0; - err_vd_destroy: gxp_vd_destroy(gxp); +err_domain_pool_destroy: + gxp_domain_pool_destroy(gxp->domain_pool); +err_free_domain_pool: + kfree(gxp->domain_pool); err_debug_dump_exit: gxp_debug_dump_exit(gxp); err_dma_exit: gxp_dma_exit(gxp); err_put_tpu_dev: put_device(gxp->tpu_dev.dev); +err_pm_destroy: + gxp_pm_destroy(gxp); err: misc_deregister(&gxp->misc_dev); devm_kfree(dev, (void *)gxp); @@ -2014,17 +2184,18 @@ static int gxp_platform_remove(struct platform_device *pdev) struct device *dev = &pdev->dev; struct gxp_dev *gxp = platform_get_drvdata(pdev); - gxp_debug_dump_exit(gxp); gxp_remove_debugfs(gxp); gxp_fw_data_destroy(gxp); + if (gxp->gsa_dev) + put_device(gxp->gsa_dev); gxp_vd_destroy(gxp); + gxp_domain_pool_destroy(gxp->domain_pool); + kfree(gxp->domain_pool); + gxp_debug_dump_exit(gxp); gxp_dma_exit(gxp); put_device(gxp->tpu_dev.dev); - if (gxp->gsa_dev) - put_device(gxp->gsa_dev); - misc_deregister(&gxp->misc_dev); - gxp_pm_destroy(gxp); + misc_deregister(&gxp->misc_dev); devm_kfree(dev, (void *)gxp); @@ -5,15 +5,11 @@ * Copyright (C) 2021 Google LLC */ +#include <linux/acpm_dvfs.h> #include <linux/io.h> #include <linux/pm_runtime.h> -#include <linux/refcount.h> #include <linux/types.h> #include <linux/workqueue.h> - -#ifdef CONFIG_GXP_CLOUDRIPPER -#include <linux/acpm_dvfs.h> -#endif #include <soc/google/exynos_pm_qos.h> #include "gxp-bpm.h" @@ -58,7 +54,6 @@ static int gxp_pm_blkpwr_up(struct gxp_dev *gxp) { int ret = 0; -#if defined(CONFIG_GXP_CLOUDRIPPER) && !defined(CONFIG_GXP_TEST) /* * This function is equivalent to pm_runtime_get_sync, but 
will prevent * the pm_runtime refcount from increasing if the call fails. It also @@ -68,7 +63,6 @@ static int gxp_pm_blkpwr_up(struct gxp_dev *gxp) if (ret) dev_err(gxp->dev, "%s: pm_runtime_resume_and_get returned %d\n", __func__, ret); -#endif return ret; } @@ -76,7 +70,6 @@ static int gxp_pm_blkpwr_down(struct gxp_dev *gxp) { int ret = 0; -#if defined(CONFIG_GXP_CLOUDRIPPER) && !defined(CONFIG_GXP_TEST) /* * Need to put TOP LPM into active state before blk off * b/189396709 @@ -93,7 +86,6 @@ static int gxp_pm_blkpwr_down(struct gxp_dev *gxp) */ dev_err(gxp->dev, "%s: pm_runtime_put_sync returned %d\n", __func__, ret); -#endif /* Remove our vote for INT/MIF state (if any) */ exynos_pm_qos_update_request(&gxp->power_mgr->int_min, 0); exynos_pm_qos_update_request(&gxp->power_mgr->mif_min, 0); @@ -105,6 +97,12 @@ static int gxp_pm_blk_set_state_acpm(struct gxp_dev *gxp, unsigned long state, b unsigned long rate; rate = aur_power_state2rate[state]; + if (gxp->power_mgr->thermal_limit && + gxp->power_mgr->thermal_limit < rate) + dev_warn( + gxp->dev, + "Requesting power state higher than current thermal limit (%lu)\n", + rate); if (!aggressor) rate |= BIT(AUR_NON_AGGRESSOR_BIT); return gxp_pm_blk_set_rate_acpm(gxp, rate); @@ -112,12 +110,9 @@ static int gxp_pm_blk_set_state_acpm(struct gxp_dev *gxp, unsigned long state, b int gxp_pm_blk_set_rate_acpm(struct gxp_dev *gxp, unsigned long rate) { - int ret = 0; + int ret = exynos_acpm_set_rate(AUR_DVFS_DOMAIN, rate); -#if defined(CONFIG_GXP_CLOUDRIPPER) - ret = exynos_acpm_set_rate(AUR_DVFS_DOMAIN, rate); dev_dbg(gxp->dev, "%s: rate %lu, ret %d\n", __func__, rate, ret); -#endif return ret; } @@ -192,12 +187,9 @@ out: int gxp_pm_blk_get_state_acpm(struct gxp_dev *gxp) { - int ret = 0; + int ret = exynos_acpm_get_rate(AUR_DVFS_DOMAIN, AUR_DEBUG_CORE_FREQ); -#if defined(CONFIG_GXP_CLOUDRIPPER) - ret = exynos_acpm_get_rate(AUR_DVFS_DOMAIN, AUR_DEBUG_CORE_FREQ); dev_dbg(gxp->dev, "%s: state %d\n", __func__, ret); -#endif 
return ret; } @@ -210,6 +202,7 @@ int gxp_pm_blk_on(struct gxp_dev *gxp) return -ENODEV; } + dev_info(gxp->dev, "Powering on BLK ...\n"); mutex_lock(&gxp->power_mgr->pm_lock); ret = gxp_pm_blkpwr_up(gxp); if (!ret) { @@ -235,12 +228,8 @@ int gxp_pm_blk_off(struct gxp_dev *gxp) dev_err(gxp->dev, "%s: No PM found\n", __func__); return -ENODEV; } + dev_info(gxp->dev, "Powering off BLK ...\n"); mutex_lock(&gxp->power_mgr->pm_lock); - if (refcount_read(&(gxp->power_mgr->blk_wake_ref))) { - dev_err(gxp->dev, "%s: Wake lock not released\n", __func__); - mutex_unlock(&gxp->power_mgr->pm_lock); - return -EBUSY; - } /* * Shouldn't happen unless this function has been called twice without blk_on * first. @@ -326,9 +315,6 @@ int gxp_pm_core_off(struct gxp_dev *gxp, uint core) mutex_lock(&gxp->power_mgr->pm_lock); gxp_lpm_down(gxp, core); mutex_unlock(&gxp->power_mgr->pm_lock); - /* - * TODO: b/199467568 If all cores are off shutdown blk - */ dev_notice(gxp->dev, "%s: Core %d down\n", __func__, core); return 0; } @@ -343,6 +329,11 @@ static int gxp_pm_req_state_locked(struct gxp_dev *gxp, dev_err(gxp->dev, "Invalid state %d\n", state); return -EINVAL; } + if (gxp->power_mgr->curr_state == AUR_OFF) { + dev_err(gxp->dev, + "Cannot request power state when BLK is off\n"); + return -EBUSY; + } if (state != gxp->power_mgr->curr_state || aggressor_vote != gxp->power_mgr->curr_aggressor_vote) { if (state != AUR_OFF) { @@ -353,7 +344,7 @@ static int gxp_pm_req_state_locked(struct gxp_dev *gxp, .using) break; } - /* The workqueue stucks, wait for it */ + /* The workqueue is full, wait for it */ if (i == AUR_NUM_POWER_STATE_WORKER) { dev_warn( gxp->dev, @@ -477,22 +468,26 @@ static void gxp_pm_get_max_voted_power_state(struct gxp_dev *gxp, } } -int gxp_pm_update_requested_power_state(struct gxp_dev *gxp, - enum aur_power_state origin_state, - bool origin_requested_aggressor, - enum aur_power_state requested_state, - bool requested_aggressor) +static int 
gxp_pm_update_requested_power_state( + struct gxp_dev *gxp, enum aur_power_state origin_state, + bool origin_requested_aggressor, enum aur_power_state requested_state, + bool requested_aggressor) { int ret; unsigned long max_state = AUR_OFF; bool aggressor_vote = false; - mutex_lock(&gxp->power_mgr->pm_lock); + lockdep_assert_held(&gxp->power_mgr->pm_lock); + if (gxp->power_mgr->curr_state == AUR_OFF && + requested_state != AUR_OFF) { + dev_warn(gxp->dev, + "The client vote power state %d when BLK is off\n", + requested_state); + } gxp_pm_revoke_power_state_vote(gxp, origin_state, origin_requested_aggressor); gxp_pm_vote_power_state(gxp, requested_state, requested_aggressor); gxp_pm_get_max_voted_power_state(gxp, &max_state, &aggressor_vote); ret = gxp_pm_req_state_locked(gxp, max_state, aggressor_vote); - mutex_unlock(&gxp->power_mgr->pm_lock); return ret; } @@ -527,6 +522,11 @@ static int gxp_pm_req_memory_state_locked(struct gxp_dev *gxp, dev_err(gxp->dev, "Invalid memory state %d\n", state); return -EINVAL; } + if (gxp->power_mgr->curr_state == AUR_OFF) { + dev_err(gxp->dev, + "Cannot request memory power state when BLK is off\n"); + return -EBUSY; + } if (state != gxp->power_mgr->curr_memory_state) { mutex_lock(&gxp->power_mgr->req_pm_qos_work_lock); @@ -534,7 +534,7 @@ static int gxp_pm_req_memory_state_locked(struct gxp_dev *gxp, if (!gxp->power_mgr->req_pm_qos_work[i].using) break; } - /* The workqueue stucks, wait for it */ + /* The workqueue is full, wait for it */ if (i == AUR_NUM_POWER_STATE_WORKER) { dev_warn( gxp->dev, @@ -625,46 +625,44 @@ static unsigned long gxp_pm_get_max_voted_memory_power_state(struct gxp_dev *gxp return state; } -int gxp_pm_update_requested_memory_power_state( +static int gxp_pm_update_requested_memory_power_state( struct gxp_dev *gxp, enum aur_memory_power_state origin_state, enum aur_memory_power_state requested_state) { int ret; unsigned long max_state; - mutex_lock(&gxp->power_mgr->pm_lock); + 
lockdep_assert_held(&gxp->power_mgr->pm_lock); gxp_pm_revoke_memory_power_state_vote(gxp, origin_state); gxp_pm_vote_memory_power_state(gxp, requested_state); max_state = gxp_pm_get_max_voted_memory_power_state(gxp); ret = gxp_pm_req_memory_state_locked(gxp, max_state); - mutex_unlock(&gxp->power_mgr->pm_lock); return ret; } -int gxp_pm_acquire_blk_wakelock(struct gxp_dev *gxp) +int gxp_pm_update_requested_power_states( + struct gxp_dev *gxp, enum aur_power_state origin_state, + bool origin_requested_aggressor, enum aur_power_state requested_state, + bool requested_aggressor, enum aur_memory_power_state origin_mem_state, + enum aur_memory_power_state requested_mem_state) { - mutex_lock(&gxp->power_mgr->pm_lock); - refcount_inc(&(gxp->power_mgr->blk_wake_ref)); - dev_dbg(gxp->dev, "Blk wakelock ref count: %d\n", - refcount_read(&(gxp->power_mgr->blk_wake_ref))); - mutex_unlock(&gxp->power_mgr->pm_lock); - return 0; -} + int ret = 0; -int gxp_pm_release_blk_wakelock(struct gxp_dev *gxp) -{ mutex_lock(&gxp->power_mgr->pm_lock); - if (refcount_read(&(gxp->power_mgr->blk_wake_ref))) { - refcount_dec(&(gxp->power_mgr->blk_wake_ref)); - } else { - dev_err(gxp->dev, "Blk wakelock is already zero\n"); - WARN_ON(1); - mutex_unlock(&gxp->power_mgr->pm_lock); - return -EIO; + if (origin_state != requested_state || + origin_requested_aggressor != requested_aggressor) { + ret = gxp_pm_update_requested_power_state( + gxp, origin_state, origin_requested_aggressor, + requested_state, requested_aggressor); + if (ret) + goto out; } + if (origin_mem_state != requested_mem_state) + ret = gxp_pm_update_requested_memory_power_state( + gxp, origin_mem_state, requested_mem_state); +out: mutex_unlock(&gxp->power_mgr->pm_lock); - dev_notice(gxp->dev, "Release blk wakelock\n"); - return 0; + return ret; } int gxp_pm_init(struct gxp_dev *gxp) @@ -680,7 +678,6 @@ int gxp_pm_init(struct gxp_dev *gxp) mgr->curr_state = AUR_OFF; mgr->curr_memory_state = AUR_MEM_UNDEFINED; mgr->curr_aggressor_vote 
= true; - refcount_set(&(mgr->blk_wake_ref), 0); mgr->ops = &gxp_aur_ops; gxp->power_mgr = mgr; for (i = 0; i < AUR_NUM_POWER_STATE_WORKER; i++) { @@ -700,9 +697,7 @@ int gxp_pm_init(struct gxp_dev *gxp) gxp->power_mgr->force_noc_mux_normal_count = 0; gxp->power_mgr->blk_switch_count = 0l; -#if defined(CONFIG_GXP_CLOUDRIPPER) && !defined(CONFIG_GXP_TEST) pm_runtime_enable(gxp->dev); -#endif exynos_pm_qos_add_request(&mgr->int_min, PM_QOS_DEVICE_THROUGHPUT, 0); exynos_pm_qos_add_request(&mgr->mif_min, PM_QOS_BUS_THROUGHPUT, 0); @@ -714,12 +709,35 @@ int gxp_pm_destroy(struct gxp_dev *gxp) struct gxp_power_manager *mgr; mgr = gxp->power_mgr; - exynos_pm_qos_remove_request(&mgr->int_min); exynos_pm_qos_remove_request(&mgr->mif_min); -#if defined(CONFIG_GXP_CLOUDRIPPER) && !defined(CONFIG_GXP_TEST) + exynos_pm_qos_remove_request(&mgr->int_min); pm_runtime_disable(gxp->dev); -#endif + flush_workqueue(mgr->wq); destroy_workqueue(mgr->wq); mutex_destroy(&mgr->pm_lock); return 0; } + +void gxp_pm_set_thermal_limit(struct gxp_dev *gxp, unsigned long thermal_limit) +{ + mutex_lock(&gxp->power_mgr->pm_lock); + + if (thermal_limit >= aur_power_state2rate[AUR_NOM]) { + dev_warn(gxp->dev, "Thermal limit on DVFS removed\n"); + } else if (thermal_limit >= aur_power_state2rate[AUR_UD]) { + dev_warn(gxp->dev, "Thermals limited to UD\n"); + } else if (thermal_limit >= aur_power_state2rate[AUR_SUD]) { + dev_warn(gxp->dev, "Thermal limited to SUD\n"); + } else if (thermal_limit >= aur_power_state2rate[AUR_UUD]) { + dev_warn(gxp->dev, "Thermal limited to UUD\n"); + } else if (thermal_limit >= aur_power_state2rate[AUR_READY]) { + dev_warn(gxp->dev, "Thermal limited to READY\n"); + } else { + dev_warn(gxp->dev, + "Thermal limit disallows all valid DVFS states\n"); + } + + gxp->power_mgr->thermal_limit = thermal_limit; + + mutex_unlock(&gxp->power_mgr->pm_lock); +} @@ -7,10 +7,10 @@ #ifndef __GXP_PM_H__ #define __GXP_PM_H__ -#include "gxp-internal.h" -#include <linux/refcount.h> #include 
<soc/google/exynos_pm_qos.h> +#include "gxp-internal.h" + #define AUR_DVFS_MIN_RATE 178000 static const uint aur_power_state2rate[] = { 0, 178000, 373000, 750000, 1160000, 178000 }; @@ -94,7 +94,6 @@ struct gxp_power_manager { bool curr_aggressor_vote; int curr_state; int curr_memory_state; - refcount_t blk_wake_ref; struct gxp_pm_device_ops *ops; struct gxp_set_acpm_state_work set_acpm_state_work[AUR_NUM_POWER_STATE_WORKER]; @@ -108,6 +107,8 @@ struct gxp_power_manager { struct exynos_pm_qos_request int_min; struct exynos_pm_qos_request mif_min; int force_noc_mux_normal_count; + /* Max frequency that the thermal driver/ACPM will allow in Hz */ + unsigned long thermal_limit; u64 blk_switch_count; }; @@ -173,34 +174,6 @@ int gxp_pm_core_on(struct gxp_dev *gxp, uint core, bool verbose); int gxp_pm_core_off(struct gxp_dev *gxp, uint core); /** - * gxp_pm_acquire_blk_wakelock() - Acquire blk wakelock to make sure block won't - * shutdown. - * - * Can be called multiple times and it will increase - * reference count. - * - * @gxp: The GXP device to operate - * - * Return: - * * 0 - Wakelock acquired - */ -int gxp_pm_acquire_blk_wakelock(struct gxp_dev *gxp); - -/** - * gxp_pm_release_blk_wakelock() - Release blk wakelock. - * - * Can be called multiple times and it will decrease - * reference count till 0. - * - * @gxp: The GXP device to operate - * - * Return: - * * 0 - Wakelock released - * * -EIO - No wakelock is currently held - */ -int gxp_pm_release_blk_wakelock(struct gxp_dev *gxp); - -/** * gxp_pm_init() - API for initialize PM interface for GXP, should only be * called once per probe * @gxp: The GXP device to operate @@ -247,8 +220,8 @@ int gxp_pm_blk_set_rate_acpm(struct gxp_dev *gxp, unsigned long rate); int gxp_pm_blk_get_state_acpm(struct gxp_dev *gxp); /** - * gxp_pm_update_requested_power_state() - API for a GXP client to vote for a - * requested state. 
+ * gxp_pm_update_requested_power_states() - API for a GXP client to vote for a + * requested power state and a requested memory power state. * @gxp: The GXP device to operate. * @origin_state: An existing old requested state, will be cleared. If this is * the first vote, pass AUR_OFF. @@ -258,32 +231,20 @@ int gxp_pm_blk_get_state_acpm(struct gxp_dev *gxp); * @requested_aggressor: Specify whether the new vote is requested with aggressor * flag. Will take no effect if the @requested state is * AUR_OFF. - * - * Return: - * * 0 - Voting registered - * * -EINVAL - Invalid original state or requested state - */ -int gxp_pm_update_requested_power_state(struct gxp_dev *gxp, - enum aur_power_state origin_state, - bool origin_requested_aggressor, - enum aur_power_state requested_state, - bool requested_aggressor); - -/** - * gxp_pm_update_requested_memory_power_state() - API for a GXP client to vote for a - * requested memory power state. - * @gxp: The GXP device to operate. - * @origin_state: An existing old requested state, will be cleared. If this is + * @origin_mem_state: An existing old requested state, will be cleared. If this is * the first vote, pass AUR_MEM_UNDEFINED. - * @requested_state: The new requested state. + * @requested_mem_state: The new requested state. 
* * Return: * * 0 - Voting registered * * -EINVAL - Invalid original state or requested state */ -int gxp_pm_update_requested_memory_power_state( - struct gxp_dev *gxp, enum aur_memory_power_state origin_state, - enum aur_memory_power_state requested_state); + +int gxp_pm_update_requested_power_states( + struct gxp_dev *gxp, enum aur_power_state origin_state, + bool origin_requested_aggressor, enum aur_power_state requested_state, + bool requested_aggressor, enum aur_memory_power_state origin_mem_state, + enum aur_memory_power_state requested_mem_state); /* * gxp_pm_force_cmu_noc_user_mux_normal() - Force PLL_CON0_NOC_USER MUX switch to the @@ -299,4 +260,13 @@ void gxp_pm_force_cmu_noc_user_mux_normal(struct gxp_dev *gxp); */ void gxp_pm_check_cmu_noc_user_mux(struct gxp_dev *gxp); +/** + * gxp_pm_set_thermal_limit() - Notify the power manager of a thermal limit + * @gxp: The GXP device the limit is set for + * @thermal_limit: The highest frequency, in Hz, the thermal limit allows + * + * The power management code will only use this information for logging. 
+ */ +void gxp_pm_set_thermal_limit(struct gxp_dev *gxp, unsigned long thermal_limit); + #endif /* __GXP_PM_H__ */ diff --git a/gxp-telemetry.c b/gxp-telemetry.c index 18533db..be56c84 100644 --- a/gxp-telemetry.c +++ b/gxp-telemetry.c @@ -340,7 +340,7 @@ int gxp_telemetry_mmap_buffers(struct gxp_dev *gxp, u8 type, return -ENODEV; /* Total size must divide evenly into 1 page-aligned buffer per core */ - if (!total_size || !IS_ALIGNED(total_size, PAGE_SIZE * GXP_NUM_CORES)) + if (!total_size || total_size % (PAGE_SIZE * GXP_NUM_CORES)) return -EINVAL; mutex_lock(&gxp->telemetry_mgr->lock); @@ -628,11 +628,13 @@ int gxp_telemetry_unregister_eventfd(struct gxp_dev *gxp, u8 type) switch (type) { case GXP_TELEMETRY_TYPE_LOGGING: - eventfd_ctx_put(gxp->telemetry_mgr->logging_efd); + if (gxp->telemetry_mgr->logging_efd) + eventfd_ctx_put(gxp->telemetry_mgr->logging_efd); gxp->telemetry_mgr->logging_efd = NULL; break; case GXP_TELEMETRY_TYPE_TRACING: - eventfd_ctx_put(gxp->telemetry_mgr->tracing_efd); + if (gxp->telemetry_mgr->tracing_efd) + eventfd_ctx_put(gxp->telemetry_mgr->tracing_efd); gxp->telemetry_mgr->tracing_efd = NULL; break; default: diff --git a/gxp-telemetry.h b/gxp-telemetry.h index 80436ba..92d12df 100644 --- a/gxp-telemetry.h +++ b/gxp-telemetry.h @@ -11,8 +11,8 @@ #include <linux/refcount.h> #include <linux/types.h> -#include "gxp.h" #include "gxp-internal.h" +#include "gxp.h" struct gxp_telemetry_work { struct work_struct work; diff --git a/gxp-thermal.c b/gxp-thermal.c index d6d867a..ae6049d 100644 --- a/gxp-thermal.c +++ b/gxp-thermal.c @@ -5,6 +5,7 @@ * Copyright (C) 2021 Google LLC */ +#include <linux/acpm_dvfs.h> #include <linux/debugfs.h> #include <linux/device.h> #include <linux/gfp.h> @@ -17,10 +18,6 @@ #include <linux/thermal.h> #include <linux/version.h> -#ifdef CONFIG_GXP_CLOUDRIPPER -#include <linux/acpm_dvfs.h> -#endif - #include "gxp-internal.h" #include "gxp-pm.h" #include "gxp-thermal.h" @@ -91,6 +88,7 @@ static int 
gxp_set_cur_state(struct thermal_cooling_device *cdev, goto out; } thermal->cooling_state = cooling_state; + gxp_pm_set_thermal_limit(thermal->gxp, pwr_state); } else { ret = -EALREADY; } @@ -136,17 +134,13 @@ static int gxp_state2power_internal(unsigned long state, u32 *power, } static int gxp_get_requested_power(struct thermal_cooling_device *cdev, - u32 *power) + u32 *power) { - /* Use ACTIVE_NOM as default value */ - unsigned long power_state = AUR_NOM; + unsigned long power_state; struct gxp_thermal_manager *cooling = cdev->devdata; -#ifdef CONFIG_GXP_CLOUDRIPPER power_state = exynos_acpm_get_rate(AUR_DVFS_DOMAIN, 0); -#endif - return gxp_state2power_internal(power_state, power, - cooling); + return gxp_state2power_internal(power_state, power, cooling); } /* TODO(b/213272324): Move state2power table to dts */ @@ -9,6 +9,7 @@ #include <linux/slab.h> #include "gxp-dma.h" +#include "gxp-domain-pool.h" #include "gxp-firmware.h" #include "gxp-firmware-data.h" #include "gxp-host-device-structs.h" @@ -57,7 +58,7 @@ struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp, { struct gxp_virtual_device *vd; int i; - int err = 0; + int err; /* Assumes 0 < requested_cores <= GXP_NUM_CORES */ if (requested_cores == 0 || requested_cores > GXP_NUM_CORES) @@ -78,9 +79,11 @@ struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp, goto error_free_vd; } for (i = 0; i < requested_cores; i++) { - vd->core_domains[i] = iommu_domain_alloc(gxp->dev->bus); - if (!vd->core_domains[i]) + vd->core_domains[i] = gxp_domain_pool_alloc(gxp->domain_pool); + if (!vd->core_domains[i]) { + err = -EBUSY; goto error_free_domains; + } } vd->mailbox_resp_queues = kcalloc( @@ -96,16 +99,19 @@ struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp, init_waitqueue_head(&vd->mailbox_resp_queues[i].waitq); } + vd->mappings_root = RB_ROOT; + init_rwsem(&vd->mappings_semaphore); + return vd; error_free_domains: for (i -= 1; i >= 0; i--) - iommu_domain_free(vd->core_domains[i]); + 
gxp_domain_pool_free(gxp->domain_pool, vd->core_domains[i]); kfree(vd->core_domains); error_free_vd: kfree(vd); - return err ? ERR_PTR(err) : NULL; + return ERR_PTR(err); } void gxp_vd_release(struct gxp_virtual_device *vd) @@ -113,6 +119,8 @@ void gxp_vd_release(struct gxp_virtual_device *vd) struct gxp_async_response *cur, *nxt; int i; unsigned long flags; + struct rb_node *node; + struct gxp_mapping *mapping; /* Cleanup any unconsumed responses */ for (i = 0; i < vd->num_cores; i++) { @@ -130,8 +138,21 @@ void gxp_vd_release(struct gxp_virtual_device *vd) spin_unlock_irqrestore(&vd->mailbox_resp_queues[i].lock, flags); } + /* + * Release any un-mapped mappings + * Once again, it's not necessary to lock the mappings_semaphore here + * but do it anyway for consistency. + */ + down_write(&vd->mappings_semaphore); + while ((node = rb_first(&vd->mappings_root))) { + mapping = rb_entry(node, struct gxp_mapping, node); + rb_erase(node, &vd->mappings_root); + gxp_mapping_put(mapping); + } + up_write(&vd->mappings_semaphore); + for (i = 0; i < vd->num_cores; i++) - iommu_domain_free(vd->core_domains[i]); + gxp_domain_pool_free(vd->gxp->domain_pool, vd->core_domains[i]); kfree(vd->core_domains); kfree(vd->mailbox_resp_queues); kfree(vd); @@ -294,7 +315,7 @@ void gxp_vd_stop(struct gxp_virtual_device *vd) } } - if (vd->fw_app) { + if (!IS_ERR_OR_NULL(vd->fw_app)) { gxp_fw_data_destroy_app(gxp, vd->fw_app); vd->fw_app = NULL; } @@ -312,6 +333,7 @@ void gxp_vd_suspend(struct gxp_virtual_device *vd) uint virt_core; lockdep_assert_held_write(&gxp->vd_semaphore); + dev_info(gxp->dev, "Suspending VD ...\n"); if (vd->state == GXP_VD_SUSPENDED) { dev_err(gxp->dev, "Attempt to suspend a virtual device twice\n"); @@ -398,6 +420,7 @@ int gxp_vd_resume(struct gxp_virtual_device *vd) uint failed_cores = 0; lockdep_assert_held_write(&gxp->vd_semaphore); + dev_info(gxp->dev, "Resuming VD ...\n"); if (vd->state != GXP_VD_SUSPENDED) { dev_err(gxp->dev, "Attempt to resume a virtual device 
which was not suspended\n"); @@ -419,7 +442,7 @@ int gxp_vd_resume(struct gxp_virtual_device *vd) */ if (vd->blk_switch_count_when_suspended != curr_blk_switch_count) { ret = gxp_firmware_setup_hw_after_block_off( - gxp, core, false); + gxp, core, /*verbose=*/false); if (ret) { vd->state = GXP_VD_UNAVAILABLE; failed_cores |= BIT(core); @@ -435,7 +458,8 @@ int gxp_vd_resume(struct gxp_virtual_device *vd) * Power on the core by explicitly switching its PSM to * PS0 (LPM_ACTIVE_STATE). */ - gxp_lpm_set_state(gxp, core, LPM_ACTIVE_STATE); + gxp_lpm_set_state(gxp, core, LPM_ACTIVE_STATE, + /*verbose=*/false); virt_core++; } } @@ -560,3 +584,144 @@ int gxp_vd_phys_core_to_virt_core(struct gxp_virtual_device *vd, out: return virt_core; } + +int gxp_vd_mapping_store(struct gxp_virtual_device *vd, + struct gxp_mapping *map) +{ + struct rb_node **link; + struct rb_node *parent = NULL; + dma_addr_t device_address = map->device_address; + struct gxp_mapping *mapping; + + link = &vd->mappings_root.rb_node; + + down_write(&vd->mappings_semaphore); + + /* Figure out where to put the new node */ + while (*link) { + parent = *link; + mapping = rb_entry(parent, struct gxp_mapping, node); + + if (mapping->device_address > device_address) + link = &(*link)->rb_left; + else if (mapping->device_address < device_address) + link = &(*link)->rb_right; + else + goto out; + } + + /* Add new node and rebalance the tree. 
*/ + rb_link_node(&map->node, parent, link); + rb_insert_color(&map->node, &vd->mappings_root); + + /* Acquire a reference to the mapping */ + gxp_mapping_get(map); + + up_write(&vd->mappings_semaphore); + + return 0; + +out: + up_write(&vd->mappings_semaphore); + dev_err(vd->gxp->dev, "Duplicate mapping: %pad\n", + &map->device_address); + return -EEXIST; +} + +void gxp_vd_mapping_remove(struct gxp_virtual_device *vd, + struct gxp_mapping *map) +{ + down_write(&vd->mappings_semaphore); + + /* Drop the mapping from this virtual device's records */ + rb_erase(&map->node, &vd->mappings_root); + + /* Release the reference obtained in gxp_vd_mapping_store() */ + gxp_mapping_put(map); + + up_write(&vd->mappings_semaphore); +} + +static bool is_device_address_in_mapping(struct gxp_mapping *mapping, + dma_addr_t device_address) +{ + return ((device_address >= mapping->device_address) && + (device_address < (mapping->device_address + mapping->size))); +} + +static struct gxp_mapping * +gxp_vd_mapping_internal_search(struct gxp_virtual_device *vd, + dma_addr_t device_address, bool check_range) +{ + struct rb_node *node; + struct gxp_mapping *mapping; + + down_read(&vd->mappings_semaphore); + + node = vd->mappings_root.rb_node; + + while (node) { + mapping = rb_entry(node, struct gxp_mapping, node); + if ((mapping->device_address == device_address) || + (check_range && + is_device_address_in_mapping(mapping, device_address))) { + gxp_mapping_get(mapping); + up_read(&vd->mappings_semaphore); + return mapping; /* Found it */ + } else if (mapping->device_address > device_address) { + node = node->rb_left; + } else { + node = node->rb_right; + } + } + + up_read(&vd->mappings_semaphore); + + return NULL; +} + +struct gxp_mapping *gxp_vd_mapping_search(struct gxp_virtual_device *vd, + dma_addr_t device_address) +{ + return gxp_vd_mapping_internal_search(vd, device_address, false); +} + +struct gxp_mapping * +gxp_vd_mapping_search_in_range(struct gxp_virtual_device *vd, + 
dma_addr_t device_address) +{ + return gxp_vd_mapping_internal_search(vd, device_address, true); +} + +struct gxp_mapping *gxp_vd_mapping_search_host(struct gxp_virtual_device *vd, + u64 host_address) +{ + struct rb_node *node; + struct gxp_mapping *mapping; + + /* + * dma-buf mappings can not be looked-up by host address since they are + * not mapped from a user-space address. + */ + if (!host_address) { + dev_dbg(vd->gxp->dev, + "Unable to get dma-buf mapping by host address\n"); + return NULL; + } + + down_read(&vd->mappings_semaphore); + + /* Iterate through the elements in the rbtree */ + for (node = rb_first(&vd->mappings_root); node; node = rb_next(node)) { + mapping = rb_entry(node, struct gxp_mapping, node); + if (mapping->host_address == host_address) { + gxp_mapping_get(mapping); + up_read(&vd->mappings_semaphore); + return mapping; + } + } + + up_read(&vd->mappings_semaphore); + + return NULL; +} @@ -9,11 +9,14 @@ #include <linux/iommu.h> #include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/rwsem.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/wait.h> #include "gxp-internal.h" +#include "gxp-mapping.h" struct mailbox_resp_queue { /* Queue of `struct gxp_async_response`s */ @@ -42,6 +45,8 @@ struct gxp_virtual_device { void *fw_app; struct iommu_domain **core_domains; struct mailbox_resp_queue *mailbox_resp_queues; + struct rb_root mappings_root; + struct rw_semaphore mappings_semaphore; enum gxp_virtual_device_state state; /* * Record the gxp->power_mgr->blk_switch_count when the vd was @@ -77,6 +82,7 @@ void gxp_vd_destroy(struct gxp_dev *gxp); * Return: The virtual address of the virtual device or an ERR_PTR on failure * * -EINVAL - The number of requested cores was invalid * * -ENOMEM - Unable to allocate the virtual device + * * -EBUSY - Not enough iommu domains available */ struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp, u16 requested_cores); @@ -137,6 +143,71 @@ uint 
gxp_vd_virt_core_list_to_phys_core_list(struct gxp_virtual_device *vd, int gxp_vd_phys_core_to_virt_core(struct gxp_virtual_device *vd, u16 phys_core); /** + * gxp_vd_mapping_store() - Store a mapping in a virtual device's records + * @vd: The virtual device @map was created for and will be stored in + * @map: The mapping to store + * + * Acquires a reference to @map if it was successfully stored + * + * Return: + * * 0: Success + * * -EINVAL: @map is already stored in @vd's records + */ +int gxp_vd_mapping_store(struct gxp_virtual_device *vd, + struct gxp_mapping *map); + +/** + * gxp_vd_mapping_remove() - Remove a mapping from a virtual device's records + * @vd: The VD to remove @map from + * @map: The mapping to remove + * + * Releases a reference to @map if it was successfully removed + */ +void gxp_vd_mapping_remove(struct gxp_virtual_device *vd, + struct gxp_mapping *map); + +/** + * gxp_vd_mapping_search() - Obtain a reference to the mapping starting at the + * specified device address + * @vd: The virtual device to search for the mapping + * @device_address: The starting device address of the mapping to find + * + * Obtains a reference to the returned mapping + * + * Return: A pointer to the mapping if found; NULL otherwise + */ +struct gxp_mapping *gxp_vd_mapping_search(struct gxp_virtual_device *vd, + dma_addr_t device_address); + +/** + * gxp_vd_mapping_search_in_range() - Obtain a reference to the mapping which + * contains the specified device address + * @vd: The virtual device to search for the mapping + * @device_address: A device address contained in the buffer the mapping to + * find describes. 
+ * + * Obtains a reference to the returned mapping + * + * Return: A pointer to the mapping if found; NULL otherwise + */ +struct gxp_mapping * +gxp_vd_mapping_search_in_range(struct gxp_virtual_device *vd, + dma_addr_t device_address); + +/** + * gxp_vd_mapping_search_host() - Obtain a reference to the mapping starting at + * the specified user-space address + * @vd: The virtual device to search for the mapping + * @host_address: The starting user-space address of the mapping to find + * + * Obtains a reference to the returned mapping + * + * Return: A pointer to the mapping if found; NULL otherwise + */ +struct gxp_mapping *gxp_vd_mapping_search_host(struct gxp_virtual_device *vd, + u64 host_address); + +/** * gxp_vd_suspend() - Suspend a running virtual device * @vd: The virtual device to suspend * diff --git a/gxp-wakelock.h b/gxp-wakelock.h index 50e4628..ff76325 100644 --- a/gxp-wakelock.h +++ b/gxp-wakelock.h @@ -7,8 +7,8 @@ #ifndef __GXP_WAKELOCK_H__ #define __GXP_WAKELOCK_H__ -#include "gxp.h" #include "gxp-internal.h" +#include "gxp.h" struct gxp_wakelock_manager { /* Protects count and suspended */ @@ -809,4 +809,21 @@ struct gxp_tpu_mbx_queue_ioctl { #define GXP_UNMAP_TPU_MBX_QUEUE \ _IOW(GXP_IOCTL_BASE, 14, struct gxp_tpu_mbx_queue_ioctl) +/* + * Triggers a debug dump to be generated for cores. + * + * The cores requested to generate a debug dump are indicated by the bitmap of + * the argument. For example, an argument of 'b1001 represents a request to + * generate debug dumps for core 0 and 3. + * + * Returns 0 if all the debug dumps for the requested cores are successfully + * triggered. If a debug dump fails to be triggered for one or more requested + * cores, -EINVAL will be returned. + * + * The client must hold a VIRTUAL_DEVICE wakelock. + * + * Note: Root access is required to use this IOCTL. + */ +#define GXP_TRIGGER_DEBUG_DUMP _IOW(GXP_IOCTL_BASE, 27, __u32) + #endif /* __GXP_H__ */ |