author     Jörg Wagner <jorwag@google.com>    2022-12-15 16:21:51 +0000
committer  Jörg Wagner <jorwag@google.com>    2022-12-15 16:28:12 +0000
commit     25e383ffa36a9916065804029fbe3552c71329fe (patch)
tree       1fd24ee61cf42115c75121f9de544814c76cb5a7
parent     9ff5b6f2510d94765def3cf7c1fda01e387cabab (diff)
download   gpu-25e383ffa36a9916065804029fbe3552c71329fe.tar.gz
Mali Valhall Android DDK r41p0-01eac0 KMD
Provenance 7bb206ede984968bd1014b29529e94763b043202 (ipdelivery/EAC/v_r41p0)
VX504X08X-BU-00000-r41p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r41p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r41p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r41p0-01eac0 - Valhall Android Renderscript AOSP parts
Change-Id: I95f741ffe0ec4ee4c8f2c0338778294f1a2a2836
59 files changed, 3752 insertions, 710 deletions
diff --git a/common/include/linux/version_compat_defs.h b/common/include/linux/version_compat_defs.h index d0a0998..335147c 100644 --- a/common/include/linux/version_compat_defs.h +++ b/common/include/linux/version_compat_defs.h @@ -24,10 +24,12 @@ #include <linux/version.h> -#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE typedef unsigned int __poll_t; #endif +#if KERNEL_VERSION(4, 9, 78) >= LINUX_VERSION_CODE + #ifndef EPOLLHUP #define EPOLLHUP POLLHUP #endif @@ -44,4 +46,6 @@ typedef unsigned int __poll_t; #define EPOLLRDNORM POLLRDNORM #endif +#endif + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h index 613eb1f..7bb91be 100644 --- a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h +++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h @@ -29,7 +29,11 @@ #include <linux/types.h> #define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4) +#if MALI_USE_CSF +#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (65) +#else /* MALI_USE_CSF */ #define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60) +#endif /* !MALI_USE_CSF */ #define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4) #define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \ (enable_mask & (1 << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT))) @@ -57,6 +61,10 @@ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE) #define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX (0x7FFFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX (0x3FFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX (0x7FFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX (0xFFFull) #define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull) #define DUMMY_IMPLEMENTATION_L2_PRESENT (0x1ull) #define DUMMY_IMPLEMENTATION_STACK_PRESENT (0xFull) diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index 3c35d59..fc08158 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -69,7 +69,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"r40p0-01eac0"' +MALI_RELEASE_NAME ?= '"r41p0-01eac0"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_DEBUG), y) MALI_UNIT_TEST = 1 diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig index 8e689c1..701b68f 100644 --- a/mali_kbase/Kconfig +++ b/mali_kbase/Kconfig @@ -94,6 +94,21 @@ config MALI_MIDGARD_ENABLE_TRACE Enables tracing in kbase. 
Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled +config MALI_FW_CORE_DUMP + bool "Enable support for FW core dump" + depends on MALI_MIDGARD && MALI_CSF_SUPPORT + default y + help + Adds ability to request firmware core dump through the "fw_core_dump" + debugfs file + + Example: + * To explicitly request core dump: + echo 1 > /sys/kernel/debug/mali0/fw_core_dump + * To output current core dump (after explicitly requesting a core dump, + or kernel driver reported an internal firmware error): + cat /sys/kernel/debug/mali0/fw_core_dump + config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" depends on MALI_MIDGARD && !MALI_CSF_SUPPORT @@ -207,20 +222,6 @@ config MALI_GEM5_BUILD comment "Debug options" depends on MALI_MIDGARD && MALI_EXPERT -config MALI_FW_CORE_DUMP - bool "Enable support for FW core dump" - depends on MALI_MIDGARD && MALI_EXPERT && MALI_CSF_SUPPORT - default n - help - Adds ability to request firmware core dump - - Example: - * To explicitly request core dump: - echo 1 >/sys/kernel/debug/mali0/fw_core_dump - * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump - config MALI_DEBUG bool "Enable debug build" depends on MALI_MIDGARD && MALI_EXPERT diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile index 01fad8f..e135d86 100644 --- a/mali_kbase/Makefile +++ b/mali_kbase/Makefile @@ -70,6 +70,12 @@ ifeq ($(CONFIG_MALI_MIDGARD),m) endif endif + ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) + CONFIG_MALI_FW_CORE_DUMP ?= y + else + CONFIG_MALI_FW_CORE_DUMP ?= n + endif + # # Expert/Debug/Test released configurations # @@ -149,6 +155,7 @@ else CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n + CONFIG_MALI_FW_CORE_DUMP = n endif # All Mali CONFIG should be listed here @@ -189,7 +196,8 @@ CONFIGS := \ CONFIG_MALI_KUTF_IRQ_TEST \ CONFIG_MALI_KUTF_CLK_RATE_TRACE \ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ - CONFIG_MALI_XEN + CONFIG_MALI_XEN \ + CONFIG_MALI_FW_CORE_DUMP # diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig index a7f038f..d294543 100644 --- a/mali_kbase/Mconfig +++ b/mali_kbase/Mconfig @@ -97,6 +97,21 @@ config MALI_MIDGARD_ENABLE_TRACE Enables tracing in kbase. Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled +config MALI_FW_CORE_DUMP + bool "Enable support for FW core dump" + depends on MALI_MIDGARD && MALI_CSF_SUPPORT + default y + help + Adds ability to request firmware core dump through the "fw_core_dump" + debugfs file + + Example: + * To explicitly request core dump: + echo 1 > /sys/kernel/debug/mali0/fw_core_dump + * To output current core dump (after explicitly requesting a core dump, + or kernel driver reported an internal firmware error): + cat /sys/kernel/debug/mali0/fw_core_dump + config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" depends on MALI_MIDGARD && !MALI_CSF_SUPPORT @@ -170,20 +185,6 @@ config MALI_CORESTACK If unsure, say N. 
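An aside on the version_compat_defs.h hunk at the top of the diff: the change from >= to > fixes an off-by-one in the kernel-version gate. With >=, a v4.16.0 kernel, which already provides __poll_t, would also compile the fallback typedef and hit a redefinition; with >, the fallback is limited to kernels strictly older than 4.16. A minimal sketch of the resulting pattern (an illustrative restatement of the patched header, not new driver code):

    #include <linux/version.h>

    /* Fallback only for kernels that predate the __poll_t type (< v4.16) */
    #if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE
    typedef unsigned int __poll_t;
    #endif

    /* EPOLL* fallbacks, now additionally gated to kernels <= v4.9.78,
     * where the EPOLL* names are not yet available */
    #if KERNEL_VERSION(4, 9, 78) >= LINUX_VERSION_CODE
    #ifndef EPOLLHUP
    #define EPOLLHUP POLLHUP
    #endif
    #endif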
-config MALI_FW_CORE_DUMP - bool "Enable support for FW core dump" - depends on MALI_MIDGARD && MALI_EXPERT && MALI_CSF_SUPPORT - default n - help - Adds ability to request firmware core dump - - Example: - * To explicitly request core dump: - echo 1 >/sys/kernel/debug/mali0/fw_core_dump - * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump - choice prompt "Error injection level" depends on MALI_MIDGARD && MALI_EXPERT diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index 3967929..19c5021 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -319,7 +319,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { @@ -364,7 +364,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { @@ -412,7 +412,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0xf, .gpu_features_hi = 0, - .shader_present = 0xFF, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX, .stack_present = 0xF, }, { @@ -428,7 +428,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0xf, .gpu_features_hi = 0, - .shader_present = 0xFF, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX, .stack_present = 0xF, }, }; @@ -530,17 +530,18 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; /* Currently only primary counter blocks are supported */ - if (WARN_ON(event_index >= 64)) + if (WARN_ON(event_index >= + (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE))) return 0; /* The actual events start index 4 onwards. Spec also says PRFCNT_EN, * TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for * IPA counters. If selected, the value returned for them will be zero. */ - if (WARN_ON(event_index <= 3)) + if (WARN_ON(event_index < KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS)) return 0; - event_index -= 4; + event_index -= KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; spin_lock_irqsave(&performance_counters.access_lock, flags); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 2345db5..f864661 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -1142,13 +1142,22 @@ static bool can_power_down_l2(struct kbase_device *kbdev) #if MALI_USE_CSF /* Due to the HW issue GPU2019-3878, need to prevent L2 power off * whilst MMU command is in progress. + * Also defer the power-down if MMU is in process of page migration. 
*/ - return !kbdev->mmu_hw_operation_in_progress; + return !kbdev->mmu_hw_operation_in_progress && !kbdev->mmu_page_migrate_in_progress; #else - return true; + return !kbdev->mmu_page_migrate_in_progress; #endif } +static bool can_power_up_l2(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Avoiding l2 transition if MMU is undergoing page migration */ + return !kbdev->mmu_page_migrate_in_progress; +} + static bool need_tiler_control(struct kbase_device *kbdev) { #if MALI_USE_CSF @@ -1220,7 +1229,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) switch (backend->l2_state) { case KBASE_L2_OFF: - if (kbase_pm_is_l2_desired(kbdev)) { + if (kbase_pm_is_l2_desired(kbdev) && can_power_up_l2(kbdev)) { #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) /* Enable HW timer of IPA control before * L2 cache is powered-up. diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index cd5a6a3..cdc51d5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -995,4 +995,27 @@ static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbd } #endif +/** + * kbase_pm_l2_allow_mmu_page_migration - L2 state allows MMU page migration or not + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Check whether the L2 state is in power transition phase or not. If it is, the MMU + * page migration should be deferred. The caller must hold hwaccess_lock, and, if MMU + * page migration is intended, immediately start the MMU migration action without + * dropping the lock. When page migration begins, a flag is set in kbdev that would + * prevent the L2 state machine traversing into power transition phases, until + * the MMU migration action ends. 
+ * + * Return: true if MMU page migration is allowed + */ +static inline bool kbase_pm_l2_allow_mmu_page_migration(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + return (backend->l2_state != KBASE_L2_PEND_ON && backend->l2_state != KBASE_L2_PEND_OFF); +} + #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c index b6abfc4..792f724 100644 --- a/mali_kbase/context/mali_kbase_context.c +++ b/mali_kbase/context/mali_kbase_context.c @@ -129,10 +129,6 @@ int kbase_context_common_init(struct kbase_context *kctx) /* creating a context is considered a disjoint event */ kbase_disjoint_event(kctx->kbdev); - kctx->as_nr = KBASEP_AS_NR_INVALID; - - atomic_set(&kctx->refcount, 0); - spin_lock_init(&kctx->mm_update_lock); kctx->process_mm = NULL; atomic_set(&kctx->nonmapped_pages, 0); @@ -251,15 +247,8 @@ static void kbase_remove_kctx_from_process(struct kbase_context *kctx) void kbase_context_common_term(struct kbase_context *kctx) { - unsigned long flags; int pages; - mutex_lock(&kctx->kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); - kbase_ctx_sched_remove_ctx(kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); - mutex_unlock(&kctx->kbdev->mmu_hw_mutex); - pages = atomic_read(&kctx->used_pages); if (pages != 0) dev_warn(kctx->kbdev->dev, diff --git a/mali_kbase/csf/Kbuild b/mali_kbase/csf/Kbuild index 56c69a1..2b02279 100644 --- a/mali_kbase/csf/Kbuild +++ b/mali_kbase/csf/Kbuild @@ -36,6 +36,7 @@ mali_kbase-y += \ csf/mali_kbase_csf_cpu_queue_debugfs.o \ csf/mali_kbase_csf_event.o \ csf/mali_kbase_csf_firmware_log.o \ + csf/mali_kbase_csf_firmware_core_dump.o \ csf/mali_kbase_csf_tiler_heap_reclaim.o mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o @@ -44,7 +45,6 @@ mali_kbase-$(CONFIG_MALI_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o mali_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o - ifeq ($(KBUILD_EXTMOD),) # in-tree -include $(src)/csf/ipa_control/Kbuild diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c index f48344e..b17c010 100644 --- a/mali_kbase/csf/mali_kbase_csf.c +++ b/mali_kbase/csf/mali_kbase_csf.c @@ -40,6 +40,8 @@ #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) #define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) +#define PROTM_ALLOC_MAX_RETRIES ((u8)5) + const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = { KBASE_QUEUE_GROUP_PRIORITY_HIGH, KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, @@ -136,7 +138,7 @@ static void gpu_munmap_user_io_pages(struct kbase_context *kctx, struct kbase_va size_t num_pages = 2; kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn, phys, - num_pages, MCU_AS_NR); + num_pages, MCU_AS_NR, true); WARN_ON(reg->flags & KBASE_REG_FREE); @@ -194,25 +196,25 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev, return ret; /* Map input page */ - ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, - &phys[0], 1, mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_IO, mmu_sync_info); + ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, &phys[0], 1, + mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO, mmu_sync_info, + NULL, false); if (ret) goto bad_insert; /* Map output page, it needs rw access */ mem_flags |= KBASE_REG_GPU_WR; - ret = kbase_mmu_insert_pages(kbdev, 
&kbdev->csf.mcu_mmu, - reg->start_pfn + 1, &phys[1], 1, mem_flags, - MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO, - mmu_sync_info); + ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn + 1, &phys[1], 1, + mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO, mmu_sync_info, + NULL, false); if (ret) goto bad_insert_output_page; return 0; bad_insert_output_page: - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, phys, 1, MCU_AS_NR); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, phys, 1, MCU_AS_NR, + true); bad_insert: mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(kbdev, reg); @@ -307,8 +309,7 @@ static void release_queue(struct kbase_queue *queue); * If an explicit or implicit unbind was missed by the userspace then the * mapping will persist. On process exit kernel itself will remove the mapping. */ -static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, - struct kbase_queue *queue) +void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) { const size_t num_pages = 2; @@ -327,6 +328,7 @@ static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, */ release_queue(queue); } +KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages); int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) @@ -345,7 +347,6 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], num_pages, queue->phys, false); - if (ret != num_pages) goto phys_alloc_failed; @@ -396,6 +397,7 @@ phys_alloc_failed: return -ENOMEM; } +KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages); static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, u8 group_handle) @@ -413,6 +415,12 @@ static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, return NULL; } +struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle) +{ + return find_queue_group(kctx, group_handle); +} +KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group); + int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, u8 group_handle) { @@ -463,6 +471,17 @@ static void release_queue(struct kbase_queue *queue) "Remove any pending command queue fatal from ctx %d_%d", queue->kctx->tgid, queue->kctx->id); kbase_csf_event_remove_error(queue->kctx, &queue->error); + + /* After this the Userspace would be able to free the + * memory for GPU queue. In case the Userspace missed + * terminating the queue, the cleanup will happen on + * context termination where tear down of region tracker + * would free up the GPU queue memory. 
+ */ + kbase_gpu_vm_lock(queue->kctx); + kbase_va_region_no_user_free_put(queue->kctx, queue->queue_reg); + kbase_gpu_vm_unlock(queue->kctx); + kfree(queue); } } @@ -516,7 +535,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx, region = kbase_region_tracker_find_region_enclosing_address(kctx, queue_addr); - if (kbase_is_region_invalid_or_free(region)) { + if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) || + region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { ret = -ENOENT; goto out_unlock_vm; } @@ -565,7 +585,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->kctx = kctx; queue->base_addr = queue_addr; - queue->queue_reg = region; + queue->queue_reg = kbase_va_region_no_user_free_get(kctx, region); queue->size = (queue_size << PAGE_SHIFT); queue->csi_index = KBASEP_IF_NR_INVALID; queue->enabled = false; @@ -603,7 +623,6 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->extract_ofs = 0; - region->flags |= KBASE_REG_NO_USER_FREE; region->user_data = queue; /* Initialize the cs_trace configuration parameters, When buffer_size @@ -697,16 +716,8 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, unbind_queue(kctx, queue); kbase_gpu_vm_lock(kctx); - if (!WARN_ON(!queue->queue_reg)) { - /* After this the Userspace would be able to free the - * memory for GPU queue. In case the Userspace missed - * terminating the queue, the cleanup will happen on - * context termination where tear down of region tracker - * would free up the GPU queue memory. - */ - queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE; + if (!WARN_ON(!queue->queue_reg)) queue->queue_reg->user_data = NULL; - } kbase_gpu_vm_unlock(kctx); release_queue(queue); @@ -870,6 +881,15 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, if (WARN_ON(slot_bitmap > allowed_bitmap)) return; + /* The access to GLB_DB_REQ/ACK needs to be ordered with respect to CSG_REQ/ACK and + * CSG_DB_REQ/ACK to avoid a scenario where a CSI request overlaps with a CSG request + * or 2 CSI requests overlap and FW ends up missing the 2nd request. + * Memory barrier is required, both on Host and FW side, to guarantee the ordering. + * + * 'osh' is used as CPU and GPU would be in the same Outer shareable domain. + */ + dmb(osh); + value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK); value ^= slot_bitmap; kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value, @@ -1168,10 +1188,9 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx, goto add_va_region_failed; /* Update MMU table */ - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, - reg->start_pfn, &s_buf->phy[0], nr_pages, - mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_FW, mmu_sync_info); + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn, + &s_buf->phy[0], nr_pages, mem_flags, MCU_AS_NR, + KBASE_MEM_GROUP_CSF_FW, mmu_sync_info, NULL, false); if (err) goto mmu_insert_failed; @@ -1198,83 +1217,47 @@ phy_alloc_failed: } /** - * create_protected_suspend_buffer() - Create protected-mode suspend buffer - * per queue group + * init_protected_suspend_buffer() - Reserve the VA range for the protected-mode + * suspend buffer of a queue group. + * Allocation of physical pages will happen when + * queue group enters protected mode. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
* @s_buf: Pointer to suspend buffer that is attached to queue group * - * Return: 0 if suspend buffer is successfully allocated and reflected to GPU - * MMU page table. Otherwise -ENOMEM. + * Return: 0 if suspend buffer init is successful, otherwise negative error value. */ -static int create_protected_suspend_buffer(struct kbase_device *const kbdev, - struct kbase_protected_suspend_buffer *s_buf) +static int init_protected_suspend_buffer(struct kbase_device *const kbdev, + struct kbase_protected_suspend_buffer *s_buf) { struct kbase_va_region *reg = NULL; - struct tagged_addr *phys = NULL; - const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); int err = 0; - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. - */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + s_buf->reg = NULL; + s_buf->pma = NULL; + s_buf->alloc_retries = 0; /* Allocate and initialize Region Object */ reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, nr_pages, KBASE_REG_ZONE_MCU_SHARED); - if (!reg) + if (unlikely(!reg)) return -ENOMEM; - phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); - if (!phys) { - err = -ENOMEM; - goto phy_alloc_failed; - } - - s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, - nr_pages, true); - if (s_buf->pma == NULL) { - err = -ENOMEM; - goto pma_alloc_failed; - } - /* Insert Region Object into rbtree and make virtual address available - * to map it to physical page + * to map it to physical page. */ mutex_lock(&kbdev->csf.reg_lock); err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1); reg->flags &= ~KBASE_REG_FREE; mutex_unlock(&kbdev->csf.reg_lock); - if (err) - goto add_va_region_failed; - - /* Update MMU table */ - err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, - phys, nr_pages, mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_FW, mmu_sync_info); - if (err) - goto mmu_insert_failed; - - s_buf->reg = reg; - kfree(phys); - return 0; - -mmu_insert_failed: - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, reg); - mutex_unlock(&kbdev->csf.reg_lock); - -add_va_region_failed: - kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true); -pma_alloc_failed: - kfree(phys); -phy_alloc_failed: - kfree(reg); + if (unlikely(err)) + kbase_free_alloced_region(reg); + else + s_buf->reg = reg; return err; } @@ -1305,12 +1288,10 @@ static int create_suspend_buffers(struct kbase_context *const kctx, } if (kctx->kbdev->csf.pma_dev) { - err = create_protected_suspend_buffer(kctx->kbdev, - &group->protected_suspend_buf); + err = init_protected_suspend_buffer(kctx->kbdev, &group->protected_suspend_buf); if (err) { term_normal_suspend_buffer(kctx, &group->normal_suspend_buf); - dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n"); } } else { group->protected_suspend_buf.reg = NULL; @@ -1521,7 +1502,8 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx, lockdep_assert_held(&kctx->csf.lock); WARN_ON(kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, - s_buf->reg->start_pfn, s_buf->phy, nr_pages, MCU_AS_NR)); + s_buf->reg->start_pfn, s_buf->phy, nr_pages, MCU_AS_NR, + true)); WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); @@ -1540,38 +1522,41 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx, } /** - * term_protected_suspend_buffer() - Free normal-mode suspend buffer of + *
term_protected_suspend_buffer() - Free protected-mode suspend buffer of * queue group * * @kbdev: Instance of a GPU platform device that implements a CSF interface. - * @s_buf: Pointer to queue group suspend buffer to be freed + * @sbuf: Pointer to queue group suspend buffer to be freed */ static void term_protected_suspend_buffer(struct kbase_device *const kbdev, - struct kbase_protected_suspend_buffer *s_buf) + struct kbase_protected_suspend_buffer *sbuf) { - const size_t nr_pages = - PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); - struct tagged_addr *phys = kmalloc(sizeof(*phys) * nr_pages, GFP_KERNEL); - size_t i = 0; + if (sbuf->pma) { + const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + size_t i = 0; + struct tagged_addr *phys = kmalloc(sizeof(*phys) * nr_pages, GFP_KERNEL); - for (i = 0; phys && i < nr_pages; i++) - phys[i] = as_tagged(s_buf->pma[i]->pa); + for (i = 0; phys && i < nr_pages; i++) + phys[i] = as_tagged(sbuf->pma[i]->pa); - WARN_ON(kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, s_buf->reg->start_pfn, phys, - nr_pages, MCU_AS_NR)); + WARN_ON(kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, sbuf->reg->start_pfn, + phys, nr_pages, MCU_AS_NR, true)); - kfree(phys); + kfree(phys); + kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true); + sbuf->pma = NULL; + } - WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); + if (sbuf->reg) { + WARN_ON(sbuf->reg->flags & KBASE_REG_FREE); - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, s_buf->reg); - mutex_unlock(&kbdev->csf.reg_lock); + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(kbdev, sbuf->reg); + mutex_unlock(&kbdev->csf.reg_lock); - kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true); - s_buf->pma = NULL; - kfree(s_buf->reg); - s_buf->reg = NULL; + kbase_free_alloced_region(sbuf->reg); + sbuf->reg = NULL; + } } void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) @@ -1738,6 +1723,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx, kfree(group); } +KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate); int kbase_csf_queue_group_suspend(struct kbase_context *kctx, struct kbase_suspend_copy_buffer *sus_buf, @@ -2017,12 +2003,10 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * registered. */ #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - if (atomic_read(&queue->refcount) != 1) + WARN_ON(atomic_read(&queue->refcount) != 1); #else - if (refcount_read(&queue->refcount) != 1) + WARN_ON(refcount_read(&queue->refcount) != 1); #endif - dev_warn(kctx->kbdev->dev, - "Releasing queue with incorrect refcounting!\n"); list_del_init(&queue->link); release_queue(queue); } @@ -2369,6 +2353,85 @@ static void handle_progress_timer_event(struct kbase_queue_group *const group) } /** + * alloc_grp_protected_suspend_buffer_pages() - Allocate physical pages from the protected + * memory for the protected mode suspend buffer. + * @group: Pointer to the GPU queue group. + * + * Return: 0 if suspend buffer allocation is successful or if it's already allocated, otherwise + * negative error value.
+ */ +static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group) +{ + struct kbase_device *const kbdev = group->kctx->kbdev; + struct kbase_context *kctx = group->kctx; + struct tagged_addr *phys = NULL; + const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; + struct protected_memory_allocation **pma = NULL; + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + size_t nr_pages; + int err = 0; + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + if (likely(sbuf->pma)) + return 0; + + nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); + if (unlikely(!phys)) { + err = -ENOMEM; + goto phys_free; + } + + pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true); + if (pma == NULL) { + err = -ENOMEM; + goto phys_free; + } + + mutex_lock(&kctx->csf.lock); + + if (unlikely(!sbuf->reg)) { + dev_err(kbdev->dev, + "No VA region for the group %d of context %d_%d trying to enter protected mode", + group->handle, group->kctx->tgid, group->kctx->id); + err = -EINVAL; + kbase_csf_protected_memory_free(kbdev, pma, nr_pages, true); + goto unlock; + } + + /* Update MMU table */ + err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, sbuf->reg->start_pfn, phys, + nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, + mmu_sync_info, NULL, true); + if (unlikely(err)) + kbase_csf_protected_memory_free(kbdev, pma, nr_pages, true); + else + sbuf->pma = pma; + +unlock: + mutex_unlock(&kctx->csf.lock); +phys_free: + kfree(phys); + return err; +} + +static void report_group_fatal_error(struct kbase_queue_group *const group) +{ + struct base_gpu_queue_group_error const + err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { .fatal_group = { + .status = GPU_EXCEPTION_TYPE_SW_FAULT_0, + } } }; + + kbase_csf_add_group_fatal_error(group, &err_payload); + kbase_event_wakeup(group->kctx); +} + +/** * protm_event_worker - Protected mode switch request event handler * called from a workqueue. 
* @@ -2380,10 +2443,26 @@ static void protm_event_worker(struct work_struct *data) { struct kbase_queue_group *const group = container_of(data, struct kbase_queue_group, protm_event_work); + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + int err = 0; KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); - kbase_csf_scheduler_group_protm_enter(group); + + err = alloc_grp_protected_suspend_buffer_pages(group); + if (!err) { + kbase_csf_scheduler_group_protm_enter(group); + } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { + sbuf->alloc_retries++; + /* try again to allocate pages */ + queue_work(group->kctx->csf.wq, &group->protm_event_work); + } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { + dev_err(group->kctx->kbdev->dev, + "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", + group->handle, group->kctx->tgid, group->kctx->id); + report_group_fatal_error(group); + } + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u); } @@ -2750,6 +2829,9 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, track->protm_grp = group; } + if (!group->protected_suspend_buf.pma) + queue_work(group->kctx->csf.wq, &group->protm_event_work); + if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { clear_bit(group->csg_nr, scheduler->csg_slots_idle_mask); @@ -2791,8 +2873,6 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) return; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); - ginfo = &kbdev->csf.global_iface.groups[csg_nr]; req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); @@ -2801,7 +2881,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c /* There may not be any pending CSG/CS interrupts to process */ if ((req == ack) && (irqreq == irqack)) - goto out; + return; /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before * examining the CS_ACK & CS_REQ bits. This would ensure that Host @@ -2822,10 +2902,12 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c * slot scheduler spinlock is required. 
*/ if (!group) - goto out; + return; if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr)) - goto out; + return; + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { kbase_csf_firmware_csg_input_mask(ginfo, @@ -2887,8 +2969,6 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c process_cs_interrupts(group, ginfo, irqreq, irqack, track); -out: - /* group may still be NULL here */ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group, ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32)); } diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h index b267740..fc3342e 100644 --- a/mali_kbase/csf/mali_kbase_csf.h +++ b/mali_kbase/csf/mali_kbase_csf.h @@ -45,7 +45,7 @@ */ #define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX) -#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */ +#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (10000) /* Default 10 milliseconds */ /* Idle hysteresis time can be scaled down when GPU sleep feature is used */ #define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5) @@ -124,6 +124,25 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_terminate *term); /** + * kbase_csf_free_command_stream_user_pages() - Free the resources allocated + * for a queue at the time of bind. + * + * @kctx: Address of the kbase context within which the queue was created. + * @queue: Pointer to the queue to be unlinked. + * + * This function will free the pair of physical pages allocated for a GPU + * command queue, and also release the hardware doorbell page, that were mapped + * into the process address space to enable direct submission of commands to + * the hardware. Also releases the reference taken on the queue when the mapping + * was created. + * + * If an explicit or implicit unbind was missed by the userspace then the + * mapping will persist. On process exit kernel itself will remove the mapping. + */ +void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, + struct kbase_queue *queue); + +/** * kbase_csf_alloc_command_stream_user_pages - Allocate resources for a * GPU command queue. * @@ -186,6 +205,20 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick); /** + * kbase_csf_find_queue_group - Find the queue group corresponding + * to the indicated handle. + * + * @kctx: The kbase context under which the queue group exists. + * @group_handle: Handle for the group which uniquely identifies it within + * the context with which it was created. + * + * This function is used to find the queue group when passed a handle. + * + * Return: Pointer to a queue group on success, NULL on failure + */ +struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle); + +/** + * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle + * is valid.
* @@ -464,4 +497,5 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) return 0; #endif } + #endif /* _KBASE_CSF_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h index e27c568..b7ceebc 100644 --- a/mali_kbase/csf/mali_kbase_csf_defs.h +++ b/mali_kbase/csf/mali_kbase_csf_defs.h @@ -437,10 +437,13 @@ struct kbase_normal_suspend_buffer { * @pma: Array of pointer to protected mode allocations containing * information about memory pages allocated for protected mode * suspend buffer. + * @alloc_retries: Number of times we retried allocating physical pages + * for protected suspend buffers. */ struct kbase_protected_suspend_buffer { struct kbase_va_region *reg; struct protected_memory_allocation **pma; + u8 alloc_retries; }; /** @@ -1328,6 +1331,24 @@ struct kbase_csf_firmware_log { u32 func_call_list_va_end; }; +/** + * struct kbase_csf_firmware_core_dump - Object containing members for handling + * firmware core dump. + * + * @mcu_regs_addr: GPU virtual address of the start of the MCU registers buffer + * in Firmware. + * @version: Version of the FW image header core dump data format. Bits + * 7:0 specify version minor and 15:8 specify version major. + * @available: Flag to identify if the FW core dump buffer is available. + * True if entry is available in the FW image header and version + * is supported, False otherwise. + */ +struct kbase_csf_firmware_core_dump { + u32 mcu_regs_addr; + u16 version; + bool available; +}; + #if IS_ENABLED(CONFIG_DEBUG_FS) /** * struct kbase_csf_dump_on_fault - Faulty information to deliver to the daemon @@ -1458,9 +1479,9 @@ struct kbase_csf_dump_on_fault { * the glb_pwoff register. This is separated from * the @p mcu_core_pwroff_dur_count as an update * to the latter is asynchronous. - * @gpu_idle_hysteresis_ms: Sysfs attribute for the idle hysteresis time - * window in unit of ms. The firmware does not use it - * directly. + * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time + * window in unit of microseconds. The firmware does not + * use it directly. + * @gpu_idle_dur_count: The counterpart of the hysteresis time window in * interface required format, ready to be used * directly in the firmware. @@ -1470,6 +1491,8 @@ struct kbase_csf_dump_on_fault { * HW counters. * @fw: Copy of the loaded MCU firmware image. * @fw_log: Contain members required for handling firmware log. + * @fw_core_dump: Contain members required for handling the firmware + * core dump. * @dof: Structure for dump on fault.
*/ struct kbase_csf_device { @@ -1507,12 +1530,13 @@ struct kbase_csf_device { u32 mcu_core_pwroff_dur_us; u32 mcu_core_pwroff_dur_count; u32 mcu_core_pwroff_reg_shadow; - u32 gpu_idle_hysteresis_ms; + u32 gpu_idle_hysteresis_us; u32 gpu_idle_dur_count; unsigned int fw_timeout_ms; struct kbase_csf_hwcnt hwcnt; struct kbase_csf_mcu_fw fw; struct kbase_csf_firmware_log fw_log; + struct kbase_csf_firmware_core_dump fw_core_dump; #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_csf_dump_on_fault dof; #endif /* CONFIG_DEBUG_FS */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c index fc4121e..1e409ac 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware.c @@ -22,6 +22,7 @@ #include "mali_kbase.h" #include "mali_kbase_csf_firmware_cfg.h" #include "mali_kbase_csf_firmware_log.h" +#include "mali_kbase_csf_firmware_core_dump.h" #include "mali_kbase_csf_trace_buffer.h" #include "mali_kbase_csf_timeout.h" #include "mali_kbase_mem.h" @@ -81,7 +82,7 @@ MODULE_PARM_DESC(fw_debug, #define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) #define FIRMWARE_HEADER_VERSION_MAJOR (0ul) -#define FIRMWARE_HEADER_VERSION_MINOR (2ul) +#define FIRMWARE_HEADER_VERSION_MINOR (3ul) #define FIRMWARE_HEADER_LENGTH (0x14ul) #define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \ @@ -93,12 +94,13 @@ MODULE_PARM_DESC(fw_debug, CSF_FIRMWARE_ENTRY_ZERO | \ CSF_FIRMWARE_ENTRY_CACHE_MODE) -#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) -#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) -#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) -#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) +#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) +#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) +#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) +#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) #define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6) -#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9) #define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) #define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) @@ -120,7 +122,6 @@ MODULE_PARM_DESC(fw_debug, (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) - static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -488,6 +489,7 @@ out: * @kbdev: Kbase device structure * @virtual_start: Start of the virtual address range required for an entry allocation * @virtual_end: End of the virtual address range required for an entry allocation + * @flags: Firmware entry flags for comparison with the reusable pages found * @phys: Pointer to the array of physical (tagged) addresses making up the new * FW interface entry. It is an output parameter which would be made to * point to an already existing array allocated for the previously parsed @@ -508,10 +510,12 @@ out: * * Return: true if a large page can be reused, false otherwise. 
*/ -static inline bool entry_find_large_page_to_reuse( - struct kbase_device *kbdev, const u32 virtual_start, const u32 virtual_end, - struct tagged_addr **phys, struct protected_memory_allocation ***pma, - u32 num_pages, u32 *num_pages_aligned, bool *is_small_page) +static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, + const u32 virtual_start, const u32 virtual_end, + const u32 flags, struct tagged_addr **phys, + struct protected_memory_allocation ***pma, + u32 num_pages, u32 *num_pages_aligned, + bool *is_small_page) { struct kbase_csf_firmware_interface *interface = NULL; struct kbase_csf_firmware_interface *target_interface = NULL; @@ -557,7 +561,7 @@ static inline bool entry_find_large_page_to_reuse( if (interface->virtual & (SZ_2M - 1)) continue; - if (virtual_diff < virtual_diff_min) { + if ((virtual_diff < virtual_diff_min) && (interface->flags == flags)) { target_interface = interface; virtual_diff_min = virtual_diff; } @@ -620,6 +624,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, struct protected_memory_allocation **pma = NULL; bool reuse_pages = false; bool is_small_page = true; + bool ignore_page_migration = true; if (data_end < data_start) { dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", @@ -662,9 +667,9 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, num_pages = (virtual_end - virtual_start) >> PAGE_SHIFT; - reuse_pages = entry_find_large_page_to_reuse( - kbdev, virtual_start, virtual_end, &phys, &pma, - num_pages, &num_pages_aligned, &is_small_page); + reuse_pages = + entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, &phys, + &pma, num_pages, &num_pages_aligned, &is_small_page); if (!reuse_pages) phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL); @@ -685,6 +690,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), num_pages_aligned, phys, false); + ignore_page_migration = false; } } @@ -794,7 +800,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, + ignore_page_migration); if (ret != 0) { dev_err(kbdev->dev, "Failed to insert firmware pages\n"); @@ -1023,20 +1030,26 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs return parse_build_info_metadata_entry(kbdev, fw, entry, size); case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST: /* Function call list section */ - if (size < 2 * sizeof(*entry)) { + if (size < FUNC_CALL_LIST_ENTRY_NAME_OFFSET + sizeof(*entry)) { dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n", size); return -EINVAL; } kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry); - break; - } - - if (!optional) { - dev_err(kbdev->dev, - "Unsupported non-optional entry type %u in firmware\n", - type); - return -EINVAL; + return 0; + case CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP: + /* Core Dump section */ + if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "FW Core dump entry too short (size=%u)\n", size); + return -EINVAL; + } + return kbase_csf_firmware_core_dump_entry_parse(kbdev, entry); + default: + if (!optional) { + dev_err(kbdev->dev, "Unsupported non-optional entry type %u in firmware\n", + type); + return -EINVAL; + } } return 0; @@ 
-1687,6 +1700,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); } +static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_debug_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_debug_req ^= req_mask; + + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); +} + +static void request_fw_core_dump( + const struct kbase_csf_global_iface *const global_iface) +{ + uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + + set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} + +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int ret; + + /* Serialize CORE_DUMP requests. */ + mutex_lock(&kbdev->csf.reg_lock); + + /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + request_fw_core_dump(global_iface); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait for firmware to acknowledge completion of the CORE_DUMP request. 
*/ + ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + if (!ret) + WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} /** * kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core @@ -1714,7 +1792,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | - GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; + GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1890,12 +1968,12 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); } -static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us) { #define HYSTERESIS_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); - u64 dur_val = dur_ms; + u64 dur_val = dur_us; u32 cnt_val_u32, reg_val_u32; bool src_system_timestamp = freq > 0; @@ -1913,9 +1991,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); } - /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ + /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; - dur_val = div_u64(dur_val, 1000); + dur_val = div_u64(dur_val, 1000000); /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? 
S32_MAX : (u32)dur_val; @@ -1938,7 +2016,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_ms; + dur = kbdev->csf.gpu_idle_hysteresis_us; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -1955,7 +2033,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); @@ -1986,7 +2064,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -2166,14 +2244,14 @@ void kbase_csf_firmware_early_term(struct kbase_device *kbdev) int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; + kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); return 0; } @@ -2353,6 +2431,10 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) goto err_out; } +#ifdef CONFIG_MALI_FW_CORE_DUMP + kbase_csf_firmware_core_dump_init(kbdev); +#endif + /* Firmware loaded successfully, ret = 0 */ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL, (((u64)version_hash) << 32) | @@ -2848,7 +2930,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); if (ret) goto mmu_insert_pages_error; @@ -2909,4 +2991,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term( vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } - diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h index bf4bb6f..cc20f9a 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware.h @@ -246,7 +246,6 @@ void kbase_csf_firmware_csg_input_mask( u32 kbase_csf_firmware_csg_output( const struct kbase_csf_cmd_stream_group_info *info, u32 offset); - /** * struct kbase_csf_global_iface - Global CSF interface * provided by the firmware. 
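The milliseconds-to-microseconds change threads through convert_dur_to_idle_count() above, which turns the hysteresis window into a count of 2^10-cycle units of the system timer (HYSTERESIS_VAL_UNIT_SHIFT), clamped to S32_MAX before being handed to the firmware. A worked example of the arithmetic as standalone C, assuming a 26 MHz system timer (the frequency here is an assumption for illustration; the driver reads the real one via arch_timer_get_cntfrq()):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t freq = 26000000;   /* assumed cntfreq_el0: 26 MHz */
        uint64_t dur_val = 10000;         /* FIRMWARE_IDLE_HYSTERESIS_TIME_USEC */

        /* Multiply before dividing, as the driver does, to keep precision:
         * dur_val = ((dur_us / 1000000) * freq_HZ) >> 10
         */
        dur_val = (dur_val * freq) >> 10; /* HYSTERESIS_VAL_UNIT_SHIFT */
        dur_val = dur_val / 1000000;      /* microseconds -> seconds, last */

        /* Prints 253 for the 10 ms default at 26 MHz */
        printf("gpu_idle_dur_count = %llu\n", (unsigned long long)dur_val);
        return 0;
    }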
@@ -858,5 +857,16 @@ static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch) */ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev); +/** + * kbase_csf_firmware_req_core_dump - Request a firmware core dump + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Request a firmware core dump and wait for firmware to acknowledge. + * Firmware will enter an infinite loop after the firmware core dump is created. + * + * Return: 0 if success, or negative error code on failure. + */ +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev); #endif diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c new file mode 100644 index 0000000..f0a10d1 --- /dev/null +++ b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c @@ -0,0 +1,807 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/list.h> +#include <linux/file.h> +#include <linux/elf.h> +#include <linux/elfcore.h> + +#include "mali_kbase.h" +#include "mali_kbase_csf_firmware_core_dump.h" +#include "backend/gpu/mali_kbase_pm_internal.h" + +/* Page size in bytes in use by MCU. */ +#define FW_PAGE_SIZE 4096 + +/* + * FW image header core dump data format supported. + * Currently only version 0.1 is supported. + */ +#define FW_CORE_DUMP_DATA_VERSION_MAJOR 0 +#define FW_CORE_DUMP_DATA_VERSION_MINOR 1 + +/* Full version of the image header core dump data format */ +#define FW_CORE_DUMP_DATA_VERSION \ ((FW_CORE_DUMP_DATA_VERSION_MAJOR << 8) | FW_CORE_DUMP_DATA_VERSION_MINOR) + +/* Validity flag to indicate if the MCU registers in the buffer are valid */ +#define FW_MCU_STATUS_MASK 0x1 +#define FW_MCU_STATUS_VALID (1 << 0) + +/* Core dump entry fields */ +#define FW_CORE_DUMP_VERSION_INDEX 0 +#define FW_CORE_DUMP_START_ADDR_INDEX 1 + +/* MCU registers stored by a firmware core dump */ +struct fw_core_dump_mcu { + u32 r0; + u32 r1; + u32 r2; + u32 r3; + u32 r4; + u32 r5; + u32 r6; + u32 r7; + u32 r8; + u32 r9; + u32 r10; + u32 r11; + u32 r12; + u32 sp; + u32 lr; + u32 pc; +}; + +/* Any ELF definitions used in this file are from elf.h/elfcore.h except + * when specific 32-bit versions are required (mainly for the + * ELF_PRSTATUS32 note that is used to contain the MCU registers). + */ + +/* - 32-bit version of timeval structures used in ELF32 PRSTATUS note. */ +struct prstatus32_timeval { + int tv_sec; + int tv_usec; +}; + +/* - Structure defining ELF32 PRSTATUS note contents, as defined by the + * GNU binutils BFD library used by GDB, in bfd/hosts/x86-64linux.h. + * Note: GDB checks for the size of this structure to be 0x94.
+ * Modified pr_reg (array containing the Arm 32-bit MCU registers) to + * use u32[18] instead of elf_gregset32_t to prevent introducing new typedefs. + */ +struct elf_prstatus32 { + struct elf_siginfo pr_info; /* Info associated with signal. */ + short int pr_cursig; /* Current signal. */ + unsigned int pr_sigpend; /* Set of pending signals. */ + unsigned int pr_sighold; /* Set of held signals. */ + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct prstatus32_timeval pr_utime; /* User time. */ + struct prstatus32_timeval pr_stime; /* System time. */ + struct prstatus32_timeval pr_cutime; /* Cumulative user time. */ + struct prstatus32_timeval pr_cstime; /* Cumulative system time. */ + u32 pr_reg[18]; /* GP registers. */ + int pr_fpvalid; /* True if math copro being used. */ +}; + +/** + * struct fw_core_dump_data - Context for seq_file operations used on 'fw_core_dump' + * debugfs file. + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +struct fw_core_dump_data { + struct kbase_device *kbdev; +}; + +/* + * struct fw_core_dump_seq_off - Iterator for seq_file operations used on 'fw_core_dump' + * debugfs file. + * @interface: current firmware memory interface + * @page_num: current page number (0..) within @interface + */ +struct fw_core_dump_seq_off { + struct kbase_csf_firmware_interface *interface; + u32 page_num; +}; + +/** + * fw_get_core_dump_mcu - Get the MCU registers saved by a firmware core dump + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @regs: Pointer to a core dump mcu struct where the MCU registers are copied + * to. Should be allocated by the caller. + * + * Return: 0 if successfully copied the MCU registers, negative error code otherwise. + */ +static int fw_get_core_dump_mcu(struct kbase_device *kbdev, struct fw_core_dump_mcu *regs) +{ + unsigned int i; + u32 status = 0; + u32 data_addr = kbdev->csf.fw_core_dump.mcu_regs_addr; + u32 *data = (u32 *)regs; + + /* Check if the core dump entry exposed the buffer */ + if (!regs || !kbdev->csf.fw_core_dump.available) + return -EPERM; + + /* Check if the data in the buffer is valid, if not, return error */ + kbase_csf_read_firmware_memory(kbdev, data_addr, &status); + if ((status & FW_MCU_STATUS_MASK) != FW_MCU_STATUS_VALID) + return -EPERM; + + /* According to image header documentation, the MCU registers core dump + * buffer is 32-bit aligned. + */ + for (i = 1; i <= sizeof(struct fw_core_dump_mcu) / sizeof(u32); ++i) + kbase_csf_read_firmware_memory(kbdev, data_addr + i * sizeof(u32), &data[i - 1]); + + return 0; +} + +/** + * fw_core_dump_fill_elf_header - Initializes an ELF32 header + * @hdr: ELF32 header to initialize + * @sections: Number of entries in the ELF program header table + * + * Initializes an ELF32 header for an ARM 32-bit little-endian + * 'Core file' object file. + */ +static void fw_core_dump_fill_elf_header(struct elf32_hdr *hdr, unsigned int sections) +{ + /* Reset all members in header. */ + memset(hdr, 0, sizeof(*hdr)); + + /* Magic number identifying file as an ELF object. */ + memcpy(hdr->e_ident, ELFMAG, SELFMAG); + + /* Identify file as 32-bit, little-endian, using current + * ELF header version, with no OS or ABI specific ELF + * extensions used. + */ + hdr->e_ident[EI_CLASS] = ELFCLASS32; + hdr->e_ident[EI_DATA] = ELFDATA2LSB; + hdr->e_ident[EI_VERSION] = EV_CURRENT; + hdr->e_ident[EI_OSABI] = ELFOSABI_NONE; + + /* 'Core file' type of object file.
*/ + hdr->e_type = ET_CORE; + + /* ARM 32-bit architecture (AARCH32) */ + hdr->e_machine = EM_ARM; + + /* Object file version: the original format. */ + hdr->e_version = EV_CURRENT; + + /* Offset of program header table in file. */ + hdr->e_phoff = sizeof(struct elf32_hdr); + + /* No processor specific flags. */ + hdr->e_flags = 0; + + /* Size of the ELF header in bytes. */ + hdr->e_ehsize = sizeof(struct elf32_hdr); + + /* Size of the ELF program header entry in bytes. */ + hdr->e_phentsize = sizeof(struct elf32_phdr); + + /* Number of entries in the program header table. */ + hdr->e_phnum = sections; +} + +/** + * fw_core_dump_fill_elf_program_header_note - Initializes an ELF32 program header + * for holding auxiliary information + * @phdr: ELF32 program header + * @file_offset: Location of the note in the file in bytes + * @size: Size of the note in bytes. + * + * Initializes an ELF32 program header describing auxiliary information (containing + * one or more notes) of @size bytes altogether located in the file at offset + * @file_offset. + */ +static void fw_core_dump_fill_elf_program_header_note(struct elf32_phdr *phdr, u32 file_offset, + u32 size) +{ + /* Auxiliary information (note) in program header. */ + phdr->p_type = PT_NOTE; + + /* Location of first note in file in bytes. */ + phdr->p_offset = file_offset; + + /* Size of all notes combined in bytes. */ + phdr->p_filesz = size; + + /* Other members not relevant for a note. */ + phdr->p_vaddr = 0; + phdr->p_paddr = 0; + phdr->p_memsz = 0; + phdr->p_align = 0; + phdr->p_flags = 0; +} + +/** + * fw_core_dump_fill_elf_program_header - Initializes an ELF32 program header for a loadable segment + * @phdr: ELF32 program header to initialize. + * @file_offset: Location of loadable segment in file in bytes + * (aligned to FW_PAGE_SIZE bytes) + * @vaddr: 32-bit virtual address where to write the segment + * (aligned to FW_PAGE_SIZE bytes) + * @size: Size of the segment in bytes. + * @flags: CSF_FIRMWARE_ENTRY_* flags describing access permissions. + * + * Initializes an ELF32 program header describing a loadable segment of + * @size bytes located in the file at offset @file_offset to be loaded + * at virtual address @vaddr with access permissions as described by + * CSF_FIRMWARE_ENTRY_* flags in @flags. + */ +static void fw_core_dump_fill_elf_program_header(struct elf32_phdr *phdr, u32 file_offset, + u32 vaddr, u32 size, u32 flags) +{ + /* Loadable segment in program header. */ + phdr->p_type = PT_LOAD; + + /* Location of segment in file in bytes. Aligned to p_align bytes. */ + phdr->p_offset = file_offset; + + /* Virtual address of segment. Aligned to p_align bytes. */ + phdr->p_vaddr = vaddr; + + /* Physical address of segment. Not relevant. */ + phdr->p_paddr = 0; + + /* Size of segment in file and memory. */ + phdr->p_filesz = size; + phdr->p_memsz = size; + + /* Alignment of segment in the file and memory in bytes (integral power of 2). */ + phdr->p_align = FW_PAGE_SIZE; + + /* Set segment access permissions. */ + phdr->p_flags = 0; + if (flags & CSF_FIRMWARE_ENTRY_READ) + phdr->p_flags |= PF_R; + if (flags & CSF_FIRMWARE_ENTRY_WRITE) + phdr->p_flags |= PF_W; + if (flags & CSF_FIRMWARE_ENTRY_EXECUTE) + phdr->p_flags |= PF_X; +} + +/** + * fw_core_dump_get_prstatus_note_size - Calculates size of an ELF32 PRSTATUS note + * @name: Name given to the PRSTATUS note.
+ * + * Calculates the size of a 32-bit PRSTATUS note (which contains information + * about a process like the current MCU registers) taking into account that + * @name must be padded to a 4-byte multiple. + * + * Return: size of 32-bit PRSTATUS note in bytes. + */ +static unsigned int fw_core_dump_get_prstatus_note_size(char *name) +{ + return sizeof(struct elf32_note) + roundup(strlen(name) + 1, 4) + + sizeof(struct elf_prstatus32); +} + +/** + * fw_core_dump_fill_elf_prstatus - Initializes an ELF32 PRSTATUS structure + * @prs: ELF32 PRSTATUS note to initialize + * @regs: MCU registers to copy into the PRSTATUS note + * + * Initializes an ELF32 PRSTATUS structure with MCU registers @regs. + * Other process information is N/A for CSF Firmware. + */ +static void fw_core_dump_fill_elf_prstatus(struct elf_prstatus32 *prs, + struct fw_core_dump_mcu *regs) +{ + /* Only fill in registers (32-bit) of PRSTATUS note. */ + memset(prs, 0, sizeof(*prs)); + prs->pr_reg[0] = regs->r0; + prs->pr_reg[1] = regs->r1; + prs->pr_reg[2] = regs->r2; + prs->pr_reg[3] = regs->r3; + prs->pr_reg[4] = regs->r4; + prs->pr_reg[5] = regs->r5; + prs->pr_reg[6] = regs->r6; + prs->pr_reg[7] = regs->r7; + prs->pr_reg[8] = regs->r8; + prs->pr_reg[9] = regs->r9; + prs->pr_reg[10] = regs->r10; + prs->pr_reg[11] = regs->r11; + prs->pr_reg[12] = regs->r12; + prs->pr_reg[13] = regs->sp; + prs->pr_reg[14] = regs->lr; + prs->pr_reg[15] = regs->pc; +} + +/** + * fw_core_dump_create_prstatus_note - Creates an ELF32 PRSTATUS note + * @name: Name for the PRSTATUS note + * @prs: ELF32 PRSTATUS structure to put in the PRSTATUS note + * @created_prstatus_note: + * Pointer to the allocated ELF32 PRSTATUS note + * + * Creates an ELF32 note with one PRSTATUS entry containing the + * ELF32 PRSTATUS structure @prs. Caller needs to free the created note in + * @created_prstatus_note. + * + * Return: 0 on failure, otherwise size of ELF32 PRSTATUS note in bytes. + */ +static unsigned int fw_core_dump_create_prstatus_note(char *name, struct elf_prstatus32 *prs, + struct elf32_note **created_prstatus_note) +{ + struct elf32_note *note; + unsigned int note_name_sz; + unsigned int note_sz; + + /* Allocate memory for ELF32 note containing a PRSTATUS note. */ + note_name_sz = strlen(name) + 1; + note_sz = sizeof(struct elf32_note) + roundup(note_name_sz, 4) + + sizeof(struct elf_prstatus32); + note = kmalloc(note_sz, GFP_KERNEL); + if (!note) + return 0; + + /* Fill in ELF32 note with one entry for a PRSTATUS note. */ + note->n_namesz = note_name_sz; + note->n_descsz = sizeof(struct elf_prstatus32); + note->n_type = NT_PRSTATUS; + memcpy(note + 1, name, note_name_sz); + memcpy((char *)(note + 1) + roundup(note_name_sz, 4), prs, sizeof(*prs)); + + /* Return pointer and size of the created ELF32 note. */ + *created_prstatus_note = note; + return note_sz; +} + +/** + * fw_core_dump_write_elf_header - Writes ELF header for the FW core dump + * @m: the seq_file handle + * + * Writes the ELF header of the core dump including program headers for + * memory sections and a note containing the current MCU register + * values. + * + * Excludes memory sections that lack read access permissions or + * are for protected memory. + * + * The data written is as follows: + * - ELF header + * - ELF PHDRs for memory sections + * - ELF PHDR for program header NOTE + * - ELF PRSTATUS note + * - 0-bytes padding to multiple of ELF_EXEC_PAGESIZE + * + * The actual memory section dumps should follow this (not written + * by this function).
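+ *
+ * For illustration only (assuming three readable memory sections), the
+ * resulting layout would start at these offsets: e_phoff = 52, i.e.
+ * sizeof(struct elf32_hdr); the PT_NOTE program header at 52 + 3 * 32,
+ * after the three 32-byte PT_LOAD program headers; the PRSTATUS note
+ * directly after the program header table; then zero padding up to the
+ * next ELF_EXEC_PAGESIZE boundary before the first section's pages.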
+ * + * Retrieves the necessary information via the struct + * fw_core_dump_data stored in the private member of the seq_file + * handle. + * + * Return: + * * 0 - success + * * -ENOMEM - not enough memory for allocating ELF32 note + */ +static int fw_core_dump_write_elf_header(struct seq_file *m) +{ + struct elf32_hdr hdr; + struct elf32_phdr phdr; + struct fw_core_dump_data *dump_data = m->private; + struct kbase_device *const kbdev = dump_data->kbdev; + struct kbase_csf_firmware_interface *interface; + struct elf_prstatus32 elf_prs; + struct elf32_note *elf_prstatus_note; + unsigned int sections = 0; + unsigned int elf_prstatus_note_size; + u32 elf_prstatus_offset; + u32 elf_phdr_note_offset; + u32 elf_memory_sections_data_offset; + u32 total_pages = 0; + u32 padding_size, *padding; + struct fw_core_dump_mcu regs = { 0 }; + + /* Count number of memory sections. */ + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + sections++; + } + + /* Prepare ELF header. */ + fw_core_dump_fill_elf_header(&hdr, sections + 1); + seq_write(m, &hdr, sizeof(struct elf32_hdr)); + + elf_prstatus_note_size = fw_core_dump_get_prstatus_note_size("CORE"); + /* PHDRs of PT_LOAD type. */ + elf_phdr_note_offset = sizeof(struct elf32_hdr) + sections * sizeof(struct elf32_phdr); + /* PHDR of PT_NOTE type. */ + elf_prstatus_offset = elf_phdr_note_offset + sizeof(struct elf32_phdr); + elf_memory_sections_data_offset = elf_prstatus_offset + elf_prstatus_note_size; + + /* Calculate padding size to page offset. */ + padding_size = roundup(elf_memory_sections_data_offset, ELF_EXEC_PAGESIZE) - + elf_memory_sections_data_offset; + elf_memory_sections_data_offset += padding_size; + + /* Prepare ELF program header table. */ + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + + fw_core_dump_fill_elf_program_header(&phdr, elf_memory_sections_data_offset, + interface->virtual, + interface->num_pages * FW_PAGE_SIZE, + interface->flags); + + seq_write(m, &phdr, sizeof(struct elf32_phdr)); + + elf_memory_sections_data_offset += interface->num_pages * FW_PAGE_SIZE; + total_pages += interface->num_pages; + } + + /* Prepare PHDR of PT_NOTE type. */ + fw_core_dump_fill_elf_program_header_note(&phdr, elf_prstatus_offset, + elf_prstatus_note_size); + seq_write(m, &phdr, sizeof(struct elf32_phdr)); + + /* Prepare ELF note of PRSTATUS type. */ + if (fw_get_core_dump_mcu(kbdev, ®s)) + dev_dbg(kbdev->dev, "MCU Registers not available, all registers set to zero"); + /* Even if MCU Registers are not available the ELF prstatus is still + * filled with the registers equal to zero. + */ + fw_core_dump_fill_elf_prstatus(&elf_prs, ®s); + elf_prstatus_note_size = + fw_core_dump_create_prstatus_note("CORE", &elf_prs, &elf_prstatus_note); + if (elf_prstatus_note_size == 0) + return -ENOMEM; + + seq_write(m, elf_prstatus_note, elf_prstatus_note_size); + kfree(elf_prstatus_note); + + /* Pad file to page size. 
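+	 * Zero bytes bring the headers and note up to an ELF_EXEC_PAGESIZE
+	 * boundary so that each following PT_LOAD segment starts page-aligned,
+	 * matching the p_align value set in its program header.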
*/ + padding = kzalloc(padding_size, GFP_KERNEL); + seq_write(m, padding, padding_size); + kfree(padding); + + return 0; +} + +/** + * fw_core_dump_create - Requests firmware to save state for a firmware core dump + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_create(struct kbase_device *kbdev) +{ + int err; + + /* Ensure MCU is active before requesting the core dump. */ + kbase_csf_scheduler_pm_active(kbdev); + err = kbase_csf_scheduler_wait_mcu_active(kbdev); + if (!err) + err = kbase_csf_firmware_req_core_dump(kbdev); + + kbase_csf_scheduler_pm_idle(kbdev); + + return err; +} + +/** + * fw_core_dump_seq_start - seq_file start operation for firmware core dump file + * @m: the seq_file handle + * @_pos: holds the current position in pages + * (0 or most recent position used in previous session) + * + * Starts a seq_file session, positioning the iterator for the session to page @_pos - 1 + * within the firmware interface memory sections. @_pos value 0 is used to indicate the + * position of the ELF header at the start of the file. + * + * Retrieves the necessary information via the struct fw_core_dump_data stored in + * the private member of the seq_file handle. + * + * Return: + * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off + * * SEQ_START_TOKEN - special iterator pointer indicating it is the start of the file + * * NULL - iterator could not be allocated or position is beyond the end + */ +static void *fw_core_dump_seq_start(struct seq_file *m, loff_t *_pos) +{ + struct fw_core_dump_data *dump_data = m->private; + struct fw_core_dump_seq_off *data; + struct kbase_csf_firmware_interface *interface; + loff_t pos = *_pos; + + if (pos == 0) + return SEQ_START_TOKEN; + + /* Move iterator in the right position based on page number within + * available pages of firmware interface memory sections. + */ + pos--; /* ignore start token */ + list_for_each_entry(interface, &dump_data->kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + + if (pos >= interface->num_pages) { + pos -= interface->num_pages; + } else { + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + data->interface = interface; + data->page_num = pos; + return data; + } + } + + return NULL; +} + +/** + * fw_core_dump_seq_stop - seq_file stop operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * + * Closes the current session and frees any related memory. + */ +static void fw_core_dump_seq_stop(struct seq_file *m, void *v) +{ + kfree(v); +} + +/** + * fw_core_dump_seq_next - seq_file next operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * @pos: holds the current position in pages + * (0 or most recent position used in previous session) + * + * Moves the iterator @v forward to the next page within the firmware interface + * memory sections and returns the updated position in @pos. + * @v value SEQ_START_TOKEN indicates the ELF header position.
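+ *
+ * For example (illustrative): with two readable sections of four and two
+ * pages respectively, *pos iterates 0 (ELF header), 1..4 (pages of the
+ * first section), then 5..6 (pages of the second), after which the
+ * sequence ends.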
+ * + * Return: + * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off + * * NULL - end of the data, or the iterator could not be allocated + */ +static void *fw_core_dump_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct fw_core_dump_data *dump_data = m->private; + struct fw_core_dump_seq_off *data = v; + struct kbase_csf_firmware_interface *interface; + struct list_head *interfaces = &dump_data->kbdev->csf.firmware_interfaces; + + /* Is current position at the ELF header? */ + if (v == SEQ_START_TOKEN) { + if (list_empty(interfaces)) + return NULL; + + /* Prepare iterator for starting at first page in firmware interface + * memory sections. + */ + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + data->interface = + list_first_entry(interfaces, struct kbase_csf_firmware_interface, node); + data->page_num = 0; + ++*pos; + return data; + } + + /* First attempt to satisfy from current firmware interface memory section. */ + interface = data->interface; + if (data->page_num + 1 < interface->num_pages) { + data->page_num++; + ++*pos; + return data; + } + + /* Need next firmware interface memory section. This could be the last one. */ + if (list_is_last(&interface->node, interfaces)) { + kfree(data); + return NULL; + } + + /* Move to first page in next firmware interface memory section. */ + data->interface = list_next_entry(interface, node); + data->page_num = 0; + ++*pos; + + return data; +} + +/** + * fw_core_dump_seq_show - seq_file show operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * + * Writes the current page in a firmware interface memory section indicated + * by the iterator @v to the file. If @v is SEQ_START_TOKEN the ELF + * header is written. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_seq_show(struct seq_file *m, void *v) +{ + struct fw_core_dump_seq_off *data = v; + struct page *page; + u32 *p; + + /* Either write the ELF header or current page. */ + if (v == SEQ_START_TOKEN) + return fw_core_dump_write_elf_header(m); + + /* Write the current page. */ + page = as_page(data->interface->phys[data->page_num]); + p = kmap_atomic(page); + seq_write(m, p, FW_PAGE_SIZE); + kunmap_atomic(p); + + return 0; +} + +/* Sequence file operations for firmware core dump file. */ +static const struct seq_operations fw_core_dump_seq_ops = { + .start = fw_core_dump_seq_start, + .next = fw_core_dump_seq_next, + .stop = fw_core_dump_seq_stop, + .show = fw_core_dump_seq_show, +}; + +/** + * fw_core_dump_debugfs_open - callback for opening the 'fw_core_dump' debugfs file + * @inode: inode of the file + * @file: file pointer + * + * Prepares for servicing a write request to request a core dump from firmware and + * a read request to retrieve the core dump. + * + * Returns an error if the firmware is not initialized yet. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_debugfs_open(struct inode *inode, struct file *file) +{ + struct kbase_device *const kbdev = inode->i_private; + struct fw_core_dump_data *dump_data; + int ret; + + /* Fail if firmware is not initialized yet. */ + if (!kbdev->csf.firmware_inited) { + ret = -ENODEV; + goto open_fail; + } + + /* Open a sequence file for iterating through the pages in the + * firmware interface memory pages. seq_open stores a + * struct seq_file * in the private_data field of @file.
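+	 *
+	 * From user space the file is typically driven as a write-then-read
+	 * sequence, e.g. (an illustrative sketch, error handling omitted):
+	 *
+	 *   int fd = open("/sys/kernel/debug/mali0/fw_core_dump", O_RDWR);
+	 *   write(fd, "1", 1);          // request the dump
+	 *   lseek(fd, 0, SEEK_SET);
+	 *   // read(fd, buf, len) in a loop until it returns 0
+	 *   close(fd);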
+ */ + ret = seq_open(file, &fw_core_dump_seq_ops); + if (ret) + goto open_fail; + + /* Allocate a context for sequence file operations. */ + dump_data = kmalloc(sizeof(*dump_data), GFP_KERNEL); + if (!dump_data) { + ret = -ENOMEM; + goto out; + } + + /* Kbase device will be shared with sequence file operations. */ + dump_data->kbdev = kbdev; + + /* Link our sequence file context. */ + ((struct seq_file *)file->private_data)->private = dump_data; + + return 0; +out: + seq_release(inode, file); +open_fail: + return ret; +} + +/** + * fw_core_dump_debugfs_write - callback for a write to the 'fw_core_dump' debugfs file + * @file: file pointer + * @ubuf: user buffer containing data to store + * @count: number of bytes in user buffer + * @ppos: file position + * + * Any data written to the file triggers a firmware core dump request which + * subsequently can be retrieved by reading from the file. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t fw_core_dump_debugfs_write(struct file *file, const char __user *ubuf, size_t count, + loff_t *ppos) +{ + int err; + struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; + struct kbase_device *const kbdev = dump_data->kbdev; + + CSTD_UNUSED(ppos); + + err = fw_core_dump_create(kbdev); + + return err ? err : count; +} + +/** + * fw_core_dump_debugfs_release - callback for releasing the 'fw_core_dump' debugfs file + * @inode: inode of the file + * @file: file pointer + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_debugfs_release(struct inode *inode, struct file *file) +{ + struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; + + seq_release(inode, file); + + kfree(dump_data); + + return 0; +} +/* Debugfs file operations for firmware core dump file. */ +static const struct file_operations kbase_csf_fw_core_dump_fops = { + .owner = THIS_MODULE, + .open = fw_core_dump_debugfs_open, + .read = seq_read, + .write = fw_core_dump_debugfs_write, + .llseek = seq_lseek, + .release = fw_core_dump_debugfs_release, +}; + +void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev) +{ +#if IS_ENABLED(CONFIG_DEBUG_FS) + debugfs_create_file("fw_core_dump", 0600, kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_fw_core_dump_fops); +#endif /* CONFIG_DEBUG_FS */ +} + +int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry) +{ + /* Casting to u16 as version is defined by bits 15:0 */ + kbdev->csf.fw_core_dump.version = (u16)entry[FW_CORE_DUMP_VERSION_INDEX]; + + if (kbdev->csf.fw_core_dump.version != FW_CORE_DUMP_DATA_VERSION) + return -EPERM; + + kbdev->csf.fw_core_dump.mcu_regs_addr = entry[FW_CORE_DUMP_START_ADDR_INDEX]; + kbdev->csf.fw_core_dump.available = true; + + return 0; +} diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.h b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.h new file mode 100644 index 0000000..0537dca --- /dev/null +++ b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ +#define _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ + +struct kbase_device; + +/** Offset of the last field of core dump entry from the image header */ +#define CORE_DUMP_ENTRY_START_ADDR_OFFSET (0x4) + +/** + * kbase_csf_firmware_core_dump_entry_parse() - Parse a "core dump" entry from + * the image header. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @entry: Pointer to section. + * + * Read a "core dump" entry from the image header, check the version for + * compatibility and store the address pointer. + * + * Return: 0 if the entry was parsed successfully, negative error code otherwise. + */ +int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry); + +/** + * kbase_csf_firmware_core_dump_init() - Initialize firmware core dump support + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * Must be zero-initialized. + * + * Creates the fw_core_dump debugfs file through which to request a firmware + * core dump. The created debugfs file is cleaned up as part of kbdev debugfs + * cleanup. + * + * The fw_core_dump debugfs file can be used in the following way: + * + * To explicitly request core dump: + * echo 1 >/sys/kernel/debug/mali0/fw_core_dump + * + * To output current core dump (after explicitly requesting a core dump, or + * kernel driver reported an internal firmware error): + * cat /sys/kernel/debug/mali0/fw_core_dump + */ +void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev); + +#endif /* _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.c b/mali_kbase/csf/mali_kbase_csf_firmware_log.c index a046112..77d3b1e 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_log.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.c @@ -85,7 +85,7 @@ static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val) dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count); enable_bits_count = 64; } - new_mask = val & ((1 << enable_bits_count) - 1); + new_mask = val & (UINT64_MAX >> (64 - enable_bits_count)); if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb)) return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask); @@ -350,7 +350,7 @@ static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, boo diff = callee_address - calling_address - 4; sign = !!(diff & 0x80000000); - if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff && + if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff || ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) { dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping", calling_address); diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.h b/mali_kbase/csf/mali_kbase_csf_firmware_log.h index 8d7a221..1008320 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_log.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.h @@ -24,6 +24,9 @@ #include <mali_kbase.h> +/** Offset of the last field of functions call list entry from the image header */ +#define
FUNC_CALL_LIST_ENTRY_NAME_OFFSET (0x8) + /* * Firmware log dumping buffer size. */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c index 0eaaddf..2e2b59f 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c @@ -33,6 +33,7 @@ #include "mmu/mali_kbase_mmu.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" #include <backend/gpu/mali_kbase_model_dummy.h> +#include <csf/mali_kbase_csf_registers.h> #include <linux/list.h> #include <linux/slab.h> @@ -104,7 +105,6 @@ struct dummy_firmware_interface { (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) - static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -716,6 +716,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); } +static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_debug_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_debug_req ^= req_mask; + + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); +} + +static void request_fw_core_dump( + const struct kbase_csf_global_iface *const global_iface) +{ + uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + + set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} + +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int ret; + + /* Serialize CORE_DUMP requests. */ + mutex_lock(&kbdev->csf.reg_lock); + + /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + request_fw_core_dump(global_iface); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait for firmware to acknowledge completion of the CORE_DUMP request. 
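+	 * The handshake is toggle-based: set_global_debug_request() above read
+	 * the current GLB_DEBUG_ACK value and wrote it XOR'd with the request
+	 * mask into GLB_DEBUG_REQ, so completion is detected once the firmware
+	 * makes GLB_DEBUG_ACK equal to GLB_DEBUG_REQ again under that mask.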
*/ + ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + if (!ret) + WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} static void global_init(struct kbase_device *const kbdev, u64 core_mask) { @@ -724,8 +789,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | - GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | - 0; + GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -917,7 +981,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_ms; + dur = kbdev->csf.gpu_idle_hysteresis_us; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -934,7 +998,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); @@ -965,7 +1029,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -1076,14 +1140,14 @@ void kbase_csf_firmware_early_term(struct kbase_device *kbdev) int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; + kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); return 0; } @@ -1533,7 +1597,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); if (ret) goto mmu_insert_pages_error; @@ -1594,4 +1658,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term( vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } - diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c index 1876d50..f357e9e 100644 --- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c +++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c @@ -142,7 +142,14 @@ void 
kbase_csf_heap_context_allocator_term( if (ctx_alloc->region) { kbase_gpu_vm_lock(kctx); - ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * We can't enforce (nor check) the no_user_free refcount + * to be 0 here as other code regions can take such a reference. + * Anyway, this isn't an issue as the region will eventually + * be freed by the region tracker if its refcount didn't drop + * to 0. + */ + kbase_va_region_no_user_free_put(kctx, ctx_alloc->region); kbase_mem_free_region(kctx, ctx_alloc->region); kbase_gpu_vm_unlock(kctx); } diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c index 99ab002..06a6990 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c @@ -674,9 +674,8 @@ static int kbase_csf_queue_group_suspend_prepare( (kbase_reg_current_backed_size(reg) < nr_pages) || !(reg->flags & KBASE_REG_CPU_WR) || (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || - (reg->flags & KBASE_REG_DONT_NEED) || - (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) || - (reg->flags & KBASE_REG_NO_USER_FREE)) { + (kbase_is_region_shrinkable(reg)) || + (kbase_va_region_is_no_user_free(kctx, reg))) { ret = -EINVAL; goto out_clean_pages; } @@ -1234,9 +1233,8 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, queue_work(kcpu_queue->wq, &kcpu_queue->work); } -static void kbase_kcpu_fence_wait_cancel( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; @@ -1410,15 +1408,14 @@ static int kbase_kcpu_fence_wait_process( */ if (fence_status) - kbase_kcpu_fence_wait_cancel(kcpu_queue, fence_info); + kbasep_kcpu_fence_wait_cancel(kcpu_queue, fence_info); return fence_status; } -static int kbase_kcpu_fence_wait_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_fence_info *fence_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_in; @@ -1429,8 +1426,7 @@ static int kbase_kcpu_fence_wait_prepare( lockdep_assert_held(&kcpu_queue->lock); - if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), - sizeof(fence))) + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) return -ENOMEM; fence_in = sync_file_get_fence(fence.basep.fd); @@ -1444,9 +1440,8 @@ static int kbase_kcpu_fence_wait_prepare( return 0; } -static int kbase_kcpu_fence_signal_process( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; int ret; @@ -1467,37 +1462,37 @@ static int kbase_kcpu_fence_signal_process( fence_info->fence->seqno); /* dma_fence refcount needs to be decreased to release it. 
*/ - dma_fence_put(fence_info->fence); + kbase_fence_put(fence_info->fence); fence_info->fence = NULL; return ret; } -static int kbase_kcpu_fence_signal_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_fence_info *fence_info, - struct kbase_kcpu_command *current_command) +static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, + int *fd) { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_out; #else struct dma_fence *fence_out; #endif - struct base_fence fence; - struct sync_file *sync_file; + struct kbase_kcpu_dma_fence *kcpu_fence; int ret = 0; - int fd; lockdep_assert_held(&kcpu_queue->lock); - if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), - sizeof(fence))) - return -EFAULT; - - fence_out = kzalloc(sizeof(*fence_out), GFP_KERNEL); - if (!fence_out) + kcpu_fence = kzalloc(sizeof(*kcpu_fence), GFP_KERNEL); + if (!kcpu_fence) return -ENOMEM; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + fence_out = (struct fence *)kcpu_fence; +#else + fence_out = (struct dma_fence *)kcpu_fence; +#endif + dma_fence_init(fence_out, &kbase_fence_ops, &kbase_csf_fence_lock, @@ -1513,28 +1508,70 @@ static int kbase_kcpu_fence_signal_prepare( dma_fence_get(fence_out); #endif + /* Set reference to KCPU metadata and increment refcount */ + kcpu_fence->metadata = kcpu_queue->metadata; +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + WARN_ON(!atomic_inc_not_zero(&kcpu_fence->metadata->refcount)); +#else + WARN_ON(!refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); +#endif + /* create a sync_file fd representing the fence */ - sync_file = sync_file_create(fence_out); - if (!sync_file) { + *sync_file = sync_file_create(fence_out); + if (!(*sync_file)) { ret = -ENOMEM; goto file_create_fail; } - fd = get_unused_fd_flags(O_CLOEXEC); - if (fd < 0) { - ret = fd; + *fd = get_unused_fd_flags(O_CLOEXEC); + if (*fd < 0) { + ret = *fd; goto fd_flags_fail; } - fence.basep.fd = fd; + fence->basep.fd = *fd; current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; current_command->info.fence.fence = fence_out; + return 0; + +fd_flags_fail: + fput((*sync_file)->file); +file_create_fail: + /* + * Upon failure, dma_fence refcount that was increased by + * dma_fence_get() or sync_file_create() needs to be decreased + * to release it. 
+ */ + kbase_fence_put(fence_out); + current_command->info.fence.fence = NULL; + + return ret; +} + +static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) +{ + struct base_fence fence; + struct sync_file *sync_file = NULL; + int fd; + int ret = 0; + + lockdep_assert_held(&kcpu_queue->lock); + + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) + return -EFAULT; + + ret = kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, &fence, &sync_file, &fd); + if (ret) + return ret; + if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, sizeof(fence))) { ret = -EFAULT; - goto fd_flags_fail; + goto fail; } /* 'sync_file' pointer can't be safely dereferenced once 'fd' is @@ -1544,21 +1581,34 @@ static int kbase_kcpu_fence_signal_prepare( fd_install(fd, sync_file->file); return 0; -fd_flags_fail: +fail: fput(sync_file->file); -file_create_fail: - /* - * Upon failure, dma_fence refcount that was increased by - * dma_fence_get() or sync_file_create() needs to be decreased - * to release it. - */ - dma_fence_put(fence_out); - + kbase_fence_put(current_command->info.fence.fence); current_command->info.fence.fence = NULL; - kfree(fence_out); return ret; } + +int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) +{ + if (!kcpu_queue || !fence_info) + return -EINVAL; + + return kbasep_kcpu_fence_signal_process(kcpu_queue, fence_info); +} +KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_process); + +int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, int *fd) +{ + if (!kcpu_queue || !current_command || !fence || !sync_file || !fd) + return -EINVAL; + + return kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, fence, sync_file, fd); +} +KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init); #endif /* CONFIG_SYNC_FILE */ static void kcpu_queue_process_worker(struct work_struct *data) @@ -1595,6 +1645,9 @@ static int delete_queue(struct kbase_context *kctx, u32 id) mutex_lock(&queue->lock); + /* Metadata struct may outlive KCPU queue. */ + kbase_kcpu_dma_fence_meta_put(queue->metadata); + /* Drain the remaining work for this queue first and go past * all the waits. 
*/ @@ -1701,8 +1754,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) if (drain_queue) { - kbase_kcpu_fence_wait_cancel(queue, - &cmd->info.fence); + kbasep_kcpu_fence_wait_cancel(queue, &cmd->info.fence); } else { status = kbase_kcpu_fence_wait_process(queue, &cmd->info.fence); @@ -1732,8 +1784,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) - status = kbase_kcpu_fence_signal_process( - queue, &cmd->info.fence); + status = kbasep_kcpu_fence_signal_process(queue, &cmd->info.fence); if (status < 0) queue->has_error = true; @@ -2275,6 +2326,7 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) mutex_destroy(&kctx->csf.kcpu_queues.lock); } +KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term); int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_delete *del) @@ -2288,7 +2340,9 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_kcpu_command_queue *queue; int idx; int ret = 0; - +#if IS_ENABLED(CONFIG_SYNC_FILE) + struct kbase_kcpu_dma_fence_meta *metadata; +#endif /* The queue id is of u8 type and we use the index of the kcpu_queues * array as an id, so the number of elements in the array can't be * more than 256. @@ -2334,7 +2388,27 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, queue->fence_context = dma_fence_context_alloc(1); queue->fence_seqno = 0; queue->fence_wait_processed = false; + + metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); + if (!metadata) { + kfree(queue); + ret = -ENOMEM; + goto out; + } + + metadata->kbdev = kctx->kbdev; + metadata->kctx_id = kctx->id; + snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu", kctx->kbdev->id, + kctx->tgid, kctx->id, queue->fence_context); + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + atomic_set(&metadata->refcount, 1); +#else + refcount_set(&metadata->refcount, 1); #endif + queue->metadata = metadata; + atomic_inc(&kctx->kbdev->live_fence_metadata); +#endif /* CONFIG_SYNC_FILE */ queue->enqueue_failed = false; queue->command_started = false; INIT_LIST_HEAD(&queue->jit_blocked); @@ -2360,3 +2434,4 @@ out: return ret; } +KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new); diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h index 9848652..bc3cafa 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.h +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h @@ -22,6 +22,9 @@ #ifndef _KBASE_CSF_KCPU_H_ #define _KBASE_CSF_KCPU_H_ +#include <mali_kbase_fence.h> +#include <mali_kbase_sync.h> + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) #include <linux/fence.h> #else @@ -44,8 +47,8 @@ struct kbase_kcpu_command_import_info { }; /** - * struct kbase_kcpu_command_fence_info - Structure which holds information - * about the fence object enqueued in the kcpu command queue + * struct kbase_kcpu_command_fence_info - Structure which holds information about the + * fence object enqueued in the kcpu command queue * * @fence_cb: Fence callback * @fence: Fence @@ -274,6 +277,8 @@ struct kbase_kcpu_command { * @jit_blocked: Used to keep track of command queues blocked * by a pending JIT allocation command. * @fence_timeout: Timer used to detect the fence wait timeout. + * @metadata: Metadata structure containing basic information about this + * queue for any fence objects associated with this queue. 
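+ *             The metadata is reference counted: the queue holds one
+ *             reference from creation and each fence created on the queue
+ *             takes another, so the structure may outlive the queue itself
+ *             (see the kbase_kcpu_dma_fence_meta_put() call in delete_queue()).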
*/ struct kbase_kcpu_command_queue { struct mutex lock; @@ -295,6 +300,9 @@ struct kbase_kcpu_command_queue { #ifdef CONFIG_MALI_FENCE_DEBUG struct timer_list fence_timeout; #endif /* CONFIG_MALI_FENCE_DEBUG */ +#if IS_ENABLED(CONFIG_SYNC_FILE) + struct kbase_kcpu_dma_fence_meta *metadata; +#endif /* CONFIG_SYNC_FILE */ }; /** @@ -359,4 +367,14 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx); */ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx); +#if IS_ENABLED(CONFIG_SYNC_FILE) +/* Test wrappers for dma fence operations. */ +int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info); + +int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, int *fd); +#endif /* CONFIG_SYNC_FILE */ + #endif /* _KBASE_CSF_KCPU_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h index 6dde56c..b133efd 100644 --- a/mali_kbase/csf/mali_kbase_csf_registers.h +++ b/mali_kbase/csf/mali_kbase_csf_registers.h @@ -229,20 +229,32 @@ #define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */ #define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */ -#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */ -#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */ -#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */ -#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */ +#define GLB_DEBUG_ARG_IN0 0x0FE0 /* Firmware Debug argument array element 0 */ +#define GLB_DEBUG_ARG_IN1 0x0FE4 /* Firmware Debug argument array element 1 */ +#define GLB_DEBUG_ARG_IN2 0x0FE8 /* Firmware Debug argument array element 2 */ +#define GLB_DEBUG_ARG_IN3 0x0FEC /* Firmware Debug argument array element 3 */ + +/* Mappings based on GLB_DEBUG_REQ.FWUTF_RUN bit being different from GLB_DEBUG_ACK.FWUTF_RUN */ +#define GLB_DEBUG_FWUTF_DESTROY GLB_DEBUG_ARG_IN0 /* () Test fixture destroy function address */ +#define GLB_DEBUG_FWUTF_TEST GLB_DEBUG_ARG_IN1 /* () Test index */ +#define GLB_DEBUG_FWUTF_FIXTURE GLB_DEBUG_ARG_IN2 /* () Test fixture index */ +#define GLB_DEBUG_FWUTF_CREATE GLB_DEBUG_ARG_IN3 /* () Test fixture create function address */ + #define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */ #define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */ /* GLB_OUTPUT_BLOCK register offsets */ +#define GLB_DEBUG_ARG_OUT0 0x0FE0 /* Firmware debug result element 0 */ +#define GLB_DEBUG_ARG_OUT1 0x0FE4 /* Firmware debug result element 1 */ +#define GLB_DEBUG_ARG_OUT2 0x0FE8 /* Firmware debug result element 2 */ +#define GLB_DEBUG_ARG_OUT3 0x0FEC /* Firmware debug result element 3 */ + #define GLB_ACK 0x0000 /* () Global acknowledge */ #define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ #define GLB_HALT_STATUS 0x0010 /* () Global halt status */ #define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ #define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ -#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */ +#define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */ #define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */ /* USER register offsets */ @@ -1590,4 +1602,43 @@ 
((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK)) +/* GLB_DEBUG_REQ register */ +#define GLB_DEBUG_REQ_DEBUG_RUN_SHIFT GPU_U(23) +#define GLB_DEBUG_REQ_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_REQ_DEBUG_RUN_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_REQ_DEBUG_RUN_MASK) >> GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_REQ_DEBUG_RUN_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_REQ_DEBUG_RUN_MASK) | \ + (((value) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) & GLB_DEBUG_REQ_DEBUG_RUN_MASK)) + +#define GLB_DEBUG_REQ_RUN_MODE_SHIFT GPU_U(24) +#define GLB_DEBUG_REQ_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) +#define GLB_DEBUG_REQ_RUN_MODE_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_REQ_RUN_MODE_MASK) >> GLB_DEBUG_REQ_RUN_MODE_SHIFT) +#define GLB_DEBUG_REQ_RUN_MODE_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \ + (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK)) + +/* GLB_DEBUG_ACK register */ +#define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23) +#define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_ACK_DEBUG_RUN_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_ACK_DEBUG_RUN_MASK) >> GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_ACK_DEBUG_RUN_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_ACK_DEBUG_RUN_MASK) | \ + (((value) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) & GLB_DEBUG_ACK_DEBUG_RUN_MASK)) + +#define GLB_DEBUG_ACK_RUN_MODE_SHIFT GPU_U(24) +#define GLB_DEBUG_ACK_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) +#define GLB_DEBUG_ACK_RUN_MODE_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_ACK_RUN_MODE_MASK) >> GLB_DEBUG_ACK_RUN_MODE_SHIFT) +#define GLB_DEBUG_ACK_RUN_MODE_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \ + (((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK)) + +/* RUN_MODE values */ +#define GLB_DEBUG_RUN_MODE_TYPE_NOP 0x0 +#define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1 +/* End of RUN_MODE values */ + #endif /* _KBASE_CSF_REGISTERS_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c index cab2ebb..282f7e2 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.c +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c @@ -553,7 +553,7 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) * updated whilst gpu_idle_worker() is executing. 
*/ scheduler->fast_gpu_idle_handling = - (kbdev->csf.gpu_idle_hysteresis_ms == 0) || + (kbdev->csf.gpu_idle_hysteresis_us == 0) || !kbase_csf_scheduler_all_csgs_idle(kbdev); /* The GPU idle worker relies on update_on_slot_queues_offsets() to have @@ -2297,7 +2297,7 @@ static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, insert_group_to_idle_wait(group); } -static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group) +static void update_offslot_non_idle_cnt(struct kbase_queue_group *group) { struct kbase_device *kbdev = group->kctx->kbdev; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; @@ -2789,7 +2789,7 @@ static void remove_scheduled_group(struct kbase_device *kbdev, } static void sched_evict_group(struct kbase_queue_group *group, bool fault, - bool update_non_idle_offslot_grps_cnt) + bool update_non_idle_offslot_grps_cnt_from_run_state) { struct kbase_context *kctx = group->kctx; struct kbase_device *kbdev = kctx->kbdev; @@ -2800,7 +2800,7 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, if (queue_group_scheduled_locked(group)) { u32 i; - if (update_non_idle_offslot_grps_cnt && + if (update_non_idle_offslot_grps_cnt_from_run_state && (group->run_state == KBASE_CSF_GROUP_SUSPENDED || group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { int new_val = atomic_dec_return( @@ -2815,8 +2815,11 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, } if (group->prepared_seq_num != - KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) + KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) { + if (!update_non_idle_offslot_grps_cnt_from_run_state) + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); + } if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) remove_group_from_idle_wait(group); @@ -3222,8 +3225,7 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, scheduler->remaining_tick_slots--; } } else { - update_offslot_non_idle_cnt_for_faulty_grp( - group); + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); } } @@ -3413,8 +3415,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) */ clear_bit(i, slot_mask); set_bit(i, scheduler->csgs_events_enable_mask); - update_offslot_non_idle_cnt_for_onslot_grp( - group); } suspend_wait_failed = true; @@ -3874,11 +3874,16 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, struct kbase_queue_group *const input_grp) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf; unsigned long flags; bool protm_in_use; lockdep_assert_held(&scheduler->lock); + /* Return early if the physical pages have not been allocated yet */ + if (unlikely(!sbuf->pma)) + return; + /* This lock is taken to prevent the issuing of MMU command during the * transition to protected mode. 
This helps avoid the scenario where the * entry to protected mode happens with a memory region being locked and @@ -4049,8 +4054,7 @@ static void scheduler_apply(struct kbase_device *kbdev) if (!kctx_as_enabled(group->kctx) || group->faulted) { /* Drop the head group and continue */ - update_offslot_non_idle_cnt_for_faulty_grp( - group); + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); continue; } @@ -4329,6 +4333,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, set_bit(i, csg_bitmap); } else { group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); } } @@ -5165,16 +5171,12 @@ redo_local_tock: * queue jobs. */ if (protm_grp && scheduler->top_grp == protm_grp) { - int new_val; - dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", protm_grp->handle); - new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, protm_grp, - new_val); - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + update_offslot_non_idle_cnt_for_onslot_grp(protm_grp); + remove_scheduled_group(kbdev, protm_grp); scheduler_check_pmode_progress(kbdev); } else if (scheduler->top_grp) { if (protm_grp) @@ -5988,8 +5990,11 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) mutex_lock(&scheduler->lock); - if (group->run_state == KBASE_CSF_GROUP_IDLE) + if (group->run_state == KBASE_CSF_GROUP_IDLE) { group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); + } /* Check if the group is now eligible for execution in protected mode. */ if (scheduler_get_protm_enter_async_group(kbdev, group)) scheduler_group_check_protm_enter(kbdev, group); @@ -6257,6 +6262,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) int priority; int err; + kbase_ctx_sched_init_ctx(kctx); + for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++priority) { INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]); @@ -6273,7 +6280,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (!kctx->csf.sched.sync_update_wq) { dev_err(kctx->kbdev->dev, "Failed to initialize scheduler context workqueue"); - return -ENOMEM; + err = -ENOMEM; + goto alloc_wq_failed; } INIT_WORK(&kctx->csf.sched.sync_update_work, @@ -6286,10 +6294,16 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (err) { dev_err(kctx->kbdev->dev, "Failed to register a sync update callback"); - destroy_workqueue(kctx->csf.sched.sync_update_wq); + goto event_wait_add_failed; } return err; + +event_wait_add_failed: + destroy_workqueue(kctx->csf.sched.sync_update_wq); +alloc_wq_failed: + kbase_ctx_sched_remove_ctx(kctx); + return err; } void kbase_csf_scheduler_context_term(struct kbase_context *kctx) @@ -6297,6 +6311,8 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); cancel_work_sync(&kctx->csf.sched.sync_update_work); destroy_workqueue(kctx->csf.sched.sync_update_wq); + + kbase_ctx_sched_remove_ctx(kctx); } int kbase_csf_scheduler_init(struct kbase_device *kbdev) diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c index 909362d..14d8097 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c @@ -101,7 +101,7 @@ static struct 
kbase_csf_tiler_heap_chunk *get_last_chunk( * @kctx: kbase context the chunk belongs to. * @chunk: The chunk whose external mappings are going to be removed. * - * This function marks the region as DONT NEED. Along with KBASE_REG_NO_USER_FREE, this indicates + * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other * parts of kbase outside of tiler heap management should not take references on its physical * pages, and should not modify them. @@ -227,12 +227,14 @@ static void remove_unlinked_chunk(struct kbase_context *kctx, kbase_gpu_vm_lock(kctx); kbase_vunmap(kctx, &chunk->map); /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT - * regions), and so we must clear that flag too before freeing + * regions), and so we must clear that flag too before freeing. + * For "no user free", we check that the refcount is 1 as it is a shrinkable region; + * no other code part within kbase can take a reference to it. */ + WARN_ON(chunk->region->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, chunk->region); #if !defined(CONFIG_MALI_VECTOR_DUMP) - chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED); -#else - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); kbase_gpu_vm_unlock(kctx); @@ -297,7 +299,7 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * kbase_gpu_vm_lock(kctx); - /* Some checks done here as KBASE_REG_NO_USER_FREE still allows such things to be made + /* Some checks done here as NO_USER_FREE still allows such things to be made * whilst we had dropped the region lock */ if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) { @@ -305,32 +307,45 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * goto unroll_region; } + /* There is a race condition with regard to KBASE_REG_DONT_NEED, where another + * thread can have the "no user free" refcount increased between kbase_mem_alloc + * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by + * remove_external_chunk_mappings (below). + * + * It should be fine and not a security risk if we let the region leak till + * region tracker termination in such a case. + */ + if (unlikely(chunk->region->no_user_free_refcnt > 1)) { + dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_refcnt > 1!\n"); + goto unroll_region; + } + /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE * being requested, it's useful to document in code what those restrictions are, and ensure * they remain in place in future. 
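	 * As a general pattern in this file, a no_user_free reference taken on
	 * a region via kbase_va_region_no_user_free_get() (or implied by
	 * BASEP_MEM_NO_USER_FREE at allocation time) is dropped again with
	 * kbase_va_region_no_user_free_put() before the region is released.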
*/ if (WARN(!chunk->region->gpu_alloc, - "KBASE_REG_NO_USER_FREE chunks should not have had their alloc freed")) { + "NO_USER_FREE chunks should not have had their alloc freed")) { goto unroll_region; } if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE, - "KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { + "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { goto unroll_region; } if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC), - "KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { + "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { goto unroll_region; } if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED), - "KBASE_REG_NO_USER_FREE chunks should not have been made ephemeral")) { + "NO_USER_FREE chunks should not have been made ephemeral")) { goto unroll_region; } if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1, - "KBASE_REG_NO_USER_FREE chunks should not have been aliased")) { + "NO_USER_FREE chunks should not have been aliased")) { goto unroll_region; } @@ -344,16 +359,21 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * remove_external_chunk_mappings(kctx, chunk); kbase_gpu_vm_unlock(kctx); + /* If page migration is enabled, we don't want to migrate tiler heap pages. + * This does not change if the constituent pages are already marked as isolated. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE); + return chunk; unroll_region: /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT * regions), and so we must clear that flag too before freeing. */ + kbase_va_region_no_user_free_put(kctx, chunk->region); #if !defined(CONFIG_MALI_VECTOR_DUMP) - chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED); -#else - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); kbase_gpu_vm_unlock(kctx); @@ -511,7 +531,7 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap) if (heap->buf_desc_reg) { kbase_vunmap(kctx, &heap->buf_desc_map); kbase_gpu_vm_lock(kctx); - heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg); kbase_gpu_vm_unlock(kctx); } @@ -629,8 +649,8 @@ static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *co return false; } - if (!(reg->flags & KBASE_REG_CPU_RD) || (reg->flags & KBASE_REG_DONT_NEED) || - (reg->flags & KBASE_REG_PF_GROW) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC)) { + if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) || + (reg->flags & KBASE_REG_PF_GROW)) { dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags); return false; } @@ -719,14 +739,17 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_ /* If we don't prevent userspace from unmapping this, we may run into * use-after-free, as we don't check for the existence of the region throughout. 
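* Taking a "no user free" reference below pins the region for the lifetime of the heap; the reference is dropped again in delete_heap() and on the error paths.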
*/ - buf_desc_reg->flags |= KBASE_REG_NO_USER_FREE; heap->buf_desc_va = buf_desc_va; - heap->buf_desc_reg = buf_desc_reg; + heap->buf_desc_reg = kbase_va_region_no_user_free_get(kctx, buf_desc_reg); vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE, KBASE_REG_CPU_RD, &heap->buf_desc_map, KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); + + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE); + kbase_gpu_vm_unlock(kctx); if (unlikely(!vmap_ptr)) { @@ -811,7 +834,7 @@ heap_context_alloc_failed: buf_desc_vmap_failed: if (heap->buf_desc_reg) { kbase_gpu_vm_lock(kctx); - heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg); kbase_gpu_vm_unlock(kctx); } buf_desc_not_suitable: @@ -866,6 +889,25 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, return err; } +/** + * validate_allocation_request - Check whether the chunk allocation request + * received on tiler OOM should be handled at + * current time. + * + * @heap: The tiler heap the OOM is associated with + * @nr_in_flight: Number of fragment jobs in flight + * @pending_frag_count: Number of pending fragment jobs + * + * Context: must hold the tiler heap lock to guarantee its lifetime + * + * Return: + * * 0 - allowed to allocate an additional chunk + * * -EINVAL - invalid + * * -EBUSY - there are fragment jobs still in flight, which may free chunks + * after completing + * * -ENOMEM - the targeted number of in-flight chunks has been reached and + * no new ones will be allocated + */ static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight, u32 pending_frag_count) { diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c index bcab31d..069e827 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c @@ -346,7 +346,11 @@ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) reclaim->batch = HEAP_SHRINKER_BATCH; #if !defined(CONFIG_MALI_VECTOR_DUMP) +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(reclaim); +#else + register_shrinker(reclaim, "mali-csf-tiler-heap"); +#endif #endif } diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c index ddd2fa8..6e7c64b 100644 --- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c @@ -149,9 +149,6 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) dev_dbg(kbdev->dev, "Doorbell mirror interrupt received"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -#ifdef CONFIG_MALI_DEBUG - WARN_ON(!kbase_csf_scheduler_get_nr_active_csgs(kbdev)); -#endif kbase_pm_disable_db_mirror_interrupt(kbdev); kbdev->pm.backend.exit_gpu_sleep_mode = true; kbase_csf_scheduler_invoke_tick(kbdev); diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c index 5a12b32..4f5ac22 100644 --- a/mali_kbase/device/mali_kbase_device.c +++ b/mali_kbase/device/mali_kbase_device.c @@ -321,6 +321,10 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) "Unable to register OOM notifier for Mali - but will continue\n"); kbdev->oom_notifier_block.notifier_call = NULL; } + +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + atomic_set(&kbdev->live_fence_metadata, 0); +#endif return 0; term_as: @@ -344,6 +348,11 @@ void 
kbase_device_misc_term(struct kbase_device *kbdev) if (kbdev->oom_notifier_block.notifier_call) unregister_oom_notifier(&kbdev->oom_notifier_block); + +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + if (atomic_read(&kbdev->live_fence_metadata) > 0) + dev_warn(kbdev->dev, "Terminating Kbase device with live fence metadata!"); +#endif } void kbase_device_free(struct kbase_device *kbdev) diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index a3a0e02..a2ecd08 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -345,7 +345,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( /* Update MMU table */ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, - mmu_sync_info); + mmu_sync_info, NULL, false); if (ret) goto mmu_insert_failed; @@ -480,7 +480,7 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, - fw_ring_buf->num_pages, MCU_AS_NR)); + fw_ring_buf->num_pages, MCU_AS_NR, true)); vunmap(fw_ring_buf->cpu_dump_base); diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c index 43cdf18..21b4e52 100644 --- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c +++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c @@ -23,10 +23,13 @@ #include "mali_kbase.h" /* MEMSYS counter block offsets */ +#define L2_RD_MSG_IN_CU (13) #define L2_RD_MSG_IN (16) #define L2_WR_MSG_IN (18) +#define L2_SNP_MSG_IN (20) #define L2_RD_MSG_OUT (22) #define L2_READ_LOOKUP (26) +#define L2_EXT_READ_NOSNP (30) #define L2_EXT_WRITE_NOSNP_FULL (43) /* SC counter block offsets */ @@ -36,17 +39,23 @@ #define FULL_QUAD_WARPS (21) #define EXEC_INSTR_FMA (27) #define EXEC_INSTR_CVT (28) +#define EXEC_INSTR_SFU (29) #define EXEC_INSTR_MSG (30) #define TEX_FILT_NUM_OPS (39) #define LS_MEM_READ_SHORT (45) #define LS_MEM_WRITE_SHORT (47) #define VARY_SLOT_16 (51) +#define BEATS_RD_LSC_EXT (57) +#define BEATS_RD_TEX (58) +#define BEATS_RD_TEX_EXT (59) +#define FRAG_QUADS_COARSE (68) /* Tiler counter block offsets */ #define IDVS_POS_SHAD_STALL (23) #define PREFETCH_STALL (25) #define VFETCH_POS_READ_WAIT (29) #define VFETCH_VERTEX_WAIT (30) +#define PRIMASSY_STALL (32) #define IDVS_VAR_SHAD_STALL (38) #define ITER_STALL (40) #define PMGR_PTR_RD_STALL (48) @@ -111,6 +120,15 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = { TILER_COUNTER_DEF("vfetch_vertex_wait", -391964, VFETCH_VERTEX_WAIT), }; +static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttix[] = { + TILER_COUNTER_DEF("primassy_stall", 471953, PRIMASSY_STALL), + TILER_COUNTER_DEF("idvs_var_shad_stall", -460559, IDVS_VAR_SHAD_STALL), + + MEMSYS_COUNTER_DEF("l2_rd_msg_in_cu", -6189604, L2_RD_MSG_IN_CU), + MEMSYS_COUNTER_DEF("l2_snp_msg_in", 6289609, L2_SNP_MSG_IN), + MEMSYS_COUNTER_DEF("l2_ext_read_nosnp", 512341, L2_EXT_READ_NOSNP), +}; + /* These tables provide a description of each performance counter * used by the shader cores counter model for energy estimation. 
*/ @@ -150,6 +168,17 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = { SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE), }; +static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttix[] = { + SC_COUNTER_DEF("exec_instr_fma", 192642, EXEC_INSTR_FMA), + SC_COUNTER_DEF("exec_instr_msg", 1326465, EXEC_INSTR_MSG), + SC_COUNTER_DEF("beats_rd_tex", 163518, BEATS_RD_TEX), + SC_COUNTER_DEF("beats_rd_lsc_ext", 127475, BEATS_RD_LSC_EXT), + SC_COUNTER_DEF("frag_quads_coarse", -36247, FRAG_QUADS_COARSE), + SC_COUNTER_DEF("ls_mem_write_short", 51547, LS_MEM_WRITE_SHORT), + SC_COUNTER_DEF("beats_rd_tex_ext", -43370, BEATS_RD_TEX_EXT), + SC_COUNTER_DEF("exec_instr_sfu", 31583, EXEC_INSTR_SFU), +}; + #define IPA_POWER_MODEL_OPS(gpu, init_token) \ const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ .name = "mali-" #gpu "-power-model", \ @@ -181,13 +210,13 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = { #define ALIAS_POWER_MODEL(gpu, as_gpu) \ IPA_POWER_MODEL_OPS(gpu, as_gpu) -/* Reference voltage value is 750 mV. - */ +/* Reference voltage value is 750 mV. */ STANDARD_POWER_MODEL(todx, 750); STANDARD_POWER_MODEL(tgrx, 750); STANDARD_POWER_MODEL(tvax, 750); - STANDARD_POWER_MODEL(ttux, 750); +/* Reference voltage value is 550 mV. */ +STANDARD_POWER_MODEL(ttix, 550); /* Assuming LODX is an alias of TODX for IPA */ ALIAS_POWER_MODEL(lodx, todx); @@ -195,10 +224,14 @@ ALIAS_POWER_MODEL(lodx, todx); /* Assuming LTUX is an alias of TTUX for IPA */ ALIAS_POWER_MODEL(ltux, ttux); +/* Assuming LTIX is an alias of TTIX for IPA */ +ALIAS_POWER_MODEL(ltix, ttix); + static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { &kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops, &kbase_tgrx_ipa_model_ops, &kbase_tvax_ipa_model_ops, - &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops + &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops, + &kbase_ttix_ipa_model_ops, &kbase_ltix_ipa_model_ops, }; const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( @@ -237,6 +270,10 @@ const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) return "mali-ttux-power-model"; case GPU_ID2_PRODUCT_LTUX: return "mali-ltux-power-model"; + case GPU_ID2_PRODUCT_TTIX: + return "mali-ttix-power-model"; + case GPU_ID2_PRODUCT_LTIX: + return "mali-ltix-power-model"; default: return NULL; } diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index a360984..35c3828 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ b/mali_kbase/mali_base_hwconfig_issues.h @@ -596,7 +596,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDU }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, @@ -606,8 +605,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, @@ -617,7 +614,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tOD }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -626,8 +622,6 @@
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0 }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -636,7 +630,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGR }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -645,8 +638,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0 }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -655,7 +646,17 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVA }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { - BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TURSEHW_1997, + BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_END +}; + +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_1997, @@ -667,8 +668,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0 }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -679,7 +678,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTU }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -690,7 +688,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0 }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -701,7 +698,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1 }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -712,8 +708,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2 }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, @@ -722,7 +716,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTI }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index 4522d6c..9c867d1 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -663,8 +663,10 @@ static int kbase_open(struct 
inode *inode, struct file *filp) if (!kbdev) return -ENODEV; - /* Set address space operation for page migration */ +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + /* Set address space operations for page migration */ kbase_mem_migrate_set_address_space_ops(kbdev, filp); +#endif /* Device-wide firmware load is moved here from probing to comply with * Android GKI vendor guideline. @@ -4578,8 +4580,18 @@ int power_control_init(struct kbase_device *kbdev) * from completing its initialization. */ #if defined(CONFIG_PM_OPP) -#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ - defined(CONFIG_REGULATOR)) +#if defined(CONFIG_REGULATOR) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + if (kbdev->nr_regulators > 0) { + kbdev->token = dev_pm_opp_set_regulators(kbdev->dev, regulator_names); + + if (kbdev->token < 0) { + err = kbdev->token; + goto regulators_probe_defer; + } + + } +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) if (kbdev->nr_regulators > 0) { kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev, regulator_names, BASE_MAX_NR_CLOCKS_REGULATORS); @@ -4589,7 +4601,8 @@ int power_control_init(struct kbase_device *kbdev) goto regulators_probe_defer; } } -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ +#endif /* CONFIG_REGULATOR */ err = dev_pm_opp_of_add_table(kbdev->dev); CSTD_UNUSED(err); #endif /* CONFIG_PM_OPP */ @@ -4624,11 +4637,15 @@ void power_control_term(struct kbase_device *kbdev) #if defined(CONFIG_PM_OPP) dev_pm_opp_of_remove_table(kbdev->dev); -#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ - defined(CONFIG_REGULATOR)) +#if defined(CONFIG_REGULATOR) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + if (kbdev->token > -EPERM) + dev_pm_opp_put_regulators(kbdev->token); +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) if (!IS_ERR_OR_NULL(kbdev->opp_table)) dev_pm_opp_put_regulators(kbdev->opp_table); -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ +#endif /* CONFIG_REGULATOR */ #endif /* CONFIG_PM_OPP */ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { @@ -5491,6 +5508,11 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->dev = &pdev->dev; + +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + kbdev->token = -EPERM; +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ + dev_set_drvdata(kbdev->dev, kbdev); #if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) mutex_lock(&kbase_probe_mutex); diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c index 60afde2..3e58500 100644 --- a/mali_kbase/mali_kbase_ctx_sched.c +++ b/mali_kbase/mali_kbase_ctx_sched.c @@ -69,6 +69,12 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev) } } +void kbase_ctx_sched_init_ctx(struct kbase_context *kctx) +{ + kctx->as_nr = KBASEP_AS_NR_INVALID; + atomic_set(&kctx->refcount, 0); +} + /* kbasep_ctx_sched_find_as_for_ctx - Find a free address space * * @kbdev: The context for which to find a free address space @@ -201,9 +207,10 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) { struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; - lockdep_assert_held(&kbdev->mmu_hw_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); 
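/* Both locks are now taken (and released) inside this function, so callers such as kbasep_js_kctx_term() and kbase_csf_scheduler_context_term() no longer need to hold kbase_device::mmu_hw_mutex or the hwaccess_lock around the call. */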
WARN_ON(atomic_read(&kctx->refcount) != 0); @@ -215,6 +222,9 @@ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) kbdev->as_to_kctx[kctx->as_nr] = NULL; kctx->as_nr = KBASEP_AS_NR_INVALID; } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); } void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) diff --git a/mali_kbase/mali_kbase_ctx_sched.h b/mali_kbase/mali_kbase_ctx_sched.h index f787cc3..5a8d175 100644 --- a/mali_kbase/mali_kbase_ctx_sched.h +++ b/mali_kbase/mali_kbase_ctx_sched.h @@ -60,6 +60,15 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev); void kbase_ctx_sched_term(struct kbase_device *kbdev); /** + * kbase_ctx_sched_init_ctx - Initialize per-context data fields for scheduling + * @kctx: The context to initialize + * + * This must be called during context initialization before any other context + * scheduling functions are called on @kctx. + */ +void kbase_ctx_sched_init_ctx(struct kbase_context *kctx); + +/** * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context * @kctx: The context to which to retain a reference * @@ -113,9 +122,6 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); * This function should be called when a context is being destroyed. The * context must no longer have any reference. If it has been assigned an * address space before then the AS will be unprogrammed. - * - * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be - * held whilst calling this function. */ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index 48f8795..722ffc7 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -643,7 +643,6 @@ struct kbase_process { * struct kbase_mem_migrate - Object representing an instance for managing * page migration. * - * @mapping: Pointer to address space struct used for page migration. * @free_pages_list: List of deferred pages to free. Mostly used when page migration * is enabled. Pages in memory pool that require migrating * will be freed instead. However page cannot be freed @@ -654,13 +653,17 @@ struct kbase_process { * @free_pages_workq: Work queue to process the work items queued to free * pages in @free_pages_list. * @free_pages_work: Work item to free pages in @free_pages_list. + * @inode: Pointer to inode whose address space operations are used + * for page migration purposes. */ struct kbase_mem_migrate { - struct address_space *mapping; struct list_head free_pages_list; spinlock_t free_pages_lock; struct workqueue_struct *free_pages_workq; struct work_struct free_pages_work; +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + struct inode *inode; +#endif }; /** @@ -701,6 +704,10 @@ struct kbase_mem_migrate { * @opp_table: Pointer to the device OPP structure maintaining the * link to OPPs attached to a device. This is obtained * after setting regulator names for the device. + * @token: Integer replacement for opp_table in kernel versions + * 6.0 and greater. Value is a token id number when 0 or greater, + * and a Linux errno when negative. Must be initialised + * to a non-zero value, as 0 is a valid token id. * @devname: string containing the name used for GPU device instance, * miscellaneous device is registered using the same name. * @id: Unique identifier for the device, indicates the number of @@ -898,6 +905,10 @@ struct kbase_mem_migrate { * GPU2019-3878.
PM state machine is invoked after * clearing this flag and @hwaccess_lock is used to * serialize the access. + * @mmu_page_migrate_in_progress: Set before starting an MMU page migration transaction + * and cleared after the transaction completes. The PM L2 state machine is + * prevented from entering powering up/down transitions when the + * flag is set; @hwaccess_lock is used to serialize the access. * @poweroff_pending: Set when power off operation for GPU is started, reset when * power on for GPU is started. * @infinite_cache_active_default: Set to enable using infinite cache for all the @@ -978,6 +989,10 @@ struct kbase_mem_migrate { * @oom_notifier_block: notifier_block containing kernel-registered out-of- * memory handler. * @mem_migrate: Per device object for managing page migration. + * @live_fence_metadata: Count of live fence metadata structures created by + * the KCPU queue. These structures may outlive the kbase module + * itself, in which case a warning should + * be produced. */ struct kbase_device { u32 hw_quirks_sc; @@ -1002,9 +1017,11 @@ struct kbase_device { #if IS_ENABLED(CONFIG_REGULATOR) struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS]; unsigned int nr_regulators; -#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + int token; +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) struct opp_table *opp_table; -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ #endif /* CONFIG_REGULATOR */ char devname[DEVNAME_SIZE]; u32 id; @@ -1173,6 +1190,7 @@ struct kbase_device { #if MALI_USE_CSF bool mmu_hw_operation_in_progress; #endif + bool mmu_page_migrate_in_progress; bool poweroff_pending; bool infinite_cache_active_default; @@ -1261,6 +1279,10 @@ struct kbase_device { struct kbase_mem_migrate mem_migrate; + +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + atomic_t live_fence_metadata; +#endif }; /** diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h index dfe33e5..25986f6 100644 --- a/mali_kbase/mali_kbase_fence.h +++ b/mali_kbase/mali_kbase_fence.h @@ -33,6 +33,49 @@ #include "mali_kbase_fence_defs.h" #include "mali_kbase.h" +#if MALI_USE_CSF +/* Maximum number of characters in DMA fence timeline name. */ +#define MAX_TIMELINE_NAME (32) + +/** + * struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing + * information about KCPU queue. One instance per KCPU + * queue. + * + * @refcount: Atomic value to keep track of number of references to an instance. + * An instance can outlive the KCPU queue itself. + * @kbdev: Pointer to Kbase device. + * @kctx_id: Kbase context ID. + * @timeline_name: String of timeline name for associated fence object. + */ +struct kbase_kcpu_dma_fence_meta { +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + atomic_t refcount; +#else + refcount_t refcount; +#endif + struct kbase_device *kbdev; + int kctx_id; + char timeline_name[MAX_TIMELINE_NAME]; +}; + +/** + * struct kbase_kcpu_dma_fence - Structure which extends a dma fence object to include a + * reference to metadata containing more information about it. + * + * @base: Fence object itself. + * @metadata: Pointer to metadata structure.
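+ * + * The metadata is reference-counted separately from the fence (see kbase_kcpu_dma_fence_meta_put()), so it can safely outlive both the fence and the KCPU queue that created it.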
+ */ +struct kbase_kcpu_dma_fence { +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence base; +#else + struct dma_fence base; +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + struct kbase_kcpu_dma_fence_meta *metadata; +}; +#endif + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) extern const struct fence_ops kbase_fence_ops; #else @@ -167,12 +210,56 @@ static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, */ #define kbase_fence_get(fence_info) dma_fence_get((fence_info)->fence) +#if MALI_USE_CSF +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct fence *fence) +#else +static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_fence *fence) +#endif +{ + if (fence->ops == &kbase_fence_ops) + return (struct kbase_kcpu_dma_fence *)fence; + + return NULL; +} + +static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata) +{ +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + if (atomic_dec_and_test(&metadata->refcount)) { +#else + if (refcount_dec_and_test(&metadata->refcount)) { +#endif + atomic_dec(&metadata->kbdev->live_fence_metadata); + kfree(metadata); + } +} + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline void kbase_kcpu_dma_fence_put(struct fence *fence) +#else +static inline void kbase_kcpu_dma_fence_put(struct dma_fence *fence) +#endif +{ + struct kbase_kcpu_dma_fence *kcpu_fence = kbase_kcpu_dma_fence_get(fence); + + if (kcpu_fence) + kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); +} +#endif /* MALI_USE_CSF */ + /** * kbase_fence_put() - Releases a reference to a fence * @fence: Fence to release reference for. */ -#define kbase_fence_put(fence) dma_fence_put(fence) - +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline void kbase_fence_put(struct fence *fence) +#else +static inline void kbase_fence_put(struct dma_fence *fence) +#endif +{ + dma_fence_put(fence); +} #endif /* IS_ENABLED(CONFIG_SYNC_FILE) */ diff --git a/mali_kbase/mali_kbase_fence_ops.c b/mali_kbase/mali_kbase_fence_ops.c index be14155..dd0b63e 100644 --- a/mali_kbase/mali_kbase_fence_ops.c +++ b/mali_kbase/mali_kbase_fence_ops.c @@ -21,7 +21,7 @@ #include <linux/atomic.h> #include <linux/list.h> -#include <mali_kbase_fence_defs.h> +#include <mali_kbase_fence.h> #include <mali_kbase.h> static const char * @@ -41,7 +41,13 @@ kbase_fence_get_timeline_name(struct fence *fence) kbase_fence_get_timeline_name(struct dma_fence *fence) #endif { +#if MALI_USE_CSF + struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; + + return kcpu_fence->metadata->timeline_name; +#else return kbase_timeline_name; +#endif /* MALI_USE_CSF */ } static bool @@ -68,18 +74,36 @@ kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) #endif } +#if MALI_USE_CSF +static void +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +kbase_fence_release(struct fence *fence) +#else +kbase_fence_release(struct dma_fence *fence) +#endif +{ + struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; + + kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); + kfree(kcpu_fence); +} +#endif + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) extern const struct fence_ops kbase_fence_ops; /* silence checker warning */ -const struct fence_ops kbase_fence_ops = { - .wait = fence_default_wait, +const struct fence_ops kbase_fence_ops = { .wait = fence_default_wait, #else extern 
const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */ -const struct dma_fence_ops kbase_fence_ops = { - .wait = dma_fence_default_wait, +const struct dma_fence_ops kbase_fence_ops = { .wait = dma_fence_default_wait, +#endif + .get_driver_name = kbase_fence_get_driver_name, + .get_timeline_name = kbase_fence_get_timeline_name, + .enable_signaling = kbase_fence_enable_signaling, +#if MALI_USE_CSF + .fence_value_str = kbase_fence_fence_value_str, + .release = kbase_fence_release +#else + .fence_value_str = kbase_fence_fence_value_str #endif - .get_driver_name = kbase_fence_get_driver_name, - .get_timeline_name = kbase_fence_get_timeline_name, - .enable_signaling = kbase_fence_enable_signaling, - .fence_value_str = kbase_fence_fence_value_str }; - +KBASE_EXPORT_TEST_API(kbase_fence_ops); diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c index b6a8a2e..bb079c2 100644 --- a/mali_kbase/mali_kbase_hw.c +++ b/mali_kbase/mali_kbase_hw.c @@ -235,6 +235,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( { GPU_ID2_PRODUCT_TTUX, { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTUx_r0p1 }, { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c index 86d311a..491bc06 100644 --- a/mali_kbase/mali_kbase_js.c +++ b/mali_kbase/mali_kbase_js.c @@ -645,6 +645,8 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx) KBASE_DEBUG_ASSERT(kctx != NULL); + kbase_ctx_sched_init_ctx(kctx); + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); @@ -722,6 +724,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&kbdev->js_data.runpool_mutex); } + + kbase_ctx_sched_remove_ctx(kctx); } /* @@ -4030,4 +4034,3 @@ base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio pr req_priority); return kbasep_js_sched_prio_to_atom_prio(kbdev, out_priority); } - diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c index f67e00c..ca74540 100644 --- a/mali_kbase/mali_kbase_kinstr_jm.c +++ b/mali_kbase/mali_kbase_kinstr_jm.c @@ -48,6 +48,11 @@ #include <linux/version_compat_defs.h> #include <linux/wait.h> +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include <uapi/linux/eventpoll.h> +#endif + /* Define static_assert(). * * The macro was introduced in kernel 5.1. But older vendor kernels may define diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c index 7aa0ce9..ef9d224 100644 --- a/mali_kbase/mali_kbase_kinstr_prfcnt.c +++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c @@ -39,6 +39,11 @@ #include <linux/version_compat_defs.h> #include <linux/workqueue.h> +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include <uapi/linux/eventpoll.h> +#endif + /* The minimum allowed interval between dumps, in nanoseconds * (equivalent to 10KHz) */ @@ -128,6 +133,34 @@ struct kbase_kinstr_prfcnt_async { }; /** + * enum kbase_kinstr_prfcnt_client_init_state - A list of + * initialisation states that the + * kinstr_prfcnt client can be at + * during initialisation. 
Useful + * for terminating a partially + * initialised client. + * + * @KINSTR_PRFCNT_UNINITIALISED : Client is uninitialised + * @KINSTR_PRFCNT_PARSE_SETUP : Parse the setup session + * @KINSTR_PRFCNT_ENABLE_MAP : Allocate memory for enable map + * @KINSTR_PRFCNT_DUMP_BUFFER : Allocate memory for dump buffer + * @KINSTR_PRFCNT_SAMPLE_ARRAY : Allocate memory for and initialise sample array + * @KINSTR_PRFCNT_VIRTUALIZER_CLIENT : Create virtualizer client + * @KINSTR_PRFCNT_WAITQ_MUTEX : Create and initialise mutex and waitqueue + * @KINSTR_PRFCNT_INITIALISED : Client is fully initialised + */ +enum kbase_kinstr_prfcnt_client_init_state { + KINSTR_PRFCNT_UNINITIALISED, + KINSTR_PRFCNT_PARSE_SETUP = KINSTR_PRFCNT_UNINITIALISED, + KINSTR_PRFCNT_ENABLE_MAP, + KINSTR_PRFCNT_DUMP_BUFFER, + KINSTR_PRFCNT_SAMPLE_ARRAY, + KINSTR_PRFCNT_VIRTUALIZER_CLIENT, + KINSTR_PRFCNT_WAITQ_MUTEX, + KINSTR_PRFCNT_INITIALISED +}; + +/** * struct kbase_kinstr_prfcnt_client - A kinstr_prfcnt client attached * to a kinstr_prfcnt context. * @kinstr_ctx: kinstr_prfcnt context client is attached to. @@ -1163,19 +1196,46 @@ static void kbasep_kinstr_prfcnt_sample_array_free( memset(sample_arr, 0, sizeof(*sample_arr)); } -void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) +static void +kbasep_kinstr_prfcnt_client_destroy_partial(struct kbase_kinstr_prfcnt_client *cli, + enum kbase_kinstr_prfcnt_client_init_state init_state) { if (!cli) return; - kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); - kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); - kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); - kbase_hwcnt_enable_map_free(&cli->enable_map); - mutex_destroy(&cli->cmd_sync_lock); + while (init_state-- > KINSTR_PRFCNT_UNINITIALISED) { + switch (init_state) { + case KINSTR_PRFCNT_INITIALISED: + /* This shouldn't be reached */ + break; + case KINSTR_PRFCNT_WAITQ_MUTEX: + mutex_destroy(&cli->cmd_sync_lock); + break; + case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: + kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); + break; + case KINSTR_PRFCNT_SAMPLE_ARRAY: + kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); + break; + case KINSTR_PRFCNT_DUMP_BUFFER: + kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); + break; + case KINSTR_PRFCNT_ENABLE_MAP: + kbase_hwcnt_enable_map_free(&cli->enable_map); + break; + case KINSTR_PRFCNT_PARSE_SETUP: + /* Nothing to do here */ + break; + } + } kfree(cli); } +void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) +{ + kbasep_kinstr_prfcnt_client_destroy_partial(cli, KINSTR_PRFCNT_INITIALISED); +} + /** * kbasep_kinstr_prfcnt_hwcnt_reader_release() - hwcnt reader's release. * @inode: Non-NULL pointer to inode structure. 
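/* The staged-initialisation pattern above generalises to any multi-step
 * constructor: creation walks the state enum forwards and, on the first
 * failure, tears down only the states already completed, in reverse order.
 * A minimal free-standing sketch of the idiom (hypothetical "widget" type,
 * not kbase code; assumes <linux/slab.h> and <linux/mutex.h>):
 */
struct widget {
	void *buf;
	struct mutex lock;
};

enum widget_init_state { WIDGET_UNINIT, WIDGET_BUF, WIDGET_MUTEX, WIDGET_DONE };

/* Tear down every state strictly below @state, newest first. */
static void widget_destroy_partial(struct widget *w, enum widget_init_state state)
{
	while (state-- > WIDGET_UNINIT) {
		switch (state) {
		case WIDGET_MUTEX:
			mutex_destroy(&w->lock);
			break;
		case WIDGET_BUF:
			kfree(w->buf);
			break;
		default:
			break;
		}
	}
}

static int widget_init(struct widget *w)
{
	enum widget_init_state state;
	int err = 0;

	for (state = WIDGET_UNINIT; state < WIDGET_DONE; state++) {
		switch (state) {
		case WIDGET_BUF:
			w->buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
			if (!w->buf)
				err = -ENOMEM;
			break;
		case WIDGET_MUTEX:
			mutex_init(&w->lock);
			break;
		default:
			break;
		}
		if (err) {
			/* Unwind only the states that completed before the failure. */
			widget_destroy_partial(w, state);
			return err;
		}
	}
	return 0;
}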
@@ -1790,6 +1850,7 @@ int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinst { int err; struct kbase_kinstr_prfcnt_client *cli; + enum kbase_kinstr_prfcnt_client_init_state init_state; WARN_ON(!kinstr_ctx); WARN_ON(!setup); @@ -1800,73 +1861,86 @@ int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinst if (!cli) return -ENOMEM; - cli->kinstr_ctx = kinstr_ctx; - err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, req_arr); - - if (err < 0) - goto error; + for (init_state = KINSTR_PRFCNT_UNINITIALISED; init_state < KINSTR_PRFCNT_INITIALISED; + init_state++) { + err = 0; + switch (init_state) { + case KINSTR_PRFCNT_PARSE_SETUP: + cli->kinstr_ctx = kinstr_ctx; + err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, + req_arr); - cli->config.buffer_count = MAX_BUFFER_COUNT; - cli->dump_interval_ns = cli->config.period_ns; - cli->next_dump_time_ns = 0; - cli->active = false; - atomic_set(&cli->write_idx, 0); - atomic_set(&cli->read_idx, 0); - atomic_set(&cli->fetch_idx, 0); + break; - err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, - &cli->enable_map); + case KINSTR_PRFCNT_ENABLE_MAP: + cli->config.buffer_count = MAX_BUFFER_COUNT; + cli->dump_interval_ns = cli->config.period_ns; + cli->next_dump_time_ns = 0; + cli->active = false; + atomic_set(&cli->write_idx, 0); + atomic_set(&cli->read_idx, 0); + atomic_set(&cli->fetch_idx, 0); - if (err < 0) - goto error; + err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, &cli->enable_map); + break; - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &cli->config.phys_em); + case KINSTR_PRFCNT_DUMP_BUFFER: + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, + &cli->config.phys_em); - cli->sample_count = cli->config.buffer_count; - atomic_set(&cli->sync_sample_count, cli->sample_count); - cli->sample_size = kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata); + cli->sample_count = cli->config.buffer_count; + atomic_set(&cli->sync_sample_count, cli->sample_count); + cli->sample_size = + kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata); - /* Use virtualizer's metadata to alloc tmp buffer which interacts with - * the HWC virtualizer. - */ - err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, - &cli->tmp_buf); + /* Use virtualizer's metadata to alloc tmp buffer which interacts with + * the HWC virtualizer. + */ + err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, &cli->tmp_buf); + break; - if (err < 0) - goto error; + case KINSTR_PRFCNT_SAMPLE_ARRAY: + /* Disable clock map in setup, and enable clock map when start */ + cli->enable_map.clk_enable_map = 0; - /* Disable clock map in setup, and enable clock map when start */ - cli->enable_map.clk_enable_map = 0; + /* Use metadata from virtualizer to allocate dump buffers if + * kinstr_prfcnt doesn't have the truncated metadata. + */ + err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata); - /* Use metadata from virtualizer to allocate dump buffers if - * kinstr_prfcnt doesn't have the truncated metadata. - */ - err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata); + break; - if (err < 0) - goto error; + case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: + /* Set enable map to be 0 to prevent virtualizer to init and kick the + * backend to count. 
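+ * (As with the clock map above, the maps configured during setup are expected to be applied later, when the client is actually started.)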
+ */ + kbase_hwcnt_gpu_enable_map_from_physical( + &cli->enable_map, &(struct kbase_hwcnt_physical_enable_map){ 0 }); - /* Set enable map to be 0 to prevent virtualizer to init and kick the backend to count */ - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, - &(struct kbase_hwcnt_physical_enable_map){ 0 }); + err = kbase_hwcnt_virtualizer_client_create(kinstr_ctx->hvirt, + &cli->enable_map, &cli->hvcli); + break; - err = kbase_hwcnt_virtualizer_client_create( - kinstr_ctx->hvirt, &cli->enable_map, &cli->hvcli); + case KINSTR_PRFCNT_WAITQ_MUTEX: + init_waitqueue_head(&cli->waitq); + INIT_WORK(&cli->async.dump_work, kbasep_kinstr_prfcnt_async_dump_worker); + mutex_init(&cli->cmd_sync_lock); + break; - if (err < 0) - goto error; + case KINSTR_PRFCNT_INITIALISED: + /* This shouldn't be reached */ + break; + } - init_waitqueue_head(&cli->waitq); - INIT_WORK(&cli->async.dump_work, - kbasep_kinstr_prfcnt_async_dump_worker); - mutex_init(&cli->cmd_sync_lock); + if (err < 0) { + kbasep_kinstr_prfcnt_client_destroy_partial(cli, init_state); + return err; + } + } *out_vcli = cli; return 0; -error: - kbasep_kinstr_prfcnt_client_destroy(cli); - return err; } static size_t kbasep_kinstr_prfcnt_get_block_info_count( diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index 3743b4d..abd01c1 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -803,6 +803,40 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx, } #endif /* MALI_USE_CSF */ +static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) +{ + struct kbase_context *kctx = NULL; + struct rb_root *rbtree = reg->rbtree; + + switch (reg->flags & KBASE_REG_ZONE_MASK) { + case KBASE_REG_ZONE_CUSTOM_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); + break; + case KBASE_REG_ZONE_SAME_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); + break; + case KBASE_REG_ZONE_EXEC_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); + break; +#if MALI_USE_CSF + case KBASE_REG_ZONE_EXEC_FIXED_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); + break; + case KBASE_REG_ZONE_FIXED_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); + break; + case KBASE_REG_ZONE_MCU_SHARED: + /* This is only expected to be called on driver unload. */ + break; +#endif + default: + WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); + break; + } + + return kctx; +} + static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) { struct rb_node *rbnode; @@ -814,6 +848,8 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) rb_erase(rbnode, rbtree); reg = rb_entry(rbnode, struct kbase_va_region, rblink); WARN_ON(reg->va_refcnt != 1); + if (kbase_page_migration_enabled) + kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); /* Reset the start_pfn - as the rbtree is being * destroyed and we've already erased this region, there * is no further need to attempt to remove it. 
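/* kbase_reg_flags_to_kctx() works because each zone's rb_root is embedded
 * directly in struct kbase_context, so container_of() can recover the owning
 * context from nothing but the rbtree pointer plus the region's zone bits.
 * A stripped-down illustration (hypothetical two-zone layout, not the real
 * structure; assumes <linux/rbtree.h> and container_of()):
 */
#define EX_ZONE_MASK (3ul)
#define EX_ZONE_SAME_VA (0ul)
#define EX_ZONE_CUSTOM_VA (1ul)

struct ex_context {
	struct rb_root reg_rbtree_same;
	struct rb_root reg_rbtree_custom;
};

static struct ex_context *ex_ctx_from_rbtree(struct rb_root *rbtree, unsigned long region_flags)
{
	switch (region_flags & EX_ZONE_MASK) {
	case EX_ZONE_SAME_VA:
		return container_of(rbtree, struct ex_context, reg_rbtree_same);
	case EX_ZONE_CUSTOM_VA:
		return container_of(rbtree, struct ex_context, reg_rbtree_custom);
	default:
		return NULL;
	}
}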
@@ -830,6 +866,10 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) void kbase_region_tracker_term(struct kbase_context *kctx) { + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", + kctx->tgid, kctx->id); + kbase_gpu_vm_lock(kctx); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); @@ -1554,6 +1594,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, return NULL; new_reg->va_refcnt = 1; + new_reg->no_user_free_refcnt = 0; new_reg->cpu_alloc = NULL; /* no alloc bound yet */ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ new_reg->rbtree = rbtree; @@ -1572,41 +1613,6 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, KBASE_EXPORT_TEST_API(kbase_alloc_free_region); -static struct kbase_context *kbase_reg_flags_to_kctx( - struct kbase_va_region *reg) -{ - struct kbase_context *kctx = NULL; - struct rb_root *rbtree = reg->rbtree; - - switch (reg->flags & KBASE_REG_ZONE_MASK) { - case KBASE_REG_ZONE_CUSTOM_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_custom); - break; - case KBASE_REG_ZONE_SAME_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_same); - break; - case KBASE_REG_ZONE_EXEC_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_exec); - break; -#if MALI_USE_CSF - case KBASE_REG_ZONE_EXEC_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); - break; - case KBASE_REG_ZONE_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); - break; -#endif - default: - WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); - break; - } - - return kctx; -} - /** * kbase_free_alloced_region - Free a region object. 
* @@ -1720,6 +1726,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned long gwt_mask = ~0; int group_id; struct kbase_mem_phy_alloc *alloc; + bool ignore_page_migration = false; #ifdef CONFIG_MALI_CINSTR_GWT if (kctx->gwt_enabled) @@ -1749,15 +1756,12 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, for (i = 0; i < alloc->imported.alias.nents; i++) { if (alloc->imported.alias.aliased[i].alloc) { err = kbase_mmu_insert_pages( - kctx->kbdev, &kctx->mmu, - reg->start_pfn + (i * stride), - alloc->imported.alias.aliased[i] - .alloc->pages + - alloc->imported.alias.aliased[i] - .offset, + kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), + alloc->imported.alias.aliased[i].alloc->pages + + alloc->imported.alias.aliased[i].offset, alloc->imported.alias.aliased[i].length, - reg->flags & gwt_mask, kctx->as_nr, - group_id, mmu_sync_info); + reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, + NULL, ignore_page_migration); if (err) goto bad_insert; @@ -1777,12 +1781,15 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, } } } else { - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || + reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) + ignore_page_migration = true; + + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - group_id, mmu_sync_info); + reg->flags & gwt_mask, kctx->as_nr, group_id, + mmu_sync_info, reg, ignore_page_migration); if (err) goto bad_insert; kbase_mem_phy_alloc_gpu_mapped(alloc); @@ -1816,7 +1823,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, bad_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - reg->nr_pages, kctx->as_nr); + reg->nr_pages, kctx->as_nr, ignore_page_migration); kbase_remove_va_region(kctx->kbdev, reg); @@ -1845,7 +1852,6 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) switch (alloc->type) { case KBASE_MEM_TYPE_ALIAS: { size_t i = 0; - /* Due to the way the number of valid PTEs and ATEs are tracked * currently, only the GPU virtual range that is backed & mapped * should be passed to the kbase_mmu_teardown_pages() function, @@ -1853,27 +1859,37 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) * separately. 
*/ for (i = 0; i < alloc->imported.alias.nents; i++) { - if (alloc->imported.alias.aliased[i].alloc) { - int err_loop = kbase_mmu_teardown_pages( - kctx->kbdev, &kctx->mmu, - reg->start_pfn + (i * alloc->imported.alias.stride), - alloc->pages + (i * alloc->imported.alias.stride), - alloc->imported.alias.aliased[i].length, - kctx->as_nr); - if (WARN_ON_ONCE(err_loop)) - err = err_loop; - } + struct tagged_addr *phys_alloc = NULL; + int err_loop; + + if (alloc->imported.alias.aliased[i].alloc != NULL) + phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + + alloc->imported.alias.aliased[i].offset; + + err_loop = kbase_mmu_teardown_pages( + kctx->kbdev, &kctx->mmu, + reg->start_pfn + (i * alloc->imported.alias.stride), + phys_alloc, alloc->imported.alias.aliased[i].length, + kctx->as_nr, false); + + if (WARN_ON_ONCE(err_loop)) + err = err_loop; } } break; case KBASE_MEM_TYPE_IMPORTED_UMM: err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, kctx->as_nr); + alloc->pages, reg->nr_pages, kctx->as_nr, true); + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, kbase_reg_current_backed_size(reg), + kctx->as_nr, true); break; default: err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), - kctx->as_nr); + kctx->as_nr, false); break; } @@ -2197,7 +2213,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); - if (reg->flags & KBASE_REG_NO_USER_FREE) { + if (kbase_va_region_is_no_user_free(kctx, reg)) { dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); return -EINVAL; } @@ -2416,8 +2432,11 @@ int kbase_update_region_flags(struct kbase_context *kctx, if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING) reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; - if (flags & BASEP_MEM_NO_USER_FREE) - reg->flags |= KBASE_REG_NO_USER_FREE; + if (flags & BASEP_MEM_NO_USER_FREE) { + kbase_gpu_vm_lock(kctx); + kbase_va_region_no_user_free_get(kctx, reg); + kbase_gpu_vm_unlock(kctx); + } if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; @@ -3206,9 +3225,32 @@ out_rollback: out_term: return -1; } - KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); +void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, + enum kbase_page_status status) +{ + u32 i = 0; + + for (; i < alloc->nents; i++) { + struct tagged_addr phys = alloc->pages[i]; + struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys)); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(phys) || is_partial(phys)) + continue; + + if (!page_md) + continue; + + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status); + spin_unlock(&page_md->migrate_lock); + } +} + bool kbase_check_alloc_flags(unsigned long flags) { /* Only known input flags should be set. */ @@ -3766,7 +3808,15 @@ static void kbase_jit_destroy_worker(struct work_struct *work) mutex_unlock(&kctx->jit_evict_lock); kbase_gpu_vm_lock(kctx); - reg->flags &= ~KBASE_REG_NO_USER_FREE; + + /* + * Incrementing the refcount is prevented on JIT regions. 
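+ * (kbase_jit_allocate() enforces this by rejecting any recycled region whose no_user_free_refcnt is greater than 1.)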
+ * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(reg->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, reg); kbase_mem_free_region(kctx, reg); kbase_gpu_vm_unlock(kctx); } while (1); @@ -4419,7 +4469,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, if (ret < 0) { /* * An update to an allocation from the pool failed, - * chances are slim a new allocation would fair any + * chances are slim a new allocation would fare any * better so return the allocation to the pool and * return the function with failure. */ @@ -4441,6 +4491,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); reg = NULL; goto end; + } else { + /* A suitable JIT allocation existed on the evict list, so we need + * to make sure that the NOT_MOVABLE property is cleared. + */ + if (kbase_page_migration_enabled) { + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + } } } else { /* No suitable JIT allocation was found so create a new one */ @@ -4497,6 +4558,29 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, } } + /* Similarly to tiler heap init, there is a short window of time + * where the (either recycled or newly allocated, in our case) region has + * "no user free" refcount incremented but is still missing the DONT_NEED flag, and + * doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the + * allocation is the least bad option that doesn't lead to a security issue down the + * line (it will eventually be cleaned up during context termination). + * + * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region + * flags. + */ + kbase_gpu_vm_lock(kctx); + if (unlikely(reg->no_user_free_refcnt > 1)) { + kbase_gpu_vm_unlock(kctx); + dev_err(kctx->kbdev->dev, "JIT region has no_user_free_refcnt > 1!\n"); + + mutex_lock(&kctx->jit_evict_lock); + list_move(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_evict_lock); + + reg = NULL; + goto end; + } + trace_mali_jit_alloc(reg, info->id); kctx->jit_current_allocations++; @@ -4514,6 +4598,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, kbase_jit_report_update_pressure(kctx, reg, info->va_pages, KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_gpu_vm_unlock(kctx); end: for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) @@ -4584,6 +4669,12 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) list_move(®->jit_node, &kctx->jit_pool_head); + /* Inactive JIT regions should be freed by the shrinker and not impacted + * by page migration. Once freed, they will enter into the page migration + * state machine via the mempools. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); } @@ -4630,7 +4721,14 @@ bool kbase_jit_evict(struct kbase_context *kctx) mutex_unlock(&kctx->jit_evict_lock); if (reg) { - reg->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. 
+ */ + WARN_ON(reg->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, reg); kbase_mem_free_region(kctx, reg); } @@ -4652,7 +4750,14 @@ void kbase_jit_term(struct kbase_context *kctx) list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(walker->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, walker); kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -4664,7 +4769,14 @@ void kbase_jit_term(struct kbase_context *kctx) list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(walker->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, walker); kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -4922,10 +5034,9 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - pa, kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id, mmu_sync_info); + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, + kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true); if (err == 0) return 0; @@ -5113,7 +5224,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), - kctx->as_nr); + kctx->as_nr, true); } if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index 7e791b3..f727538 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -193,10 +193,11 @@ struct kbase_mem_phy_alloc { * @SPILL_IN_PROGRESS: Transitory state. Corner case where pages in a memory * pool of a dying context are being moved to the device * memory pool. + * @NOT_MOVABLE: Stable state. Page has been allocated for an object that is + * not movable, but may return to be movable when the object + * is freed. * @ALLOCATED_MAPPED: Stable state. Page has been allocated, mapped to GPU * and has reference to kbase_mem_phy_alloc object. - * @MULTI_MAPPED: Stable state. This state is used to manage all use cases - * where a page may have "unusual" mappings. * @PT_MAPPED: Stable state. Similar to ALLOCATED_MAPPED, but page doesn't * reference kbase_mem_phy_alloc object. Used as a page in MMU * page table. @@ -205,9 +206,11 @@ struct kbase_mem_phy_alloc { * unmapping it. This status means that a memory release is * happening and it's still not complete. * @FREE_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. - * A page is isolated while it is in ALLOCATED_MAPPED or - * PT_MAPPED state, but then the driver tries to destroy the - * allocation. 
+ * A page is isolated while it is in ALLOCATED_MAPPED state, + * but then the driver tries to destroy the allocation. + * @FREE_PT_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. + * A page is isolated while it is in PT_MAPPED state, but + * then the driver tries to destroy the allocation. * * Pages can only be migrated in stable states. */ @@ -215,23 +218,32 @@ enum kbase_page_status { MEM_POOL = 0, ALLOCATE_IN_PROGRESS, SPILL_IN_PROGRESS, + NOT_MOVABLE, ALLOCATED_MAPPED, - MULTI_MAPPED, PT_MAPPED, FREE_IN_PROGRESS, FREE_ISOLATED_IN_PROGRESS, + FREE_PT_ISOLATED_IN_PROGRESS, }; +#define PGD_VPFN_LEVEL_MASK ((u64)0x3) +#define PGD_VPFN_LEVEL_GET_LEVEL(pgd_vpfn_level) (pgd_vpfn_level & PGD_VPFN_LEVEL_MASK) +#define PGD_VPFN_LEVEL_GET_VPFN(pgd_vpfn_level) (pgd_vpfn_level & ~PGD_VPFN_LEVEL_MASK) +#define PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \ + ((pgd_vpfn & ~PGD_VPFN_LEVEL_MASK) | (level & PGD_VPFN_LEVEL_MASK)) + /** * struct kbase_page_metadata - Metadata for each page in kbase * * @kbdev: Pointer to kbase device. * @dma_addr: DMA address mapped to page. * @migrate_lock: A spinlock to protect the private metadata. + * @data: Member in union valid based on @status. * @status: Status to keep track if page can be migrated at any * given moment. MSB will indicate if page is isolated. * Protected by @migrate_lock. - * @data: Member in union valid based on @status. + * @vmap_count: Counter of kernel mappings. + * @group_id: Memory group ID obtained at the time of page allocation. * * Each 4KB page will have a reference to this struct in the private field. * This will be used to keep track of information required for Linux page @@ -240,7 +252,6 @@ enum kbase_page_status { struct kbase_page_metadata { dma_addr_t dma_addr; spinlock_t migrate_lock; - u8 status; union { struct { @@ -251,19 +262,25 @@ struct kbase_page_metadata { struct kbase_device *kbdev; } mem_pool; struct { - struct kbase_mem_phy_alloc *phy_alloc; struct kbase_va_region *reg; struct kbase_mmu_table *mmut; - struct page *pgd; u64 vpfn; - size_t page_array_index; } mapped; struct { struct kbase_mmu_table *mmut; - struct page *pgd; - u16 entry_info; + u64 pgd_vpfn_level; } pt_mapped; + struct { + struct kbase_device *kbdev; + } free_isolated; + struct { + struct kbase_device *kbdev; + } free_pt_isolated; } data; + + u8 status; + u8 vmap_count; + u8 group_id; }; /* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is @@ -288,6 +305,20 @@ enum kbase_jit_report_flags { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) }; +/** + * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying + * physical allocation. + * @alloc: the physical allocation containing the pages whose metadata is going + * to be modified + * @status: the status the pages should end up in + * + * Note that this function does not go through all of the checking to ensure that + * proper states are set. Instead, it is only used when we change the allocation + * to NOT_MOVABLE or from NOT_MOVABLE to ALLOCATED_MAPPED + */ +void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, + enum kbase_page_status status); + static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) { KBASE_DEBUG_ASSERT(alloc); @@ -388,6 +419,8 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m * @jit_usage_id: The last just-in-time memory usage ID for this region. * @jit_bin_id: The just-in-time memory bin this region came from. 
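Note: the PGD_VPFN_LEVEL_* helpers added above pack the MMU level of a page table page into the two low bits of the virtual PFN it serves; the masking in PGD_VPFN_LEVEL_SET assumes those bits are clear for an aligned PGD range. A minimal stand-alone round-trip, reusing the macro definitions from the hunk (the example values are mine, not from the patch):

    #include <assert.h>
    #include <stdint.h>

    typedef uint64_t u64;

    #define PGD_VPFN_LEVEL_MASK ((u64)0x3)
    #define PGD_VPFN_LEVEL_GET_LEVEL(v) ((v) & PGD_VPFN_LEVEL_MASK)
    #define PGD_VPFN_LEVEL_GET_VPFN(v) ((v) & ~PGD_VPFN_LEVEL_MASK)
    #define PGD_VPFN_LEVEL_SET(vpfn, level) \
            (((vpfn) & ~PGD_VPFN_LEVEL_MASK) | ((level) & PGD_VPFN_LEVEL_MASK))

    int main(void)
    {
            const u64 vpfn = 0x40000; /* aligned base VPFN covered by a PGD */
            const u64 level = 2;      /* page table level of the PGD */
            const u64 packed = PGD_VPFN_LEVEL_SET(vpfn, level);

            /* A single u64 in struct kbase_page_metadata carries both values. */
            assert(PGD_VPFN_LEVEL_GET_LEVEL(packed) == level);
            assert(PGD_VPFN_LEVEL_GET_VPFN(packed) == vpfn);
            return 0;
    }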
 * @va_refcnt: Number of users of this region. Protected by reg_lock.
+ * @no_user_free_refcnt: Number of users that want to prevent the region from
+ * being freed by userspace.
 * @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of
 * an allocated region
 * The object can be one of:
@@ -508,10 +541,7 @@ struct kbase_va_region {
 #define KBASE_REG_RESERVED_BIT_23 (1ul << 23)
 #endif /* !MALI_USE_CSF */
-/* Whilst this flag is set the GPU allocation is not supposed to be freed by
- * user space. The flag will remain set for the lifetime of JIT allocations.
- */
-#define KBASE_REG_NO_USER_FREE (1ul << 24)
+/* Bit 24 is currently unused and is available for use for a new flag */
 /* Memory has permanent kernel side mapping */
 #define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25)
@@ -652,6 +682,7 @@ struct kbase_va_region {
 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 int va_refcnt;
+ int no_user_free_refcnt;
 };
 /**
@@ -694,6 +725,23 @@ static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg)
 return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg));
 }
+/**
+ * kbase_is_region_shrinkable - Check if a region is "shrinkable".
+ * A shrinkable region is one whose backing pages (reg->gpu_alloc->pages)
+ * can be freed at any point, even though the kbase_va_region structure itself
+ * may have been refcounted.
+ * Regions that aren't on a shrinker, but could be shrunk at any point in the future
+ * without warning, are still considered "shrinkable" (e.g. active JIT allocs).
+ *
+ * @reg: Pointer to region
+ *
+ * Return: true if the region is "shrinkable", false if not.
+ */
+static inline bool kbase_is_region_shrinkable(struct kbase_va_region *reg)
+{
+ return (reg->flags & KBASE_REG_DONT_NEED) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC);
+}
+
 void kbase_remove_va_region(struct kbase_device *kbdev, struct kbase_va_region *reg);
 static inline void kbase_region_refcnt_free(struct kbase_device *kbdev,
@@ -714,6 +762,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get(
 lockdep_assert_held(&kctx->reg_lock);
 WARN_ON(!region->va_refcnt);
+ WARN_ON(region->va_refcnt == INT_MAX);
 /* non-atomic as kctx->reg_lock is held */
 dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n",
@@ -741,6 +790,69 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put(
 return NULL;
 }
+/**
+ * kbase_va_region_is_no_user_free - Check if user free is forbidden for the region.
+ * A region that must not be freed by userspace indicates that it is owned by some other
+ * kbase subsystem, for example tiler heaps, JIT memory or CSF queues.
+ * Such regions must not be shrunk (i.e. have their backing pages freed), except by the
+ * current owner.
+ * Hence, callers cannot rely on this check alone to determine if a region might be shrunk
+ * by any part of kbase. Instead they should use kbase_is_region_shrinkable().
+ *
+ * @kctx: Pointer to kbase context.
+ * @region: Pointer to region.
+ *
+ * Return: true if userspace cannot free the region, false if userspace can free the region.
+ */
+static inline bool kbase_va_region_is_no_user_free(struct kbase_context *kctx,
+ struct kbase_va_region *region)
+{
+ lockdep_assert_held(&kctx->reg_lock);
+ return region->no_user_free_refcnt > 0;
+}
+
+/**
+ * kbase_va_region_no_user_free_get - Increment "no user free" refcount for a region.
+ * Calling this function will prevent the region from being shrunk by parts of kbase that
+ * don't own the region (as long as the refcount stays above zero).
Refer to + * kbase_va_region_is_no_user_free() for more information. + * + * @kctx: Pointer to kbase context. + * @region: Pointer to region (not shrinkable). + * + * Return: the pointer to the region passed as argument. + */ +static inline struct kbase_va_region * +kbase_va_region_no_user_free_get(struct kbase_context *kctx, struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(kbase_is_region_shrinkable(region)); + WARN_ON(region->no_user_free_refcnt == INT_MAX); + + /* non-atomic as kctx->reg_lock is held */ + region->no_user_free_refcnt++; + + return region; +} + +/** + * kbase_va_region_no_user_free_put - Decrement "no user free" refcount for a region. + * + * @kctx: Pointer to kbase context. + * @region: Pointer to region (not shrinkable). + */ +static inline void kbase_va_region_no_user_free_put(struct kbase_context *kctx, + struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(!kbase_va_region_is_no_user_free(kctx, region)); + + /* non-atomic as kctx->reg_lock is held */ + region->no_user_free_refcnt--; +} + /* Common functions */ static inline struct tagged_addr *kbase_get_cpu_phy_pages( struct kbase_va_region *reg) diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index 6ae1f05..f815144 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -36,6 +36,7 @@ #include <linux/cache.h> #include <linux/memory_group_manager.h> #include <linux/math64.h> +#include <linux/migrate.h> #include <mali_kbase.h> #include <mali_kbase_mem_linux.h> @@ -791,7 +792,11 @@ int kbase_mem_evictable_init(struct kbase_context *kctx) * struct shrinker does not define batch */ kctx->reclaim.batch = 0; +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(&kctx->reclaim); +#else + register_shrinker(&kctx->reclaim, "mali-mem"); +#endif return 0; } @@ -855,6 +860,9 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) lockdep_assert_held(&kctx->reg_lock); + /* Memory is in the process of transitioning to the shrinker, and + * should ignore migration attempts + */ kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, 0, gpu_alloc->nents); @@ -862,12 +870,17 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) /* This allocation can't already be on a list. */ WARN_ON(!list_empty(&gpu_alloc->evict_node)); - /* - * Add the allocation to the eviction list, after this point the shrink + /* Add the allocation to the eviction list, after this point the shrink * can reclaim it. */ list_add(&gpu_alloc->evict_node, &kctx->evict_list); atomic_add(gpu_alloc->nents, &kctx->evict_nents); + + /* Indicate to page migration that the memory can be reclaimed by the shrinker. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE); + mutex_unlock(&kctx->jit_evict_lock); kbase_mem_evictable_mark_reclaim(gpu_alloc); @@ -919,6 +932,15 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) gpu_alloc->evicted, 0, mmu_sync_info); gpu_alloc->evicted = 0; + + /* Since the allocation is no longer evictable, and we ensure that + * it grows back to its pre-eviction size, we will consider the + * state of it to be ALLOCATED_MAPPED, as that is the only state + * in which a physical allocation could transition to NOT_MOVABLE + * from. 
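Note: the no_user_free get/put pair above replaces the old single-bit KBASE_REG_NO_USER_FREE with a counted claim, so several kbase subsystems can independently pin the same region. A hedged usage sketch follows; my_subsystem_* are hypothetical callers, not functions from the patch, and the locking mirrors the lockdep assertions in the helpers (kbase_gpu_vm_lock() takes kctx->reg_lock):

    /* Hypothetical owner pinning a region so userspace cannot free it. */
    static int my_subsystem_claim_region(struct kbase_context *kctx,
                                         struct kbase_va_region *reg)
    {
            int err = 0;

            kbase_gpu_vm_lock(kctx); /* holds kctx->reg_lock */

            /* Never pin a region whose backing pages may vanish. */
            if (kbase_is_region_shrinkable(reg))
                    err = -EINVAL;
            else
                    kbase_va_region_no_user_free_get(kctx, reg);

            kbase_gpu_vm_unlock(kctx);
            return err;
    }

    /* Balancing release: the region becomes user-freeable again only
     * once the last claimant has called the put.
     */
    static void my_subsystem_release_region(struct kbase_context *kctx,
                                            struct kbase_va_region *reg)
    {
            kbase_gpu_vm_lock(kctx);
            kbase_va_region_no_user_free_put(kctx, reg);
            kbase_gpu_vm_unlock(kctx);
    }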
+ */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED); } } @@ -977,7 +999,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in * & GPU queue ringbuffer and none of them needs to be explicitly marked * as evictable by Userspace. */ - if (reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, reg)) goto out_unlock; /* Is the region being transitioning between not needed and needed? */ @@ -1299,9 +1321,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id, mmu_sync_info); + kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, + kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true); if (err) goto bad_insert; @@ -1327,7 +1348,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx, bad_pad_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - alloc->nents, kctx->as_nr); + alloc->nents, kctx->as_nr, true); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1356,7 +1377,7 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, int err; err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, kctx->as_nr); + alloc->pages, reg->nr_pages, kctx->as_nr, true); WARN_ON(err); } @@ -1885,9 +1906,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* validate found region */ if (kbase_is_region_invalid_or_free(aliasing_reg)) goto bad_handle; /* Not found/already free */ - if (aliasing_reg->flags & KBASE_REG_DONT_NEED) + if (kbase_is_region_shrinkable(aliasing_reg)) goto bad_handle; /* Ephemeral region */ - if (aliasing_reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, aliasing_reg)) goto bad_handle; /* JIT regions can't be * aliased. 
NO_USER_FREE flag * covers the entire lifetime @@ -2161,11 +2182,9 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, /* Map the new pages into the GPU */ phy_pages = kbase_get_gpu_phy_pages(reg); - ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn + old_pages, - phy_pages + old_pages, delta, reg->flags, - kctx->as_nr, reg->gpu_alloc->group_id, - mmu_sync_info); + ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, + phy_pages + old_pages, delta, reg->flags, kctx->as_nr, + reg->gpu_alloc->group_id, mmu_sync_info, reg, false); return ret; } @@ -2194,7 +2213,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, int ret = 0; ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, - alloc->pages + new_pages, delta, kctx->as_nr); + alloc->pages + new_pages, delta, kctx->as_nr, false); return ret; } @@ -2259,10 +2278,10 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (atomic_read(®->cpu_alloc->kernel_mappings) > 0) goto out_unlock; - if (reg->flags & KBASE_REG_DONT_NEED) + if (kbase_is_region_shrinkable(reg)) goto out_unlock; - if (reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, reg)) goto out_unlock; #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED @@ -2659,6 +2678,8 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, size_t size; int err = 0; + lockdep_assert_held(&kctx->reg_lock); + dev_dbg(kctx->kbdev->dev, "%s\n", __func__); size = (vma->vm_end - vma->vm_start); nr_pages = size >> PAGE_SHIFT; @@ -2772,7 +2793,6 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, /* adjust down nr_pages to what we have physically */ *nr_pages = kbase_reg_current_backed_size(reg); - if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, reg->nr_pages, 1, mmu_sync_info) != 0) { dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); @@ -3013,6 +3033,99 @@ void kbase_sync_mem_regions(struct kbase_context *kctx, } } +/** + * kbase_vmap_phy_pages_migrate_count_increment - Increment VMAP count for + * array of physical pages + * + * @pages: Array of pages. + * @page_count: Number of pages. + * @flags: Region flags. + * + * This function is supposed to be called only if page migration support + * is enabled in the driver. + * + * The counter of kernel CPU mappings of the physical pages involved in a + * mapping operation is incremented by 1. Errors are handled by making pages + * not movable. Permanent kernel mappings will be marked as not movable, too. + */ +static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pages, + size_t page_count, unsigned long flags) +{ + size_t i; + + for (i = 0; i < page_count; i++) { + struct page *p = as_page(pages[i]); + struct kbase_page_metadata *page_md = kbase_page_private(p); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(pages[i]) || is_partial(pages[i])) + continue; + + spin_lock(&page_md->migrate_lock); + /* Mark permanent kernel mappings as NOT_MOVABLE because they're likely + * to stay mapped for a long time. However, keep on counting the number + * of mappings even for them: they don't represent an exception for the + * vmap_count. + * + * At the same time, errors need to be handled if a client tries to add + * too many mappings, hence a page may end up in the NOT_MOVABLE state + * anyway even if it's not a permanent kernel mapping. 
+ */ + if (flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + if (page_md->vmap_count < U8_MAX) + page_md->vmap_count++; + else + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } +} + +/** + * kbase_vunmap_phy_pages_migrate_count_decrement - Decrement VMAP count for + * array of physical pages + * + * @pages: Array of pages. + * @page_count: Number of pages. + * + * This function is supposed to be called only if page migration support + * is enabled in the driver. + * + * The counter of kernel CPU mappings of the physical pages involved in a + * mapping operation is decremented by 1. Errors are handled by making pages + * not movable. + */ +static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *pages, + size_t page_count) +{ + size_t i; + + for (i = 0; i < page_count; i++) { + struct page *p = as_page(pages[i]); + struct kbase_page_metadata *page_md = kbase_page_private(p); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(pages[i]) || is_partial(pages[i])) + continue; + + spin_lock(&page_md->migrate_lock); + /* Decrement the number of mappings for all kinds of pages, including + * pages which are NOT_MOVABLE (e.g. permanent kernel mappings). + * However, errors still need to be handled if a client tries to remove + * more mappings than created. + */ + if (page_md->vmap_count == 0) + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + else + page_md->vmap_count--; + spin_unlock(&page_md->migrate_lock); + } +} + static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, kbase_vmap_flag vmap_flags) @@ -3085,6 +3198,13 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi */ cpu_addr = vmap(pages, page_count, VM_MAP, prot); + /* If page migration is enabled, increment the number of VMA mappings + * of all physical pages. In case of errors, e.g. too many mappings, + * make the page not movable to prevent trouble. + */ + if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type)) + kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags); + kfree(pages); if (!cpu_addr) @@ -3108,6 +3228,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi atomic_add(page_count, &kctx->permanent_mapped_pages); kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); + return 0; } @@ -3186,6 +3307,17 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, vunmap(addr); + /* If page migration is enabled, decrement the number of VMA mappings + * for all physical pages. Now is a good time to do it because references + * haven't been released yet. 
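Note: the vmap counter maintained here is what later lets kbase_page_isolate() refuse pages with live kernel mappings: a kernel vmap would keep pointing at the old physical page after a move, so it has to veto isolation. A condensed, illustrative predicate (a sketch only, not a function from the patch; the extra pool-side checks applied to MEM_POOL pages are omitted):

    static bool sketch_page_can_be_isolated(const struct kbase_page_metadata *md)
    {
            switch (PAGE_STATUS_GET(md->status)) {
            case ALLOCATED_MAPPED:
                    /* CPU and GPU mappings can be rewritten on migration,
                     * a kernel vmap cannot.
                     */
                    return md->vmap_count == 0;
            case PT_MAPPED:
                    return true;
            case MEM_POOL:
                    /* Pool pages pass further pool checks, omitted here. */
                    return true;
            case NOT_MOVABLE:
            default:
                    /* Pinned or transitory states are never isolated. */
                    return false;
            }
    }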
+ */ + if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) { + const size_t page_count = PFN_UP(map->offset_in_page + map->size); + struct tagged_addr *pages_array = map->cpu_pages; + + kbase_vunmap_phy_pages_migrate_count_decrement(pages_array, page_count); + } + if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) { diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h index 5b12e18..6dda44b 100644 --- a/mali_kbase/mali_kbase_mem_linux.h +++ b/mali_kbase/mali_kbase_mem_linux.h @@ -284,7 +284,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, struct kbase_va_reg * have been released in the mean time. * * Or, it must have been refcounted with a call to kbase_va_region_alloc_get(), and the region * lock is now held again. - * * Or, @reg has had KBASE_REG_NO_USER_FREE set at creation time or under the region lock, and the + * * Or, @reg has had NO_USER_FREE set at creation time or under the region lock, and the * region lock is now held again. * * The acceptable @vmap_flags are those in %KBASE_VMAP_INPUT_FLAGS. diff --git a/mali_kbase/mali_kbase_mem_migrate.c b/mali_kbase/mali_kbase_mem_migrate.c index dfa7025..8c62bd3 100644 --- a/mali_kbase/mali_kbase_mem_migrate.c +++ b/mali_kbase/mali_kbase_mem_migrate.c @@ -22,11 +22,11 @@ /** * DOC: Base kernel page migration implementation. */ - #include <linux/migrate.h> #include <mali_kbase.h> #include <mali_kbase_mem_migrate.h> +#include <mmu/mali_kbase_mmu.h> /* Global integer used to determine if module parameter value has been * provided and if page migration feature is enabled. @@ -36,7 +36,12 @@ int kbase_page_migration_enabled; module_param(kbase_page_migration_enabled, int, 0444); KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); -bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) +static const struct movable_operations movable_ops; +#endif + +bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, + u8 group_id) { struct kbase_page_metadata *page_md = kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); @@ -48,17 +53,40 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a set_page_private(p, (unsigned long)page_md); page_md->dma_addr = dma_addr; page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATE_IN_PROGRESS); + page_md->vmap_count = 0; + page_md->group_id = group_id; spin_lock_init(&page_md->migrate_lock); lock_page(p); - if (kbdev->mem_migrate.mapping) - __SetPageMovable(p, kbdev->mem_migrate.mapping); +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(p, &movable_ops); +#else + /* In some corner cases, the driver may attempt to allocate memory pages + * even before the device file is open and the mapping for address space + * operations is created. In that case, it is impossible to assign address + * space operations to memory pages: simply pretend that they are movable, + * even if they are not. + * + * The page will go through all state transitions but it will never be + * actually considered movable by the kernel. This is due to the fact that + * the page cannot be marked as NOT_MOVABLE upon creation, otherwise the + * memory pool will always refuse to add it to the pool and schedule + * a worker thread to free it later. 
+ *
+ * Page metadata may seem redundant in this case, but it is not,
+ * because memory pools expect metadata to be present when page migration
+ * is enabled and because the pages may always return to memory pools and
+ * gain the movable property later on in their life cycle.
+ */
+ if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping)
+ __SetPageMovable(p, kbdev->mem_migrate.inode->i_mapping);
+#endif
 unlock_page(p);
 return true;
 }
-static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p)
+static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, u8 *group_id)
 {
 struct device *const dev = kbdev->dev;
 struct kbase_page_metadata *page_md;
@@ -68,6 +96,8 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p)
 if (!page_md)
 return;
+ if (group_id)
+ *group_id = page_md->group_id;
 dma_addr = kbase_dma_addr(p);
 dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
@@ -88,6 +118,7 @@ static void kbase_free_pages_worker(struct work_struct *work)
 spin_unlock(&mem_migrate->free_pages_lock);
 list_for_each_entry_safe(p, tmp, &free_list, lru) {
+ u8 group_id = 0;
 list_del_init(&p->lru);
 lock_page(p);
@@ -95,8 +126,8 @@ static void kbase_free_pages_worker(struct work_struct *work)
 __ClearPageMovable(p);
 unlock_page(p);
- kbase_free_page_metadata(kbdev, p);
- __free_pages(p, 0);
+ kbase_free_page_metadata(kbdev, p, &group_id);
+ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, group_id, p, 0);
 }
 }
@@ -110,6 +141,135 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p)
 }
 /**
+ * kbasep_migrate_page_pt_mapped - Migrate a memory page that is mapped
+ * in a PGD of kbase_mmu_table.
+ *
+ * @old_page: Existing PGD page to remove
+ * @new_page: Destination for migrating the existing PGD page to
+ *
+ * Replace an existing PGD page with a new page by migrating its content. More specifically:
+ * the new page shall replace the existing PGD page in the MMU page table. Before returning,
+ * the new page shall be set as movable and not isolated, while the old page shall lose
+ * the movable property. The metadata attached to the PGD page is transferred to the
+ * new (replacement) page.
+ *
+ * Return: 0 on migration success, or -EAGAIN for a later retry. Otherwise it's a failure
+ * and the migration is aborted.
+ */
+static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new_page)
+{
+ struct kbase_page_metadata *page_md = kbase_page_private(old_page);
+ struct kbase_context *kctx = page_md->data.pt_mapped.mmut->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ dma_addr_t old_dma_addr = page_md->dma_addr;
+ dma_addr_t new_dma_addr;
+ int ret;
+
+ /* Create a new dma map for the new page */
+ new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(kbdev->dev, new_dma_addr))
+ return -ENOMEM;
+
+ /* Lock context to protect access to the page in physical allocation.
+ * This blocks the CPU page fault handler from remapping pages.
+ * Only MCU's mmut is device wide, i.e. no corresponding kctx.
+ */ + kbase_gpu_vm_lock(kctx); + + ret = kbase_mmu_migrate_page( + as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr, + new_dma_addr, PGD_VPFN_LEVEL_GET_LEVEL(page_md->data.pt_mapped.pgd_vpfn_level)); + + if (ret == 0) { + dma_unmap_page(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + __ClearPageMovable(old_page); + ClearPagePrivate(old_page); + put_page(old_page); + +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(new_page, &movable_ops); +#else + if (kbdev->mem_migrate.inode->i_mapping) + __SetPageMovable(new_page, kbdev->mem_migrate.inode->i_mapping); +#endif + SetPagePrivate(new_page); + get_page(new_page); + } else + dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Page fault handler for CPU mapping unblocked. */ + kbase_gpu_vm_unlock(kctx); + + return ret; +} + +/* + * kbasep_migrate_page_allocated_mapped - Migrate a memory page that is both + * allocated and mapped. + * + * @old_page: Page to remove. + * @new_page: Page to add. + * + * Replace an old page with a new page by migrating its content and all its + * CPU and GPU mappings. More specifically: the new page shall replace the + * old page in the MMU page table, as well as in the page array of the physical + * allocation, which is used to create CPU mappings. Before returning, the new + * page shall be set as movable and not isolated, while the old page shall lose + * the movable property. + */ +static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct page *new_page) +{ + struct kbase_page_metadata *page_md = kbase_page_private(old_page); + struct kbase_context *kctx = page_md->data.mapped.mmut->kctx; + dma_addr_t old_dma_addr, new_dma_addr; + int ret; + + old_dma_addr = page_md->dma_addr; + new_dma_addr = dma_map_page(kctx->kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(kctx->kbdev->dev, new_dma_addr)) + return -ENOMEM; + + /* Lock context to protect access to array of pages in physical allocation. + * This blocks the CPU page fault handler from remapping pages. + */ + kbase_gpu_vm_lock(kctx); + + /* Unmap the old physical range. */ + unmap_mapping_range(kctx->filp->f_inode->i_mapping, page_md->data.mapped.vpfn << PAGE_SHIFT, + PAGE_SIZE, 1); + + ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)), + as_tagged(page_to_phys(new_page)), old_dma_addr, new_dma_addr, + MIDGARD_MMU_BOTTOMLEVEL); + + if (ret == 0) { + dma_unmap_page(kctx->kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + SetPagePrivate(new_page); + get_page(new_page); + + /* Clear PG_movable from the old page and release reference. */ + ClearPagePrivate(old_page); + __ClearPageMovable(old_page); + put_page(old_page); + + /* Set PG_movable to the new page. */ +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(new_page, &movable_ops); +#else + if (kctx->kbdev->mem_migrate.inode->i_mapping) + __SetPageMovable(new_page, kctx->kbdev->mem_migrate.inode->i_mapping); +#endif + } else + dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Page fault handler for CPU mapping unblocked. */ + kbase_gpu_vm_unlock(kctx); + + return ret; +} + +/** * kbase_page_isolate - Isolate a page for migration. * * @p: Pointer of the page struct of page to isolate. 
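Note: both kbasep_migrate_page_pt_mapped() and kbasep_migrate_page_allocated_mapped() above follow the same shape; the sketch below condenses it under that assumption (error paths, the unmap_mapping_range() step for CPU mappings, and the movable-flag handover are elided; names are from the patch):

    /* Condensed common shape of the two migration callbacks above. */
    static int sketch_migrate_one_page(struct kbase_device *kbdev,
                                       struct kbase_context *kctx,
                                       struct page *old_page,
                                       struct page *new_page, int mmu_level)
    {
            struct kbase_page_metadata *page_md = kbase_page_private(old_page);
            dma_addr_t new_dma_addr;
            int ret;

            /* 1. Give the destination page a DMA mapping up front. */
            new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE,
                                        DMA_BIDIRECTIONAL);
            if (dma_mapping_error(kbdev->dev, new_dma_addr))
                    return -ENOMEM;

            /* 2. Block the CPU page fault handler from remapping pages. */
            kbase_gpu_vm_lock(kctx);

            /* 3. Copy content and swap the GPU page table entry. */
            ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)),
                                         as_tagged(page_to_phys(new_page)),
                                         page_md->dma_addr, new_dma_addr,
                                         mmu_level);

            /* 4. On success, movability and the metadata reference travel
             * from old_page to new_page; on failure, undo the DMA map.
             */
            if (ret)
                    dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE,
                                   DMA_BIDIRECTIONAL);

            kbase_gpu_vm_unlock(kctx);
            return ret;
    }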
@@ -127,6 +287,9 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) CSTD_UNUSED(mode); + if (!PageMovable(p) || !page_md) + return false; + if (!spin_trylock(&page_md->migrate_lock)) return false; @@ -146,17 +309,28 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) atomic_inc(&mem_pool->isolation_in_progress_cnt); break; case ALLOCATED_MAPPED: + /* Mark the page into isolated state, but only if it has no + * kernel CPU mappings + */ + if (page_md->vmap_count == 0) + page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); + break; case PT_MAPPED: - /* Only pages in a memory pool can be isolated for now. */ + /* Mark the page into isolated state. */ + page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); break; case SPILL_IN_PROGRESS: case ALLOCATE_IN_PROGRESS: case FREE_IN_PROGRESS: - /* Transitory state: do nothing. */ + break; + case NOT_MOVABLE: + /* Opportunistically clear the movable property for these pages */ + __ClearPageMovable(p); break; default: /* State should always fall in one of the previous cases! - * Also notice that FREE_ISOLATED_IN_PROGRESS is impossible because + * Also notice that FREE_ISOLATED_IN_PROGRESS or + * FREE_PT_ISOLATED_IN_PROGRESS is impossible because * that state only applies to pages that are already isolated. */ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); @@ -204,17 +378,31 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) * * Return: 0 on success, error code otherwise. */ +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) static int kbase_page_migrate(struct address_space *mapping, struct page *new_page, struct page *old_page, enum migrate_mode mode) +#else +static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum migrate_mode mode) +#endif { int err = 0; bool status_mem_pool = false; + bool status_free_pt_isolated_in_progress = false; + bool status_free_isolated_in_progress = false; + bool status_pt_mapped = false; + bool status_mapped = false; + bool status_not_movable = false; struct kbase_page_metadata *page_md = kbase_page_private(old_page); - struct kbase_device *kbdev; + struct kbase_device *kbdev = NULL; +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) CSTD_UNUSED(mapping); +#endif CSTD_UNUSED(mode); + if (!PageMovable(old_page) || !page_md) + return -EINVAL; + if (!spin_trylock(&page_md->migrate_lock)) return -EAGAIN; @@ -229,10 +417,22 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa kbdev = page_md->data.mem_pool.kbdev; break; case ALLOCATED_MAPPED: + status_mapped = true; + break; case PT_MAPPED: + status_pt_mapped = true; + break; case FREE_ISOLATED_IN_PROGRESS: - case MULTI_MAPPED: - /* So far, only pages in a memory pool can be migrated. */ + status_free_isolated_in_progress = true; + kbdev = page_md->data.free_isolated.kbdev; + break; + case FREE_PT_ISOLATED_IN_PROGRESS: + status_free_pt_isolated_in_progress = true; + kbdev = page_md->data.free_pt_isolated.kbdev; + break; + case NOT_MOVABLE: + status_not_movable = true; + break; default: /* State should always fall in one of the previous cases! 
*/ err = -EAGAIN; @@ -241,17 +441,27 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa spin_unlock(&page_md->migrate_lock); - if (status_mem_pool) { + if (status_mem_pool || status_free_isolated_in_progress || + status_free_pt_isolated_in_progress) { struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; - kbase_free_page_metadata(kbdev, old_page); + kbase_free_page_metadata(kbdev, old_page, NULL); __ClearPageMovable(old_page); + put_page(old_page); /* Just free new page to avoid lock contention. */ INIT_LIST_HEAD(&new_page->lru); + get_page(new_page); set_page_private(new_page, 0); kbase_free_page_later(kbdev, new_page); queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } else if (status_not_movable) { + __ClearPageMovable(old_page); + err = -EINVAL; + } else if (status_mapped) { + err = kbasep_migrate_page_allocated_mapped(old_page, new_page); + } else if (status_pt_mapped) { + err = kbasep_migrate_page_pt_mapped(old_page, new_page); } return err; @@ -270,13 +480,17 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa static void kbase_page_putback(struct page *p) { bool status_mem_pool = false; + bool status_free_isolated_in_progress = false; + bool status_free_pt_isolated_in_progress = false; struct kbase_page_metadata *page_md = kbase_page_private(p); - struct kbase_device *kbdev; + struct kbase_device *kbdev = NULL; spin_lock(&page_md->migrate_lock); - /* Page must have been isolated to reach here but metadata is incorrect. */ - WARN_ON(!IS_PAGE_ISOLATED(page_md->status)); + if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) { + spin_unlock(&page_md->migrate_lock); + return; + } switch (PAGE_STATUS_GET(page_md->status)) { case MEM_POOL: @@ -284,11 +498,22 @@ static void kbase_page_putback(struct page *p) kbdev = page_md->data.mem_pool.kbdev; break; case ALLOCATED_MAPPED: + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + break; case PT_MAPPED: - case FREE_ISOLATED_IN_PROGRESS: - /* Only pages in a memory pool can be isolated for now. - * Therefore only pages in a memory pool can be 'putback'. + case NOT_MOVABLE: + /* Pages should no longer be isolated if they are in a stable state + * and used by the driver. */ + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + break; + case FREE_ISOLATED_IN_PROGRESS: + status_free_isolated_in_progress = true; + kbdev = page_md->data.free_isolated.kbdev; + break; + case FREE_PT_ISOLATED_IN_PROGRESS: + status_free_pt_isolated_in_progress = true; + kbdev = page_md->data.free_pt_isolated.kbdev; break; default: /* State should always fall in one of the previous cases! */ @@ -297,34 +522,57 @@ static void kbase_page_putback(struct page *p) spin_unlock(&page_md->migrate_lock); - /* If page was in a memory pool then just free it to avoid lock contention. */ - if (!WARN_ON(!status_mem_pool)) { - struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; - + /* If page was in a memory pool then just free it to avoid lock contention. The + * same is also true to status_free_pt_isolated_in_progress. 
+ */ + if (status_mem_pool || status_free_isolated_in_progress || + status_free_pt_isolated_in_progress) { __ClearPageMovable(p); - list_del_init(&p->lru); - kbase_free_page_later(kbdev, p); - queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + if (!WARN_ON_ONCE(!kbdev)) { + struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + + kbase_free_page_later(kbdev, p); + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } } } +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) +static const struct movable_operations movable_ops = { + .isolate_page = kbase_page_isolate, + .migrate_page = kbase_page_migrate, + .putback_page = kbase_page_putback, +}; +#else static const struct address_space_operations kbase_address_space_ops = { .isolate_page = kbase_page_isolate, .migratepage = kbase_page_migrate, .putback_page = kbase_page_putback, }; +#endif +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp) { + mutex_lock(&kbdev->fw_load_lock); + if (filp) { filp->f_inode->i_mapping->a_ops = &kbase_address_space_ops; - if (!kbdev->mem_migrate.mapping) - kbdev->mem_migrate.mapping = filp->f_inode->i_mapping; - else - WARN_ON(kbdev->mem_migrate.mapping != filp->f_inode->i_mapping); + if (!kbdev->mem_migrate.inode) { + kbdev->mem_migrate.inode = filp->f_inode; + /* This reference count increment is balanced by iput() + * upon termination. + */ + atomic_inc(&filp->f_inode->i_count); + } else { + WARN_ON(kbdev->mem_migrate.inode != filp->f_inode); + } } + + mutex_unlock(&kbdev->fw_load_lock); } +#endif void kbase_mem_migrate_init(struct kbase_device *kbdev) { @@ -336,6 +584,9 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev) spin_lock_init(&mem_migrate->free_pages_lock); INIT_LIST_HEAD(&mem_migrate->free_pages_list); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + mem_migrate->inode = NULL; +#endif mem_migrate->free_pages_workq = alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1); INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker); @@ -347,4 +598,7 @@ void kbase_mem_migrate_term(struct kbase_device *kbdev) if (mem_migrate->free_pages_workq) destroy_workqueue(mem_migrate->free_pages_workq); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + iput(mem_migrate->inode); +#endif } diff --git a/mali_kbase/mali_kbase_mem_migrate.h b/mali_kbase/mali_kbase_mem_migrate.h index 6610c0c..30d0803 100644 --- a/mali_kbase/mali_kbase_mem_migrate.h +++ b/mali_kbase/mali_kbase_mem_migrate.h @@ -41,6 +41,8 @@ extern int kbase_page_migration_enabled; * @kbdev: Pointer to kbase device. * @p: Page to assign metadata to. * @dma_addr: DMA address mapped to paged. + * @group_id: Memory group ID associated with the entity that is + * allocating the page metadata. * * This will allocate memory for the page's metadata, initialize it and * assign a reference to the page's private field. Importantly, once @@ -49,7 +51,8 @@ extern int kbase_page_migration_enabled; * * Return: true if successful or false otherwise. */ -bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr); +bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, + u8 group_id); /** * kbase_free_page_later - Defer freeing of given page. 
@@ -61,6 +64,7 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a */ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) /* * kbase_mem_migrate_set_address_space_ops - Set address space operations * @@ -72,6 +76,7 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); * add a reference to @kbdev. */ void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp); +#endif /* * kbase_mem_migrate_init - Initialise kbase page migration diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c index dce066d..bede1f4 100644 --- a/mali_kbase/mali_kbase_mem_pool.c +++ b/mali_kbase/mali_kbase_mem_pool.c @@ -57,37 +57,59 @@ static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) return kbase_mem_pool_size(pool) == 0; } -static void set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, +static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, struct list_head *page_list, size_t *list_size) { struct kbase_page_metadata *page_md = kbase_page_private(p); + bool not_movable = false; lockdep_assert_held(&pool->pool_lock); + /* Free the page instead of adding it to the pool if it's not movable. + * Only update page status and add the page to the memory pool if + * it is not isolated. + */ spin_lock(&page_md->migrate_lock); - /* Only update page status and add the page to the memory pool if it is not isolated */ - if (!WARN_ON(IS_PAGE_ISOLATED(page_md->status))) { + if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) { + not_movable = true; + } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) { page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL); page_md->data.mem_pool.pool = pool; page_md->data.mem_pool.kbdev = pool->kbdev; - list_move(&p->lru, page_list); + list_add(&p->lru, page_list); (*list_size)++; } spin_unlock(&page_md->migrate_lock); + + if (not_movable) { + kbase_free_page_later(pool->kbdev, p); + pool_dbg(pool, "skipping a not movable page\n"); + } + + return not_movable; } static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, struct page *p) { + bool queue_work_to_free = false; + lockdep_assert_held(&pool->pool_lock); - if (!pool->order && kbase_page_migration_enabled) - set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size); - else { + if (!pool->order && kbase_page_migration_enabled) { + if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) + queue_work_to_free = true; + } else { list_add(&p->lru, &pool->page_list); pool->cur_size++; } + if (queue_work_to_free) { + struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } + pool_dbg(pool, "added page\n"); } @@ -101,18 +123,29 @@ static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, struct list_head *page_list, size_t nr_pages) { + bool queue_work_to_free = false; + lockdep_assert_held(&pool->pool_lock); if (!pool->order && kbase_page_migration_enabled) { struct page *p, *tmp; - list_for_each_entry_safe(p, tmp, page_list, lru) - set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size); + list_for_each_entry_safe(p, tmp, page_list, lru) { + list_del_init(&p->lru); + if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) 
+ queue_work_to_free = true; + } } else { list_splice(page_list, &pool->page_list); pool->cur_size += nr_pages; } + if (queue_work_to_free) { + struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } + pool_dbg(pool, "added %zu pages\n", nr_pages); } @@ -226,7 +259,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) /* Setup page metadata for 4KB pages when page migration is enabled */ if (!pool->order && kbase_page_migration_enabled) { INIT_LIST_HEAD(&p->lru); - if (!kbase_alloc_page_metadata(kbdev, p, dma_addr)) { + if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, pool->group_id)) { dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, pool->order); @@ -460,7 +493,11 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool * struct shrinker does not define batch */ pool->reclaim.batch = 0; +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(&pool->reclaim); +#else + register_shrinker(&pool->reclaim, "mali-mem-pool"); +#endif pool_dbg(pool, "initialized\n"); @@ -636,10 +673,12 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, /* Get pages from this pool */ kbase_mem_pool_lock(pool); nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool)); + while (nr_from_pool--) { int j; p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); + if (pool->order) { pages[i++] = as_tagged_tag(page_to_phys(p), HUGE_HEAD | HUGE_PAGE); @@ -867,7 +906,6 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, pages[i] = as_tagged(0); continue; } - p = as_page(pages[i]); kbase_mem_pool_free_page(pool, p); diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c index 063b29a..5f3dabd 100644 --- a/mali_kbase/mali_kbase_vinstr.c +++ b/mali_kbase/mali_kbase_vinstr.c @@ -41,6 +41,11 @@ #include <linux/version_compat_defs.h> #include <linux/workqueue.h> +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include <uapi/linux/eventpoll.h> +#endif + /* Hwcnt reader API version */ #define HWCNT_READER_API 1 diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c index db20860..d1e4078 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c @@ -88,12 +88,11 @@ static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, * context's address space, when the page fault occurs for * MCU's address space. 
*/ - if (!queue_work(as->pf_wq, &as->work_pagefault)) - kbase_ctx_sched_release_ctx(kctx); - else { + if (!queue_work(as->pf_wq, &as->work_pagefault)) { dev_dbg(kbdev->dev, - "Page fault is already pending for as %u\n", - as_nr); + "Page fault is already pending for as %u", as_nr); + kbase_ctx_sched_release_ctx(kctx); + } else { atomic_inc(&kbdev->faults_pending); } } @@ -559,7 +558,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, int i) kbdev->as[i].pf_data.addr = 0ULL; kbdev->as[i].gf_data.addr = 0ULL; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index c909cd0..e39c8ad 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -25,6 +25,7 @@ #include <linux/kernel.h> #include <linux/dma-mapping.h> +#include <linux/migrate.h> #include <mali_kbase.h> #include <gpu/mali_kbase_gpu_fault.h> #include <gpu/mali_kbase_gpu_regmap.h> @@ -156,7 +157,7 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz } else if (op == KBASE_MMU_OP_FLUSH_MEM) { flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC; } else { - dev_warn(kbdev->dev, "Invalid flush request (op = %d)\n", op); + dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op); return; } @@ -167,7 +168,7 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz * perform a reset to recover */ dev_err(kbdev->dev, - "Flush for physical address range did not complete. Issuing GPU soft-reset to recover\n"); + "Flush for physical address range did not complete. Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); @@ -230,9 +231,8 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as */ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); - if (kbase_prepare_to_reset_gpu( - kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu_locked(kbdev); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -326,7 +326,7 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct * perform a reset to recover. */ dev_err(kbdev->dev, - "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); @@ -420,6 +420,65 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, u64 vpfn, int level, enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, struct list_head *free_pgds_list); + +static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) +{ + atomic_sub(1, &kbdev->memdev.used_pages); + + /* If MMU tables belong to a context then pages will have been accounted + * against it, so we must decrement the usage counts here. 
+ */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); +} + +static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + struct page *p) +{ + struct kbase_page_metadata *page_md = kbase_page_private(p); + bool page_is_isolated = false; + + lockdep_assert_held(&mmut->mmu_lock); + + if (!kbase_page_migration_enabled) + return false; + + spin_lock(&page_md->migrate_lock); + if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) { + WARN_ON_ONCE(!mmut->kctx); + if (IS_PAGE_ISOLATED(page_md->status)) { + page_md->status = PAGE_STATUS_SET(page_md->status, + FREE_PT_ISOLATED_IN_PROGRESS); + page_md->data.free_pt_isolated.kbdev = kbdev; + page_is_isolated = true; + } else { + page_md->status = + PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS); + } + } else { + WARN_ON_ONCE(mmut->kctx); + WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE); + } + spin_unlock(&page_md->migrate_lock); + + if (unlikely(page_is_isolated)) { + /* Do the CPU cache flush and accounting here for the isolated + * PGD page, which is done inside kbase_mmu_free_pgd() for the + * PGD page that did not get isolated. + */ + dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE, + DMA_BIDIRECTIONAL); + kbase_mmu_account_freed_pgd(kbdev, mmut); + } + + return page_is_isolated; +} + /** * kbase_mmu_free_pgd() - Free memory of the page directory * @@ -441,17 +500,7 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); - atomic_sub(1, &kbdev->memdev.used_pages); - - /* If MMU tables belong to a context then pages will have been accounted - * against it, so we must decrement the usage counts here. 
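Note: kbase_mmu_handle_isolated_pgd_page() exists because a PGD page that the migration core has isolated cannot be freed synchronously; teardown instead records a handover and lets kbase_page_migrate() or kbase_page_putback() complete the free. A condensed view of the decision, extracted from the function above (not a drop-in snippet):

    /* Condensed from kbase_mmu_handle_isolated_pgd_page(). */
    spin_lock(&page_md->migrate_lock);
    if (IS_PAGE_ISOLATED(page_md->status)) {
            /* Migration core holds the page: tag it so the free is
             * finished from the migration callbacks.
             */
            page_md->status = PAGE_STATUS_SET(page_md->status,
                                              FREE_PT_ISOLATED_IN_PROGRESS);
            page_md->data.free_pt_isolated.kbdev = kbdev;
    } else {
            page_md->status = PAGE_STATUS_SET(page_md->status,
                                              FREE_IN_PROGRESS);
    }
    spin_unlock(&page_md->migrate_lock);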
- */ - if (mmut->kctx) { - kbase_process_page_usage_dec(mmut->kctx, 1); - atomic_sub(1, &mmut->kctx->used_pages); - } - - kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); + kbase_mmu_account_freed_pgd(kbdev, mmut); } /** @@ -482,6 +531,20 @@ static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mm mutex_unlock(&mmut->mmu_lock); } +static void kbase_mmu_add_to_free_pgds_list(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + struct page *p, struct list_head *free_pgds_list) +{ + bool page_is_isolated = false; + + lockdep_assert_held(&mmut->mmu_lock); + + page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); + + if (likely(!page_is_isolated)) + list_add(&p->lru, free_pgds_list); +} + /** * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to * a region on a GPU page fault @@ -509,7 +572,7 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, if (!multiple) { dev_warn( kbdev->dev, - "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", + "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW", ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); return minimum_extra; } @@ -917,7 +980,7 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, */ dev_warn( kctx->kbdev->dev, - "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available\n", + "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available", new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced, total_mempools_free_4k); *pages_to_grow = 0; @@ -985,9 +1048,8 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) as_no = faulting_as->number; kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); - dev_dbg(kbdev->dev, - "Entering %s %pK, fault_pfn %lld, as_no %d\n", - __func__, (void *)data, fault_pfn, as_no); + dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %d", __func__, (void *)data, + fault_pfn, as_no); /* Grab the context that was already refcounted in kbase_mmu_interrupt() * Therefore, it cannot be scheduled out of this AS until we explicitly @@ -1010,8 +1072,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) #ifdef CONFIG_MALI_ARBITER_SUPPORT /* check if we still have GPU */ if (unlikely(kbase_is_gpu_removed(kbdev))) { - dev_dbg(kbdev->dev, - "%s: GPU has been removed\n", __func__); + dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); goto fault_done; } #endif @@ -1206,8 +1267,7 @@ page_fault_retry: /* cap to max vsize */ new_pages = min(new_pages, region->nr_pages - current_backed_size); - dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n", - new_pages); + dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", new_pages); if (new_pages == 0) { struct kbase_mmu_hw_op_param op_param; @@ -1284,11 +1344,10 @@ page_fault_retry: * so the no_flush version of insert_pages is used which allows * us to unlock the MMU as we see fit. 
*/ - err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu, - region->start_pfn + pfn_offset, - &kbase_get_gpu_phy_pages(region)[pfn_offset], - new_pages, region->flags, - region->gpu_alloc->group_id, &dirty_pgds); + err = kbase_mmu_insert_pages_no_flush( + kbdev, &kctx->mmu, region->start_pfn + pfn_offset, + &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags, + region->gpu_alloc->group_id, &dirty_pgds, region, false); if (err) { kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); @@ -1314,16 +1373,11 @@ page_fault_retry: if (region->threshold_pages && kbase_reg_current_backed_size(region) > region->threshold_pages) { - - dev_dbg(kctx->kbdev->dev, - "%zu pages exceeded IR threshold %zu\n", - new_pages + current_backed_size, - region->threshold_pages); + dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu", + new_pages + current_backed_size, region->threshold_pages); if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { - dev_dbg(kctx->kbdev->dev, - "Get region %pK for IR\n", - (void *)region); + dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region); kbase_va_region_alloc_get(kctx, region); } } @@ -1441,7 +1495,7 @@ page_fault_retry: kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Page allocation failure", fault); } else { - dev_dbg(kbdev->dev, "Try again after pool_grow\n"); + dev_dbg(kbdev->dev, "Try again after pool_grow"); goto page_fault_retry; } } @@ -1468,7 +1522,7 @@ fault_done: release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); - dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data); + dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data); } static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, @@ -1532,11 +1586,10 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * u64 *dirty_pgds) { u64 *page; + u64 pgd_vpfn = vpfn; phys_addr_t target_pgd; struct page *p; - KBASE_DEBUG_ASSERT(*pgd); - lockdep_assert_held(&mmut->mmu_lock); /* @@ -1549,7 +1602,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * p = pfn_to_page(PFN_DOWN(*pgd)); page = kmap(p); if (page == NULL) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); return -EINVAL; } @@ -1559,8 +1612,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { - dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", - __func__); + dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure", __func__); kunmap(p); return -ENOMEM; } @@ -1585,9 +1637,32 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * * GPU cache is still needed. For explanation, please refer * the comment in kbase_mmu_insert_pages_no_flush(). 
*/ - kbase_mmu_sync_pgd(kbdev, mmut->kctx, *pgd + (vpfn * sizeof(u64)), - kbase_dma_addr(p) + (vpfn * sizeof(u64)), sizeof(u64), - KBASE_MMU_OP_FLUSH_PT); + kbase_mmu_sync_pgd(kbdev, mmut->kctx, + *pgd + (vpfn * sizeof(u64)), + kbase_dma_addr(p) + (vpfn * sizeof(u64)), + sizeof(u64), KBASE_MMU_OP_FLUSH_PT); + + /* Update the new target_pgd page to its stable state */ + if (kbase_page_migration_enabled) { + struct kbase_page_metadata *page_md = + kbase_page_private(phys_to_page(target_pgd)); + + spin_lock(&page_md->migrate_lock); + + WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || + IS_PAGE_ISOLATED(page_md->status)); + + if (mmut->kctx) { + page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED); + page_md->data.pt_mapped.mmut = mmut; + page_md->data.pt_mapped.pgd_vpfn_level = + PGD_VPFN_LEVEL_SET(pgd_vpfn, level); + } else { + page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); + } + + spin_unlock(&page_md->migrate_lock); + } } else { target_pgd = kbdev->mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( @@ -1618,9 +1693,8 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds); /* Handle failure condition */ if (err) { - dev_dbg(kbdev->dev, - "%s: mmu_get_next_pgd failure at level %d\n", - __func__, l); + dev_dbg(kbdev->dev, "%s: mmu_get_next_pgd failure at level %d", __func__, + l); return err; } } @@ -1640,7 +1714,8 @@ static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 from_vpfn, u64 to_vpfn, u64 *dirty_pgds, - struct list_head *free_pgds_list) + struct list_head *free_pgds_list, + struct tagged_addr *phys, bool ignore_page_migration) { u64 vpfn = from_vpfn; struct kbase_mmu_mode const *mmu_mode; @@ -1693,8 +1768,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, pcount = count; break; default: - dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n", - __func__, level); + dev_warn(kbdev->dev, "%sNo support for ATEs at level %d", __func__, level); goto next; } @@ -1713,7 +1787,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, if (!num_of_valid_entries) { kunmap(p); - list_add(&p->lru, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, KBASE_MMU_OP_NONE, dirty_pgds, @@ -1734,6 +1808,27 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, next: vpfn += count; } + + /* If page migration is enabled: the only way to recover from failure + * is to mark all pages as not movable. It is not predictable what's + * going to happen to these pages at this stage. They might return + * movable once they are returned to a memory pool. 
+ */ + if (kbase_page_migration_enabled && !ignore_page_migration && phys) { + const u64 num_pages = to_vpfn - from_vpfn + 1; + u64 i; + + for (i = 0; i < num_pages; i++) { + struct page *phys_page = as_page(phys[i]); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } + } } static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, @@ -1806,6 +1901,20 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, if (nr == 0) return 0; + /* If page migration is enabled, pages involved in multiple GPU mappings + * are always treated as not movable. + */ + if (kbase_page_migration_enabled) { + struct page *phys_page = as_page(phys); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } + mutex_lock(&kctx->mmu.mmu_lock); while (remain) { @@ -1842,15 +1951,15 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, mutex_lock(&kctx->mmu.mmu_lock); } while (!err); if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure\n", - __func__); + dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure", __func__); if (recover_required) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list); + &dirty_pgds, &free_pgds_list, + NULL, true); } goto fail_unlock; } @@ -1858,14 +1967,15 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); if (recover_required) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list); + &dirty_pgds, &free_pgds_list, + NULL, true); } err = -ENOMEM; goto fail_unlock; @@ -1931,6 +2041,85 @@ fail_unlock: return err; } +static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, + struct kbase_va_region *reg, + struct kbase_mmu_table *mmut, const u64 vpfn) +{ + struct page *phys_page = as_page(phys); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + spin_lock(&page_md->migrate_lock); + + /* If no GPU va region is given: the metadata provided are + * invalid. + * + * If the page is already allocated and mapped: this is + * an additional GPU mapping, probably to create a memory + * alias, which means it is no longer possible to migrate + * the page easily because tracking all the GPU mappings + * would be too costly. + * + * In any case: the page becomes not movable. It is kept + * alive, but attempts to migrate it will fail. The page + * will be freed if it is still not movable when it returns + * to a memory pool. Notice that the movable flag is not + * cleared because that would require taking the page lock. 
+ */ + if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) { + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + } else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) { + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED); + page_md->data.mapped.reg = reg; + page_md->data.mapped.mmut = mmut; + page_md->data.mapped.vpfn = vpfn; + } + + spin_unlock(&page_md->migrate_lock); +} + +static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, + struct tagged_addr *phys, size_t requested_nr) +{ + size_t i; + + for (i = 0; i < requested_nr; i++) { + struct page *phys_page = as_page(phys[i]); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(phys[i]) || is_partial(phys[i])) + continue; + + if (page_md) { + u8 status; + + spin_lock(&page_md->migrate_lock); + status = PAGE_STATUS_GET(page_md->status); + + if (status == ALLOCATED_MAPPED) { + if (IS_PAGE_ISOLATED(page_md->status)) { + page_md->status = PAGE_STATUS_SET( + page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS); + page_md->data.free_isolated.kbdev = kbdev; + /* At this point, we still have a reference + * to the page via its page migration metadata, + * and any page with the FREE_ISOLATED_IN_PROGRESS + * status will subsequently be freed in either + * kbase_page_migrate() or kbase_page_putback() + */ + phys[i] = as_tagged(0); + } else + page_md->status = PAGE_STATUS_SET(page_md->status, + (u8)FREE_IN_PROGRESS); + } + + spin_unlock(&page_md->migrate_lock); + } + } +} + u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, struct tagged_addr const phy, unsigned long const flags, int const level, int const group_id) @@ -1944,7 +2133,8 @@ u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id, u64 *dirty_pgds) + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; @@ -2006,14 +2196,15 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu } while (!err); if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure\n", __func__); + dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, - &free_pgds_list); + &free_pgds_list, phys, + ignore_page_migration); } goto fail_unlock; } @@ -2021,15 +2212,15 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", - __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, - &free_pgds_list); + &free_pgds_list, phys, + ignore_page_migration); } err = -ENOMEM; goto fail_unlock; @@ -2060,6 +2251,14 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu *target = kbase_mmu_create_ate(kbdev, phys[i], 
flags, cur_level, group_id); + + /* If page migration is enabled, this is the right time + * to update the status of the page. + */ + if (kbase_page_migration_enabled && !ignore_page_migration && + !is_huge(phys[i]) && !is_partial(phys[i])) + kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut, + insert_vpfn + i); } num_of_valid_entries += count; } @@ -2104,8 +2303,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu fail_unlock: mutex_unlock(&mmut->mmu_lock); - mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, *dirty_pgds, - CALLER_MMU_ASYNC); + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, + dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC); kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; @@ -2115,11 +2314,10 @@ fail_unlock: * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space * number 'as_nr'. */ -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int const group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info) +int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, + int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg, bool ignore_page_migration) { int err; u64 dirty_pgds = 0; @@ -2130,7 +2328,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, return 0; err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds); + &dirty_pgds, reg, ignore_page_migration); if (err) return err; @@ -2285,7 +2483,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, current_pgd + (index * sizeof(u64)), sizeof(u64), flush_op); - list_add(&p->lru, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); } else { current_valid_entries--; @@ -2361,11 +2559,12 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, * @mmut: Pointer to GPU MMU page table. * @vpfn: Start page frame number of the GPU virtual pages to unmap. * @phys: Array of physical pages currently mapped to the virtual - * pages to unmap, or NULL. This is only used for GPU cache - * maintenance. + * pages to unmap, or NULL. This is used for GPU cache maintenance + * and page migration support. * @nr: Number of pages to unmap. * @as_nr: Address space number, for GPU cache maintenance operations * that happen outside a specific kbase context. + * @ignore_page_migration: Whether page migration metadata should be ignored. * * We actually discard the ATE and free the page table pages if no valid entries * exist in PGD. @@ -2384,10 +2583,11 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, * Return: 0 on success, otherwise an error code. 
*/ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr) + struct tagged_addr *phys, size_t nr, int as_nr, + bool ignore_page_migration) { + const size_t requested_nr = nr; u64 start_vpfn = vpfn; - size_t requested_nr = nr; enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; struct kbase_mmu_mode const *mmu_mode; struct kbase_mmu_hw_op_param op_param; @@ -2478,9 +2678,8 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table switch (level) { case MIDGARD_MMU_LEVEL(0): case MIDGARD_MMU_LEVEL(1): - dev_warn(kbdev->dev, - "%s: No support for ATEs at level %d\n", - __func__, level); + dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, + level); kunmap(p); goto out; case MIDGARD_MMU_LEVEL(2): @@ -2488,9 +2687,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table if (count >= 512) { pcount = 1; } else { - dev_warn(kbdev->dev, - "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n", - __func__, count); + dev_warn( + kbdev->dev, + "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down", + __func__, count); pcount = 0; } break; @@ -2499,9 +2699,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table pcount = count; break; default: - dev_err(kbdev->dev, - "%s: found non-mapped memory, early out\n", - __func__); + dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__); vpfn += count; nr -= count; continue; @@ -2530,7 +2728,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table pgd + (index * sizeof(u64)), pcount * sizeof(u64), flush_op); - list_add(&p->lru, &free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, &free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, flush_op, &dirty_pgds, @@ -2553,7 +2751,6 @@ next: } err = 0; out: - mutex_unlock(&mmut->mmu_lock); /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ op_param = (struct kbase_mmu_hw_op_param){ .vpfn = start_vpfn, @@ -2566,6 +2763,16 @@ out: }; mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param); + /* If page migration is enabled: the status of all physical pages involved + * shall be updated, unless they are not movable. Their status shall be + * updated before releasing the lock to protect against concurrent + * requests to migrate the pages, if they have been isolated. + */ + if (kbase_page_migration_enabled && phys && !ignore_page_migration) + kbase_mmu_progress_migration_on_teardown(kbdev, phys, requested_nr); + + mutex_unlock(&mmut->mmu_lock); + kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; @@ -2737,6 +2944,353 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, return err; } +static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON_ONCE(kbdev->mmu_page_migrate_in_progress); + kbdev->mmu_page_migrate_in_progress = true; +} + +static void mmu_page_migration_transaction_end(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + WARN_ON_ONCE(!kbdev->mmu_page_migrate_in_progress); + kbdev->mmu_page_migrate_in_progress = false; + /* Invoke the PM state machine, as the MMU page migration session + * may have deferred a transition in L2 state machine. 
+ */ + kbase_pm_update_state(kbdev); +} + +int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, + dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level) +{ + struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys)); + struct kbase_mmu_hw_op_param op_param; + struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ? + page_md->data.mapped.mmut : + page_md->data.pt_mapped.mmut; + struct kbase_device *kbdev; + phys_addr_t pgd; + u64 *old_page, *new_page, *pgd_page, *target, vpfn; + int index, check_state, ret = 0; + unsigned long hwaccess_flags = 0; + unsigned int num_of_valid_entries; + u8 vmap_count = 0; + + /* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param, + * here we skip the no kctx case, which is only used with MCU's mmut. + */ + if (!mmut->kctx) + return -EINVAL; + + if (level > MIDGARD_MMU_BOTTOMLEVEL) + return -EINVAL; + else if (level == MIDGARD_MMU_BOTTOMLEVEL) + vpfn = page_md->data.mapped.vpfn; + else + vpfn = PGD_VPFN_LEVEL_GET_VPFN(page_md->data.pt_mapped.pgd_vpfn_level); + + kbdev = mmut->kctx->kbdev; + index = (vpfn >> ((3 - level) * 9)) & 0x1FF; + + /* Create all mappings before copying content. + * This is done as early as possible because it is the only operation that may + * fail. It is possible to do this before taking any locks because the + * pages to migrate are not going to change and even the parent PGD is not + * going to be affected by any other concurrent operation, since the page + * has been isolated before migration and therefore it cannot disappear in + * the middle of this function. + */ + old_page = kmap(as_page(old_phys)); + if (!old_page) { + dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__); + ret = -EINVAL; + goto old_page_map_error; + } + + new_page = kmap(as_page(new_phys)); + if (!new_page) { + dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__); + ret = -EINVAL; + goto new_page_map_error; + } + + /* GPU cache maintenance affects both memory content and page table, + * but at two different stages. A single virtual memory page is affected + * by the migration. + * + * Notice that the MMU maintenance is done in the following steps: + * + * 1) The MMU region is locked without performing any other operation. + * This lock must cover the entire migration process, in order to + * prevent any GPU access to the virtual page whose physical page + * is being migrated. + * 2) Immediately after locking: the MMU region content is flushed via + * GPU control while the lock is taken and without unlocking. + * The region must stay locked for the duration of the whole page + * migration procedure. + * This is necessary to make sure that pending writes to the old page + * are finalized before copying content to the new page. + * 3) Before unlocking: changes to the page table are flushed. + * Finer-grained GPU control operations are used if possible, otherwise + * the whole GPU cache shall be flushed again. + * This is necessary to make sure that the GPU accesses the new page + * after migration. + * 4) The MMU region is unlocked.
+ */ +#define PGD_VPFN_MASK(level) (~((((u64)1) << ((3 - level) * 9)) - 1)) + op_param.mmu_sync_info = CALLER_MMU_ASYNC; + op_param.kctx_id = mmut->kctx->id; + op_param.vpfn = vpfn & PGD_VPFN_MASK(level); + op_param.nr = 1 << ((3 - level) * 9); + op_param.op = KBASE_MMU_OP_FLUSH_PT; + /* When level is not MIDGARD_MMU_BOTTOMLEVEL, it is assumed PGD page migration */ + op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ? + pgd_level_to_skip_flush(1ULL << level) : + pgd_level_to_skip_flush(3ULL << level); + + mutex_lock(&mmut->mmu_lock); + + /* The state was evaluated before entering this function, but it could + * have changed before the mmu_lock was taken. However, the state + * transitions which are possible at this point are only two, and in both + * cases it is a stable state progressing to a "free in progress" state. + * + * After taking the mmu_lock the state can no longer change: read it again + * and make sure that it hasn't changed before continuing. + */ + spin_lock(&page_md->migrate_lock); + check_state = PAGE_STATUS_GET(page_md->status); + if (level == MIDGARD_MMU_BOTTOMLEVEL) + vmap_count = page_md->vmap_count; + spin_unlock(&page_md->migrate_lock); + + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + if (check_state != ALLOCATED_MAPPED) { + dev_dbg(kbdev->dev, + "%s: state changed to %d (was %d), abort page migration", __func__, + check_state, ALLOCATED_MAPPED); + ret = -EAGAIN; + goto page_state_change_out; + } else if (vmap_count > 0) { + dev_dbg(kbdev->dev, "%s: page was multi-mapped, abort page migration", + __func__); + ret = -EAGAIN; + goto page_state_change_out; + } + } else { + if (check_state != PT_MAPPED) { + dev_dbg(kbdev->dev, + "%s: state changed to %d (was %d), abort PGD page migration", + __func__, check_state, PT_MAPPED); + WARN_ON_ONCE(check_state != FREE_PT_ISOLATED_IN_PROGRESS); + ret = -EAGAIN; + goto page_state_change_out; + } + } + + ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd, NULL, NULL); + if (ret) { + dev_warn(kbdev->dev, "%s: failed to find PGD for old page.", __func__); + goto get_pgd_at_level_error; + } + + pgd_page = kmap(phys_to_page(pgd)); + if (!pgd_page) { + dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__); + ret = -EINVAL; + goto pgd_page_map_error; + } + + mutex_lock(&kbdev->pm.lock); + mutex_lock(&kbdev->mmu_hw_mutex); + + /* Lock MMU region and flush GPU cache by using GPU control, + * in order to keep MMU region locked. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + if (unlikely(!kbase_pm_l2_allow_mmu_page_migration(kbdev))) { + /* Defer the migration as L2 is in a transitional phase */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + dev_dbg(kbdev->dev, "%s: L2 in transition, abort PGD page migration", __func__); + ret = -EAGAIN; + goto l2_state_defer_out; + } + /* Prevent transitional phases in L2 by starting the transaction */ + mmu_page_migration_transaction_begin(kbdev); + if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + int as_nr = mmut->kctx->as_nr; + struct kbase_as *as = &kbdev->as[as_nr]; + + ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param); + if (!ret) { + ret = kbase_gpu_cache_flush_and_busy_wait( + kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); + } + if (ret) + mmu_page_migration_transaction_end(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + + if (ret < 0) { + dev_err(kbdev->dev, + "%s: failed to lock MMU region or flush GPU cache. Issuing GPU soft-reset to recover.", + __func__); + goto gpu_reset; + } + + /* Copy memory content. + * + * It is necessary to claim the ownership of the DMA buffer for the old + * page before performing the copy, to make sure of reading a consistent + * version of its content, before copying. After the copy, ownership of + * the DMA buffer for the new page is given to the GPU in order to make + * the content visible to potential GPU access that may happen as soon as + * this function releases the lock on the MMU region. + */ + dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + memcpy(new_page, old_page, PAGE_SIZE); + dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Remap GPU virtual page. + * + * This code rests on the assumption that page migration is only enabled + * for 4 kB pages, which necessarily live in the bottom level of the MMU + * page table. For this reason, the PGD level tells us unequivocally + * whether the page being migrated is a "content page" or another PGD + * of the page table: + * + * - Bottom level implies ATE (Address Translation Entry) + * - Any other level implies PTE (Page Table Entry) + * + * The current implementation doesn't handle the case of a level 0 PGD, + * that is: the root PGD of the page table. + */ + target = &pgd_page[index]; + + /* Certain entries of a page table page encode the count of valid entries + * present in that page. So we need to save & restore the count information + * when updating the PTE/ATE to point to the new page. + */ + num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page); + + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + WARN_ON_ONCE((*target & 1UL) == 0); + *target = + kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags, + level, page_md->data.mapped.reg->gpu_alloc->group_id); + } else { + u64 managed_pte; + +#ifdef CONFIG_MALI_DEBUG + /* The PTE should be pointing to the page being migrated */ + WARN_ON_ONCE(as_phys_addr_t(old_phys) != kbdev->mmu_mode->pte_to_phy_addr( + kbdev->mgm_dev->ops.mgm_pte_to_original_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index]))); +#endif + kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys)); + *target = kbdev->mgm_dev->ops.mgm_update_gpu_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); + } + + kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); + + /* This function always updates a single entry inside an existing PGD, + * therefore cache maintenance is necessary and affects a single entry. + */ + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), + kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)), sizeof(u64), + KBASE_MMU_OP_FLUSH_PT); + + /* Unlock MMU region. + * + * Notice that GPUs which don't issue flush commands via GPU control + * still need an additional GPU cache flush here, this time only + * for the page table, because the function call above to sync PGDs + * won't have any effect on them.
+ */ + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + int as_nr = mmut->kctx->as_nr; + struct kbase_as *as = &kbdev->as[as_nr]; + + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param); + } else { + ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, + GPU_COMMAND_CACHE_CLN_INV_L2); + if (!ret) + ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param); + } + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + /* Releasing locks before checking the migration transaction error state */ + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + /* Release the transition prevention in L2 by ending the transaction */ + mmu_page_migration_transaction_end(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + + /* Checking the final migration transaction error state */ + if (ret < 0) { + dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__); + goto gpu_reset; + } + + /* Undertaking metadata transfer, while we are holding the mmu_lock */ + spin_lock(&page_md->migrate_lock); + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + size_t page_array_index = + page_md->data.mapped.vpfn - page_md->data.mapped.reg->start_pfn; + + WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED); + + /* Replace page in array of pages of the physical allocation. */ + page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys; + } + /* Update the new page dma_addr with the transferred metadata from the old_page */ + page_md->dma_addr = new_dma_addr; + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + spin_unlock(&page_md->migrate_lock); + set_page_private(as_page(new_phys), (unsigned long)page_md); + /* Old page metadata pointer cleared as it is now owned by the new page */ + set_page_private(as_page(old_phys), 0); + +l2_state_defer_out: + kunmap(phys_to_page(pgd)); +pgd_page_map_error: +get_pgd_at_level_error: +page_state_change_out: + mutex_unlock(&mmut->mmu_lock); + + kunmap(as_page(new_phys)); +new_page_map_error: + kunmap(as_page(old_phys)); +old_page_map_error: + return ret; + +gpu_reset: + /* Unlock the MMU table and undo the mappings before resetting the GPU. + */ + mutex_unlock(&mmut->mmu_lock); + kunmap(phys_to_page(pgd)); + kunmap(as_page(new_phys)); + kunmap(as_page(old_phys)); + + /* Reset the GPU because of an unrecoverable error in locking or flushing. */ + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + + return ret; +} + static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t pgd, int level) @@ -2746,12 +3300,14 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; u64 *pgd_page_buffer = NULL; + bool page_is_isolated = false; + struct page *p = phys_to_page(pgd); lockdep_assert_held(&mmut->mmu_lock); - pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + pgd_page = kmap_atomic(p); /* kmap_atomic should NEVER fail.
*/ - if (WARN_ON(pgd_page == NULL)) + if (WARN_ON_ONCE(pgd_page == NULL)) return; if (level < MIDGARD_MMU_BOTTOMLEVEL) { /* Copy the page to our preallocated buffer so that we can minimize @@ -2761,6 +3317,12 @@ static void mmu_teardown_level(struct kbase_device *kbdev, memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); } + /* When page migration is enabled, kbase_region_tracker_term() would ensure + * there are no pages left mapped on the GPU for a context. Hence the count + * of valid entries is expected to be zero here. + */ + if (kbase_page_migration_enabled && mmut->kctx) + WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page)); /* Invalidate page after copying */ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); kunmap_atomic(pgd_page); @@ -2779,7 +3341,12 @@ static void mmu_teardown_level(struct kbase_device *kbdev, } } - kbase_mmu_free_pgd(kbdev, mmut, pgd); + /* Top level PGD page is excluded from migration process. */ + if (level != MIDGARD_MMU_TOPLEVEL) + page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); + + if (likely(!page_is_isolated)) + kbase_mmu_free_pgd(kbdev, mmut, pgd); } int kbase_mmu_init(struct kbase_device *const kbdev, @@ -2836,6 +3403,10 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { int level; + WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID), + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables", + mmut->kctx->tgid, mmut->kctx->id); + if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) { mutex_lock(&mmut->mmu_lock); mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL); @@ -2881,7 +3452,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); return 0; } @@ -3035,8 +3606,7 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data) #ifdef CONFIG_MALI_ARBITER_SUPPORT /* check if we still have GPU */ if (unlikely(kbase_is_gpu_removed(kbdev))) { - dev_dbg(kbdev->dev, - "%s: GPU has been removed\n", __func__); + dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); return; diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h index 5330306..602a3f9 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.h +++ b/mali_kbase/mmu/mali_kbase_mmu.h @@ -29,6 +29,7 @@ struct kbase_context; struct kbase_mmu_table; +struct kbase_va_region; /** * enum kbase_caller_mmu_sync_info - MMU-synchronous caller info. 
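The region-lock arithmetic used by kbase_mmu_migrate_page() above follows from the four-level Midgard/Valhall page table layout, in which each level below the top resolves 9 bits of the virtual page frame number (VPFN). A minimal user-space sketch of that arithmetic, assuming MIDGARD_MMU_BOTTOMLEVEL is level 3 and mirroring the PGD_VPFN_MASK macro, the entry index, and the op_param.nr computation from the hunks above:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the macro defined in kbase_mmu_migrate_page(): clear the VPFN bits
 * that are resolved below the given page table level (9 bits per level).
 */
#define PGD_VPFN_MASK(level) (~((((uint64_t)1) << ((3 - (level)) * 9)) - 1))

int main(void)
{
	const uint64_t vpfn = 0x12345678ULL; /* arbitrary example VPFN */

	for (int level = 0; level <= 3; level++) {
		/* Index of the entry inside the PGD page at this level */
		const unsigned int index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
		/* Number of 4 kB pages covered by one entry at this level */
		const uint64_t nr = (uint64_t)1 << ((3 - level) * 9);

		printf("level %d: lock base VPFN 0x%llx, index %u, covers %llu pages\n",
		       level, (unsigned long long)(vpfn & PGD_VPFN_MASK(level)),
		       index, (unsigned long long)nr);
	}
	return 0;
}

At the bottom level the mask keeps the full VPFN and one entry covers a single page, which is why op_param.nr is 1 for an ATE migration, while a migrated level-2 PGD locks a 512-page (2 MB) aligned range.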
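The insert-time status transitions above are easier to follow in isolation. The sketch below is a toy model of the logic in kbase_mmu_progress_migration_on_insert(), assuming a hypothetical status encoding (low bits for the state, one high bit for isolation); the real PAGE_STATUS_GET/SET, IS_PAGE_ISOLATED and state definitions live in driver headers that are not part of this commit:

#include <assert.h>
#include <stdint.h>

/* Hypothetical encoding for illustration only: low 7 bits hold the status,
 * bit 7 marks isolation.
 */
enum page_status {
	ALLOCATE_IN_PROGRESS,
	ALLOCATED_MAPPED,
	PT_MAPPED,
	NOT_MOVABLE,
	FREE_IN_PROGRESS,
	FREE_ISOLATED_IN_PROGRESS,
};

#define PAGE_STATUS_GET(s) ((uint8_t)((s) & 0x7F))
#define PAGE_STATUS_SET(s, v) ((uint8_t)(((s) & ~0x7F) | ((v) & 0x7F)))
#define IS_PAGE_ISOLATED(s) (((s) >> 7) & 1)

/* Transition applied on insert, following the branch structure of
 * kbase_mmu_progress_migration_on_insert() above.
 */
static uint8_t on_insert(uint8_t status, int has_reg)
{
	if (!has_reg || PAGE_STATUS_GET(status) == ALLOCATED_MAPPED)
		return PAGE_STATUS_SET(status, NOT_MOVABLE); /* alias or no region */
	if (PAGE_STATUS_GET(status) == ALLOCATE_IN_PROGRESS)
		return PAGE_STATUS_SET(status, ALLOCATED_MAPPED);
	return status;
}

int main(void)
{
	uint8_t s = PAGE_STATUS_SET(0, ALLOCATE_IN_PROGRESS);

	s = on_insert(s, 1); /* first mapping: page becomes migratable */
	assert(PAGE_STATUS_GET(s) == ALLOCATED_MAPPED);
	s = on_insert(s, 1); /* second mapping: page pinned as NOT_MOVABLE */
	assert(PAGE_STATUS_GET(s) == NOT_MOVABLE);
	assert(!IS_PAGE_ISOLATED(s));
	return 0;
}

The one-way nature of the transition matches the comments in the hunks above: once a page is NOT_MOVABLE the MMU code never makes it movable again; it can only become movable after returning to a memory pool.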
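kbase_mmu_teardown_pages() above also reworks the warning for partial 2 MB teardowns. A level-2 entry maps 512 contiguous 4 kB pages, so a request covering only part of that range cannot be honoured by invalidating the single entry. A small sketch of the decision, assuming count is the number of requested pages remaining within the current level-2 entry (that computation is an assumption here, as it is not shown in this hunk):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t vpfn = 0x200; /* hypothetical start VPFN, 2 MB aligned */
	size_t nr = 300;             /* pages requested: less than a full entry */
	/* Assumed: pages left in the current level-2 entry, capped by the request */
	size_t count = 512 - (vpfn & 0x1FF);

	if (count > nr)
		count = nr;

	if (count >= 512) {
		/* Whole 2 MB entry covered: invalidate exactly one PTE (pcount = 1) */
		printf("tear down 1 level-2 entry\n");
	} else {
		/* Partial teardown of a 2 MB entry is not supported:
		 * warn and skip it (pcount = 0), as the hunk above does.
		 */
		printf("need 512, but have %zu to tear down -> skip\n", count);
	}
	return 0;
}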
@@ -132,24 +133,56 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev, int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int group_id, u64 *dirty_pgds); -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info); + unsigned long flags, int group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration); +int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, + int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg, bool ignore_page_migration); int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, size_t nr, unsigned long flags, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info); int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr); + struct tagged_addr *phys, size_t nr, int as_nr, + bool ignore_page_migration); int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int const group_id); /** + * kbase_mmu_migrate_page - Migrate GPU mappings and content between memory pages + * + * @old_phys: Old physical page to be replaced. + * @new_phys: New physical page used to replace old physical page. + * @old_dma_addr: DMA address of the old page. + * @new_dma_addr: DMA address of the new page. + * @level: MMU page table level of the provided PGD. + * + * The page migration process is made of 2 big steps: + * + * 1) Copy the content of the old page to the new page. + * 2) Remap the virtual page, that is: replace either the ATE (if the old page + * was a regular page) or the PTE (if the old page was used as a PGD) in the + * MMU page table with the new page. + * + * During the process, the MMU region is locked to prevent GPU access to the + * virtual memory page that is being remapped. + * + * Before copying the content of the old page to the new page and while the + * MMU region is locked, a GPU cache flush is performed to make sure that + * pending GPU writes are finalized to the old page before copying. + * That is necessary because otherwise there's a risk that GPU writes might + * be finalized to the old page, and not new page, after migration. + * The MMU region is unlocked only at the end of the migration operation. + * + * Return: 0 on success, otherwise an error code. + */ +int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, + dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level); + +/** * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt. * * @kbdev: Pointer to the kbase device for which bus fault was reported. diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h index 09b3fa8..63277bc 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw.h +++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h @@ -105,6 +105,22 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as); /** + * kbase_mmu_hw_do_lock - Issue LOCK command to the MMU and program + * the LOCKADDR register. + * + * @kbdev: Kbase device to issue the MMU operation on. 
+ * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * hwaccess_lock needs to be held when calling this function. + * + * Return: 0 if issuing the command was successful, otherwise an error code. + */ +int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + +/** * kbase_mmu_hw_do_unlock_no_addr - Issue UNLOCK command to the MMU without * programming the LOCKADDR register and wait * for it to complete before returning. @@ -114,6 +130,9 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, * @op_param: Pointer to struct containing information about the MMU * operation to perform. * + * This function should be called for GPUs where a GPU command is used to flush + * the cache(s) instead of an MMU command. + * * Return: 0 if issuing the command was successful, otherwise an error code. */ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, @@ -145,7 +164,7 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, * GPUs where MMU command to flush the cache(s) is deprecated. * mmu_hw_mutex needs to be held when calling this function. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); @@ -164,7 +183,7 @@ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, * Both mmu_hw_mutex and hwaccess_lock need to be held when calling this * function. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); @@ -181,7 +200,7 @@ int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as * specified inside @op_param. GPU command is used to flush the cache(s) * instead of the MMU command. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c index 527588e..68bc697 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c @@ -424,6 +424,14 @@ static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, return ret; } +int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return mmu_hw_do_lock(kbdev, as, op_param); +} + int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { diff --git a/mali_kbase/tests/include/kutf/kutf_helpers.h b/mali_kbase/tests/include/kutf/kutf_helpers.h index c4c713c..3f68efa 100644 --- a/mali_kbase/tests/include/kutf/kutf_helpers.h +++ b/mali_kbase/tests/include/kutf/kutf_helpers.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,6 +31,7 @@ */ #include <kutf/kutf_suite.h> +#include <linux/device.h> /** * kutf_helper_pending_input() - Check any pending lines sent by user space @@ -81,4 +82,28 @@ int kutf_helper_input_enqueue(struct kutf_context *context, */ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context); +/** + * kutf_helper_ignore_dmesg() - Write message in dmesg to instruct parser + * to ignore errors, until the counterpart + * is written to dmesg to stop ignoring errors. + * @dev: Device pointer used when writing to dmesg. + * + * This function writes "Start ignoring dmesg warnings" to dmesg, which + * the parser reads and then stops logging errors. Only to be used in cases + * where an error is expected in dmesg but should not be flagged as an error. + */ +void kutf_helper_ignore_dmesg(struct device *dev); + +/** + * kutf_helper_stop_ignoring_dmesg() - Write message in dmesg to instruct parser + * to stop ignoring errors. + * @dev: Device pointer used when writing to dmesg. + * + * This function writes "Stop ignoring dmesg warnings" to dmesg, which + * the parser reads and then resumes logging errors. Counterpart to + * kutf_helper_ignore_dmesg(). + */ +void kutf_helper_stop_ignoring_dmesg(struct device *dev); + #endif /* _KERNEL_UTF_HELPERS_H_ */ diff --git a/mali_kbase/tests/kutf/kutf_helpers.c b/mali_kbase/tests/kutf/kutf_helpers.c index d207d1c..4273619 100644 --- a/mali_kbase/tests/kutf/kutf_helpers.c +++ b/mali_kbase/tests/kutf/kutf_helpers.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -127,3 +127,15 @@ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context) { kutf_helper_input_enqueue(context, NULL, 0); } + +void kutf_helper_ignore_dmesg(struct device *dev) +{ + dev_info(dev, "KUTF: Start ignoring dmesg warnings\n"); +} +EXPORT_SYMBOL(kutf_helper_ignore_dmesg); + +void kutf_helper_stop_ignoring_dmesg(struct device *dev) +{ + dev_info(dev, "KUTF: Stop ignoring dmesg warnings\n"); +} +EXPORT_SYMBOL(kutf_helper_stop_ignoring_dmesg); diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c index 644d69b..359d063 100644 --- a/mali_kbase/tl/mali_kbase_timeline_io.c +++ b/mali_kbase/tl/mali_kbase_timeline_io.c @@ -30,6 +30,11 @@ #include <linux/version_compat_defs.h> #include <linux/anon_inodes.h> +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include <uapi/linux/eventpoll.h> +#endif + /* The timeline stream file operations functions. */ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, size_t size, loff_t *f_pos);
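The widened prototypes in mali_kbase_mmu.h above ripple out to every call site: mapping paths now pass the owning VA region (so page metadata can be bound to it) plus a page-migration opt-out flag, and teardown takes the same flag along with the physical page array. A hypothetical call site, sketched only to show the new parameter order; the function itself and the choice of CALLER_MMU_ASYNC are assumptions, not code from this commit:

#include <mali_kbase.h>

/* Hypothetical caller, for illustration only. Real call sites live in the
 * memory management paths of the driver, outside this diff.
 */
static int example_map_then_unmap(struct kbase_device *kbdev, struct kbase_context *kctx,
				  struct kbase_va_region *reg, struct tagged_addr *phys,
				  size_t nr_pages)
{
	int err;

	/* reg lets the MMU code bind page metadata to the region;
	 * ignore_page_migration = false keeps these pages migratable.
	 */
	err = kbase_mmu_insert_pages(kbdev, &kctx->mmu, reg->start_pfn, phys, nr_pages,
				     reg->flags, kctx->as_nr, reg->gpu_alloc->group_id,
				     CALLER_MMU_ASYNC, reg, false);
	if (err)
		return err;

	/* phys is passed to teardown as well, so the page migration state
	 * machine can be progressed for each unmapped page.
	 */
	return kbase_mmu_teardown_pages(kbdev, &kctx->mmu, reg->start_pfn, phys, nr_pages,
					kctx->as_nr, false);
}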
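The new KUTF helpers at the end of the diff exist so that tests which deliberately provoke kernel warnings can tell the dmesg parser to stand down. A sketch of the intended bracketing, with the test body and device pointer as placeholders:

#include <kutf/kutf_helpers.h>

/* Hypothetical KUTF test step that expects a warning in dmesg. */
static void example_expected_warning_step(struct device *dev)
{
	/* Logs "KUTF: Start ignoring dmesg warnings": the parser stops
	 * flagging dmesg errors from this point on.
	 */
	kutf_helper_ignore_dmesg(dev);

	/* ... exercise the path that legitimately warns in dmesg ... */

	/* Logs "KUTF: Stop ignoring dmesg warnings": normal error
	 * flagging resumes.
	 */
	kutf_helper_stop_ignoring_dmesg(dev);
}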