diff options
author | Sidath Senanayake <sidaths@google.com> | 2019-04-10 14:37:00 +0200 |
---|---|---|
committer | Sidath Senanayake <sidaths@google.com> | 2019-04-10 14:37:00 +0200 |
commit | e972f6531ef8c9d01eae567f52db4f0fd37d1428 (patch) | |
tree | 52df0c2e2665e00e4fe5822ddb50df1a72e24cd0 /mali_kbase | |
parent | a970431fa55f99aba31ea4263fdc8e70019a9ccd (diff) | |
download | gpu-e972f6531ef8c9d01eae567f52db4f0fd37d1428.tar.gz |
Mali Bifrost DDK r17p0 KMD
Provenance:
789dfe7c7 (collaborate/EAC/b_r17p0)
BX304L01B-BU-00000-r17p0-01rel0
BX304L06A-BU-00000-r17p0-01rel0
BX304X07X-BU-00000-r17p0-01rel0
Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Iff5bea2d96207a6e72d5e533e772c24a7adbdc31
Diffstat (limited to 'mali_kbase')
71 files changed, 4281 insertions, 2282 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index 8e73e1f..09674bf 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,7 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r16p0-01rel0" +MALI_RELEASE_NAME ?= "r17p0-01rel0" # Paths required for build KBASE_PATH = $(src) @@ -35,6 +35,8 @@ MALI_UNIT_TEST ?= 0 MALI_KERNEL_TEST_API ?= 0 MALI_COVERAGE ?= 0 CONFIG_MALI_PLATFORM_NAME ?= "devicetree" +# MALI_CSF_LT_V10: Temporary for hybrid integration config +MALI_CSF_LT_V10 ?= 0 # Set up our defines, which will be passed to gcc DEFINES = \ @@ -43,6 +45,7 @@ DEFINES = \ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ -DMALI_COVERAGE=$(MALI_COVERAGE) \ + -DMALI_CSF_LT_V10=$(MALI_CSF_LT_V10) \ -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" ifeq ($(KBUILD_EXTMOD),) @@ -65,7 +68,9 @@ SRC := \ mali_kbase_device.c \ mali_kbase_cache_policy.c \ mali_kbase_mem.c \ + mali_kbase_mem_pool_group.c \ mali_kbase_mmu.c \ + mali_kbase_native_mgm.c \ mali_kbase_ctx_sched.c \ mali_kbase_jd.c \ mali_kbase_jd_debugfs.c \ @@ -102,6 +107,7 @@ SRC := \ mali_kbase_smc.c \ mali_kbase_mem_pool.c \ mali_kbase_mem_pool_debugfs.c \ + mali_kbase_debugfs_helper.c \ mali_kbase_tlstream.c \ mali_kbase_strings.c \ mali_kbase_as_fault_debugfs.c \ diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig index 7c10016..2b35d83 100644 --- a/mali_kbase/Kconfig +++ b/mali_kbase/Kconfig @@ -106,6 +106,20 @@ config MALI_CORESTACK If unsure, say N. 
+config MALI_PLATFORM_POWER_DOWN_ONLY + bool "Support disabling the power down of individual cores" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Enabling this feature will let the driver avoid power down of the + shader cores, the tiler, and the L2 cache. + The entire GPU would be powered down at once through the platform + specific code. + This may be required for certain platform configurations only. + This also limits the available power policies. + + If unsure, say N. + config MALI_DEBUG bool "Debug build" depends on MALI_MIDGARD && MALI_EXPERT @@ -182,6 +196,16 @@ config MALI_PWRSOFT_765 If using kernel >= v4.10 then say N, otherwise if devfreq cooling changes have been backported say Y to avoid compilation errors. +config MALI_MEMORY_FULLY_BACKED + bool "Memory fully physically-backed" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This option enables full physical backing of all virtual + memory allocations in the kernel. Notice that this build + option only affects allocations of grow-on-GPU-page-fault + memory. + # Instrumentation options. config MALI_JOB_DUMP diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig index 46dca14..1f61c9f 100644 --- a/mali_kbase/Mconfig +++ b/mali_kbase/Mconfig @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -70,12 +70,8 @@ config MALI_DMA_FENCE config MALI_PLATFORM_NAME depends on MALI_MIDGARD string "Platform name" - default "arndale" if PLATFORM_ARNDALE - default "arndale_octa" if PLATFORM_ARNDALE_OCTA - default "rk" if PLATFORM_FIREFLY default "hisilicon" if PLATFORM_HIKEY960 default "hisilicon" if PLATFORM_HIKEY970 - default "vexpress" if PLATFORM_VEXPRESS default "devicetree" help Enter the name of the desired platform configuration directory to @@ -108,6 +104,20 @@ config MALI_CORESTACK If unsure, say N. +config MALI_PLATFORM_POWER_DOWN_ONLY + bool "Support disabling the power down of individual cores" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Enabling this feature will let the driver avoid power down of the + shader cores, the tiler, and the L2 cache. + The entire GPU would be powered down at once through the platform + specific code. + This may be required for certain platform configurations only. + This also limits the available power policies. + + If unsure, say N. + config MALI_DEBUG bool "Debug build" depends on MALI_MIDGARD && MALI_EXPERT @@ -133,20 +143,41 @@ config MALI_FENCE_DEBUG The timeout can be changed at runtime through the js_soft_timeout device attribute, where the timeout is specified in milliseconds. -config MALI_ERROR_INJECT - bool "Error injection" - depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI - default n +choice + prompt "Error injection level" + default MALI_ERROR_INJECT_NONE help Enables insertion of errors to test module failure and recovery mechanisms. +config MALI_ERROR_INJECT_NONE + bool "disabled" + help + Error injection is disabled. + +config MALI_ERROR_INJECT_TRACK_LIST + bool "error track list" + depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI + help + Errors to inject are pre-configured by the user. 
+ config MALI_ERROR_INJECT_RANDOM - bool "Random error injection" - depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI && MALI_ERROR_INJECT - default n + bool "random error injection" + depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI help Injected errors are random, rather than user-driven. +endchoice + +config MALI_ERROR_INJECT_ON + string + default "0" if MALI_ERROR_INJECT_NONE + default "1" if MALI_ERROR_INJECT_TRACK_LIST + default "2" if MALI_ERROR_INJECT_RANDOM + +config MALI_ERROR_INJECT + bool + default y if !MALI_ERROR_INJECT_NONE + config MALI_SYSTEM_TRACE bool "Enable system event tracing support" depends on MALI_MIDGARD && MALI_EXPERT @@ -182,6 +213,13 @@ config MALI_PWRSOFT_765 not merged in mainline kernel yet. So this define helps to guard those parts of the code. +config MALI_MEMORY_FULLY_BACKED + bool "Memory fully physically-backed" + default n + help + This option enables full backing of all virtual memory allocations + for the kernel. This only affects grow-on-GPU-page-fault memory. + # Instrumentation options. # config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig. 
diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index 5ade012..df50dd6 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -156,7 +156,7 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) kbdev->current_freq = freq; kbdev->current_core_mask = core_mask; - KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq); + KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)nominal_freq); return err; } diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c index 79c04d9..31633ec 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c @@ -48,10 +48,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1))) goto out_err; - /* Override core availability policy to ensure all cores are available - */ - kbase_pm_ca_instr_enable(kbdev); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { @@ -183,8 +179,6 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) kbdev->hwcnt.addr = 0ULL; kbdev->hwcnt.addr_bytes = 0ULL; - kbase_pm_ca_instr_disable(kbdev); - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index acd4a5a..e7bfa39 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,9 +27,6 @@ #include <mali_kbase.h> #include <mali_kbase_config.h> #include <mali_midg_regmap.h> -#if defined(CONFIG_MALI_GATOR_SUPPORT) -#include <mali_kbase_gator.h> -#endif #include <mali_kbase_tlstream.h> #include <mali_kbase_hw.h> #include <mali_kbase_hwaccess_jm.h> @@ -185,19 +182,19 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, (u32)affinity); -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_job_slots_event( - GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), - kctx, kbase_jd_atom_id(kctx, katom)); -#endif - KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head, + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx, + js, kbase_jd_atom_id(kctx, katom), TL_JS_EVENT_START); + + KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head, affinity, cfg); KBASE_TLSTREAM_TL_RET_CTX_LPU( + kbdev, kctx, &kbdev->gpu_props.props.raw_props.js_features[ katom->slot_nr]); - KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]); + KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); KBASE_TLSTREAM_TL_RET_ATOM_LPU( + kbdev, katom, &kbdev->gpu_props.props.raw_props.js_features[js], "ctx_nr,atom_nr"); @@ -269,6 +266,7 @@ static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, int js) { KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( + kbdev, &kbdev->gpu_props.props.raw_props.js_features[js]); } @@ -310,12 +308,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) JOB_SLOT_REG(i, JS_STATUS)); if (completion_code == BASE_JD_EVENT_STOPPED) { -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_job_slots_event( - GATOR_MAKE_EVENT( - GATOR_JOB_SLOT_SOFT_STOPPED, i), - NULL, 0); -#endif + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( + kbdev, NULL, + i, 0, TL_JS_EVENT_SOFT_STOP); 
kbasep_trace_tl_event_lpu_softstop( kbdev, i); @@ -543,7 +538,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; /* Mark the point where we issue the soft-stop command */ - KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom); + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { int i; @@ -772,6 +767,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, if (katom->sched_priority > priority) { if (!stop_sent) KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( + kbdev, target_katom); kbase_job_slot_softstop(kbdev, js, katom); diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index c714582..5ea6130 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -322,11 +322,11 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, kbase_pm_release_gpu_cycle_counter_nolock(kbdev); /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, katom, &kbdev->gpu_props.props.raw_props.js_features [katom->slot_nr]); - KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]); - KBASE_TLSTREAM_TL_NRET_CTX_LPU(kctx, + KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); + KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, kctx, &kbdev->gpu_props.props.raw_props.js_features [katom->slot_nr]); @@ -546,7 +546,7 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, kbase_pm_protected_override_disable(kbdev); kbase_pm_update_cores_state_nolock(kbdev); - KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev); + KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); if (err) { /* * Failed to switch into protected mode, resume @@ -600,7 +600,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, 
switch (katom[idx]->protected_state.enter) { case KBASE_ATOM_ENTER_PROTECTED_CHECK: - KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev); + KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev); /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV * should ensure that we are not already transitiong, and that * there are no atoms currently on the GPU. */ @@ -754,7 +754,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, switch (katom[idx]->protected_state.exit) { case KBASE_ATOM_EXIT_PROTECTED_CHECK: - KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev); + KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev, kbdev); /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV * should ensure that we are not already transitiong, and that * there are no atoms currently on the GPU. */ @@ -1095,12 +1095,12 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; if (completion_code == BASE_JD_EVENT_STOPPED) { - KBASE_TLSTREAM_TL_NRET_ATOM_LPU(next_katom, + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom, &kbdev->gpu_props.props.raw_props.js_features [next_katom->slot_nr]); - KBASE_TLSTREAM_TL_NRET_ATOM_AS(next_katom, &kbdev->as + KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, next_katom, &kbdev->as [next_katom->kctx->as_nr]); - KBASE_TLSTREAM_TL_NRET_CTX_LPU(next_katom->kctx, + KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx, &kbdev->gpu_props.props.raw_props.js_features [next_katom->slot_nr]); } @@ -1390,7 +1390,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbdev->protected_mode_hwcnt_disabled = false; - KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev); + KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev, kbdev); } kbdev->protected_mode_transition = false; diff --git a/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c b/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c index ba5bf72..b926f4c 100644 --- 
a/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -284,7 +284,7 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), (current_setup->memattr >> 32) & 0xFFFFFFFFUL); - KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, + KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as, current_setup->transtab, current_setup->memattr, transcfg); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index c19a0d1..9509875 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -314,6 +314,7 @@ static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) */ backend->hwcnt_disabled = true; kbase_pm_update_state(kbdev); + kbase_backend_slot_update(kbdev); } else { /* PM state was updated while we were doing the disable, * so we need to undo the disable we just performed. 
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c index 2cb9452..41f6429 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c @@ -27,6 +27,9 @@ #include <mali_kbase.h> #include <mali_kbase_pm.h> #include <backend/gpu/mali_kbase_pm_internal.h> +#ifdef CONFIG_MALI_NO_MALI +#include <backend/gpu/mali_kbase_model_dummy.h> +#endif int kbase_pm_ca_init(struct kbase_device *kbdev) { @@ -75,15 +78,12 @@ unlock: u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) { +#ifdef CONFIG_MALI_DEVFREQ struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; +#endif lockdep_assert_held(&kbdev->hwaccess_lock); - /* All cores must be enabled when instrumentation is in use */ - if (pm_backend->instr_enabled) - return kbdev->gpu_props.props.raw_props.shader_present & - kbdev->pm.debug_core_mask_all; - #ifdef CONFIG_MALI_DEVFREQ return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all; #else @@ -94,14 +94,13 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); -void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) +u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); - kbdev->pm.backend.instr_enabled = true; -} -void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - kbdev->pm.backend.instr_enabled = false; +#ifdef CONFIG_MALI_NO_MALI + return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1); +#else + return kbdev->pm.backend.pm_shaders_core_mask; +#endif } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.h b/mali_kbase/backend/gpu/mali_kbase_pm_ca.h index 274581d..5423e96 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.h @@ -75,23 +75,15 @@ void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, u64 
cores_transitioning); /** - * kbase_pm_ca_instr_enable - Enable override for instrumentation + * kbase_pm_ca_get_instr_core_mask - Get the PM state sync-ed shaders core mask * * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * This overrides the output of the core availability policy, ensuring that all - * cores are available - */ -void kbase_pm_ca_instr_enable(struct kbase_device *kbdev); - -/** - * kbase_pm_ca_instr_disable - Disable override for instrumentation - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * Returns a mask of the PM state synchronised shader cores for arranging + * HW performance counter dumps * - * This disables any previously enabled override, and resumes normal policy - * functionality + * Return: The bit mask of PM state synchronised cores */ -void kbase_pm_ca_instr_disable(struct kbase_device *kbdev); +u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev); #endif /* _KBASE_PM_CA_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h index 0cff22e..e11cb75 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h @@ -94,10 +94,16 @@ enum kbase_l2_core_state { * * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have - * been requested to power on + * been requested to power on and hwcnt + * is being disabled * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been - * requested to power on - * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on + * requested to power on. + * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on, and hwcnt + * already enabled. + * @KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: The shaders and core stacks + * are on, hwcnt disabled, and checks + * to powering down or re-enabling + * hwcnt. 
* @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to * power off, but they remain on for the * duration of the hysteresis timer @@ -118,6 +124,7 @@ enum kbase_shader_core_state { KBASE_SHADERS_OFF_CORESTACK_PEND_ON, KBASE_SHADERS_PEND_ON_CORESTACK_ON, KBASE_SHADERS_ON_CORESTACK_ON, + KBASE_SHADERS_ON_CORESTACK_ON_RECHECK, KBASE_SHADERS_WAIT_OFF_CORESTACK_ON, KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON, KBASE_SHADERS_PEND_OFF_CORESTACK_ON, @@ -245,8 +252,10 @@ union kbase_pm_policy_data { * machines * @gpu_powered: Set to true when the GPU is powered and register * accesses are possible, false otherwise - * @instr_enabled: Set to true when instrumentation is enabled, - * false otherwise + * @pm_shaders_core_mask: Shader PM state synchronised shaders core mask. It + * holds the cores enabled in a hardware counters dump, + * and may differ from @shaders_avail when under different + * states and transitions. * @cg1_disabled: Set if the policy wants to keep the second core group * powered off * @driver_ready_for_irqs: Debug state indicating whether sufficient @@ -332,7 +341,7 @@ struct kbase_pm_backend_data { bool gpu_powered; - bool instr_enabled; + u64 pm_shaders_core_mask; bool cg1_disabled; diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 2e6599a..3184e57 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,7 +29,6 @@ #include <mali_kbase.h> #include <mali_kbase_config_defaults.h> #include <mali_midg_regmap.h> -#include <mali_kbase_gator.h> #include <mali_kbase_tlstream.h> #include <mali_kbase_pm.h> #include <mali_kbase_config_defaults.h> @@ -49,7 +48,7 @@ bool corestack_driver_control = true; #else bool corestack_driver_control; /* Default value of 0/false */ #endif -module_param(corestack_driver_control, bool, 0000); +module_param(corestack_driver_control, bool, 0444); MODULE_PARM_DESC(corestack_driver_control, "Let the driver power on/off the GPU core stack independently " "without involving the Power Domain Controller. This should " @@ -57,8 +56,12 @@ MODULE_PARM_DESC(corestack_driver_control, "to the Mali GPU is known to be problematic."); KBASE_EXPORT_TEST_API(corestack_driver_control); -bool platform_power_down_only = PLATFORM_POWER_DOWN_ONLY; -module_param(platform_power_down_only, bool, 0000); +#ifdef CONFIG_MALI_PLATFORM_POWER_DOWN_ONLY +bool platform_power_down_only = true; +#else +bool platform_power_down_only; /* Default value of 0/false */ +#endif +module_param(platform_power_down_only, bool, 0444); MODULE_PARM_DESC(platform_power_down_only, "Disable power down of individual cores."); @@ -233,14 +236,6 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, reg = core_type_to_reg(core_type, action); KBASE_DEBUG_ASSERT(reg); -#if defined(CONFIG_MALI_GATOR_SUPPORT) - if (cores) { - if (action == ACTION_PWRON) - kbase_trace_mali_pm_power_on(core_type, cores); - else if (action == ACTION_PWROFF) - kbase_trace_mali_pm_power_off(core_type, cores); - } -#endif if (cores) { u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); @@ -249,7 +244,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, state |= cores; else if (action == ACTION_PWROFF) state &= ~cores; - 
KBASE_TLSTREAM_AUX_PM_STATE(core_type, state); + KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state); } /* Tracing */ @@ -433,6 +428,28 @@ u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores); +static void kbase_pm_trigger_hwcnt_disable(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* See if we can get away with disabling hwcnt + * atomically, otherwise kick off a worker. + */ + if (kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)) { + backend->hwcnt_disabled = true; + } else { +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, + &backend->hwcnt_disable_work); +#else + queue_work(system_highpri_wq, + &backend->hwcnt_disable_work); +#endif + } +} + static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; @@ -575,22 +592,9 @@ static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev) break; } - /* See if we can get away with disabling hwcnt - * atomically, otherwise kick off a worker. - */ backend->hwcnt_desired = false; if (!backend->hwcnt_disabled) { - if (kbase_hwcnt_context_disable_atomic( - kbdev->hwcnt_gpu_ctx)) - backend->hwcnt_disabled = true; - else -#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE - queue_work(system_wq, - &backend->hwcnt_disable_work); -#else - queue_work(system_highpri_wq, - &backend->hwcnt_disable_work); -#endif + kbase_pm_trigger_hwcnt_disable(kbdev); } if (backend->hwcnt_disabled) @@ -635,10 +639,9 @@ static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev) break; case KBASE_L2_RESET_WAIT: - if (!backend->in_reset) { - /* Reset complete */ + /* Reset complete */ + if (!backend->in_reset) backend->l2_state = KBASE_L2_OFF; - } break; default: @@ -761,13 +764,27 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) * i.e. off and SHADERS_ON_CORESTACK_ON. 
*/ backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev); + backend->pm_shaders_core_mask = 0; + + if (backend->shaders_desired && + backend->l2_state == KBASE_L2_ON) { + if (backend->hwcnt_desired && + !backend->hwcnt_disabled) { + /* Trigger a hwcounter dump */ + backend->hwcnt_desired = false; + kbase_pm_trigger_hwcnt_disable(kbdev); + } - if (backend->shaders_desired && backend->l2_state == KBASE_L2_ON) { - if (corestack_driver_control) - kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, - stacks_avail, ACTION_PWRON); - - backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_ON; + if (backend->hwcnt_disabled) { + if (corestack_driver_control) { + kbase_pm_invoke(kbdev, + KBASE_PM_CORE_STACK, + stacks_avail, + ACTION_PWRON); + } + backend->shaders_state = + KBASE_SHADERS_OFF_CORESTACK_PEND_ON; + } } break; @@ -777,7 +794,6 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) backend->shaders_avail, ACTION_PWRON); backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; - } break; @@ -786,6 +802,13 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, NULL, 0u, (u32)shaders_ready); + backend->pm_shaders_core_mask = shaders_ready; + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + backend->hwcnt_disabled = false; + } backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON; } break; @@ -793,7 +816,25 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) case KBASE_SHADERS_ON_CORESTACK_ON: backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev); - if (!backend->shaders_desired) { + /* If shaders to change state, trigger a counter dump */ + if (!backend->shaders_desired || + (backend->shaders_avail & ~shaders_ready)) { + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) + kbase_pm_trigger_hwcnt_disable(kbdev); + backend->shaders_state = + 
KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; + } + break; + + case KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: + backend->shaders_avail = + kbase_pm_ca_get_core_mask(kbdev); + + if (!backend->hwcnt_disabled) { + /* Wait for being disabled */ + ; + } else if (!backend->shaders_desired) { if (kbdev->pm.backend.protected_transition_override || !stt->configured_ticks || WARN_ON(stt->cancel_queued)) { @@ -821,16 +862,15 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; } - } else if (!platform_power_down_only) { + } else { if (backend->shaders_avail & ~shaders_ready) { backend->shaders_avail |= shaders_ready; kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, backend->shaders_avail & ~shaders_ready, ACTION_PWRON); - backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; - } + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; } break; @@ -842,7 +882,7 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) if (backend->shaders_desired) { stt->remaining_ticks = 0; - backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON; + backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; } else if (stt->remaining_ticks == 0) { backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; } @@ -873,8 +913,18 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) break; case KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: - if ((!stacks_trans && !stacks_ready) || platform_power_down_only) + if ((!stacks_trans && !stacks_ready) || + platform_power_down_only) { + /* On powered off, re-enable the hwcnt */ + backend->pm_shaders_core_mask = 0; + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + backend->hwcnt_disabled = false; + } backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + } break; case KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: @@ -945,31 +995,18 @@ static void 
kbase_pm_trace_power_state(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_pm_status(KBASE_PM_CORE_L2, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_L2)); - kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_SHADER)); - kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_TILER)); - if (corestack_driver_control) - kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_STACK)); -#endif - KBASE_TLSTREAM_AUX_PM_STATE( + kbdev, KBASE_PM_CORE_L2, kbase_pm_get_ready_cores( kbdev, KBASE_PM_CORE_L2)); KBASE_TLSTREAM_AUX_PM_STATE( + kbdev, KBASE_PM_CORE_SHADER, kbase_pm_get_ready_cores( kbdev, KBASE_PM_CORE_SHADER)); KBASE_TLSTREAM_AUX_PM_STATE( + kbdev, KBASE_PM_CORE_TILER, kbase_pm_get_ready_cores( kbdev, @@ -977,6 +1014,7 @@ static void kbase_pm_trace_power_state(struct kbase_device *kbdev) if (corestack_driver_control) KBASE_TLSTREAM_AUX_PM_STATE( + kbdev, KBASE_PM_CORE_STACK, kbase_pm_get_ready_cores( kbdev, @@ -1627,7 +1665,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); - KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev); + KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_SOFT_RESET); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.h b/mali_kbase/backend/gpu/mali_kbase_pm_policy.h index 28d258f..966fce7 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.h @@ -93,12 +93,14 @@ static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, * available, and shaders are definitely not powered. 
*/ if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON && - kbdev->pm.backend.l2_state != KBASE_L2_ON) + kbdev->pm.backend.l2_state != KBASE_L2_ON && + kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE) return false; if (shader_required && kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK) return false; return true; diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp index 2cf685c..a971143 100644 --- a/mali_kbase/build.bp +++ b/mali_kbase/build.bp @@ -3,7 +3,7 @@ * ---------------------------------------------------------------------------- * This confidential and proprietary software may be used only as authorized * by a licensing agreement from ARM Limited. - * (C) COPYRIGHT 2017-2018 ARM Limited, ALL RIGHTS RESERVED + * (C) COPYRIGHT 2017-2019 ARM Limited, ALL RIGHTS RESERVED * The entire notice above must be reproduced on all authorized copies and * copies may only be made to the extent permitted by a licensing agreement * from ARM Limited. 
@@ -49,10 +49,15 @@ bob_defaults { mali_pwrsoft_765: { kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"], }, + mali_memory_fully_backed: { + kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], + }, kbuild_options: [ "MALI_UNIT_TEST={{.unit_test_code}}", "MALI_CUSTOMER_RELEASE={{.release}}", "MALI_USE_CSF={{.gpu_has_csf}}", + /* MALI_CSF_LT_V10: Temporary for hybrid integration config */ + "MALI_CSF_LT_V10={{.base_hybrid_csf_lt_v10}}", "MALI_KERNEL_TEST_API={{.debug}}", ], defaults: ["kernel_defaults"], @@ -90,6 +95,9 @@ bob_kernel_module { mali_corestack: { kbuild_options: ["CONFIG_MALI_CORESTACK=y"], }, + mali_platform_power_down_only: { + kbuild_options: ["CONFIG_MALI_PLATFORM_POWER_DOWN_ONLY=y"], + }, mali_error_inject: { kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"], }, diff --git a/mali_kbase/ipa/mali_kbase_ipa_simple.c b/mali_kbase/ipa/mali_kbase_ipa_simple.c index c8399ab..852559e 100644 --- a/mali_kbase/ipa/mali_kbase_ipa_simple.c +++ b/mali_kbase/ipa/mali_kbase_ipa_simple.c @@ -132,7 +132,7 @@ static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t) const s64 res_big = ts[3] * t3 /* +/- 2^62 */ + ts[2] * t2 /* +/- 2^55 */ + ts[1] * t /* +/- 2^48 */ - + ts[0] * 1000; /* +/- 2^41 */ + + ts[0] * (s64)1000; /* +/- 2^41 */ /* Range: -2^60 < res_unclamped < 2^60 */ s64 res_unclamped = div_s64(res_big, 1000); diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h index 5571f84..03e326f 100644 --- a/mali_kbase/mali_base_hwconfig_features.h +++ b/mali_kbase/mali_base_hwconfig_features.h @@ -406,7 +406,6 @@ static const enum base_hw_feature base_hw_features_tTRx[] = { BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -435,7 +434,6 @@ static const enum base_hw_feature base_hw_features_tNAx[] = { BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, 
BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -464,7 +462,6 @@ static const enum base_hw_feature base_hw_features_tBEx[] = { BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -497,6 +494,34 @@ static const enum base_hw_feature base_hw_features_tULx[] = { BASE_HW_FEATURE_END }; +static const enum base_hw_feature base_hw_features_tDUx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_MODE, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_END +}; + static const enum base_hw_feature base_hw_features_tBOx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index d7c40ef..5b3a854 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ b/mali_kbase/mali_base_hwconfig_issues.h @@ -1287,6 +1287,23 @@ static const enum base_hw_issue base_hw_issues_model_tULx[] = { BASE_HW_ISSUE_END }; +static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { + BASE_HW_ISSUE_9435, + 
BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tDUx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + static const enum base_hw_issue base_hw_issues_tBOx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, diff --git a/mali_kbase/mali_base_kernel.h b/mali_kbase/mali_base_kernel.h index 70dc3c5..02a823a 100644 --- a/mali_kbase/mali_base_kernel.h +++ b/mali_kbase/mali_base_kernel.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -86,6 +86,10 @@ typedef struct base_mem_handle { * @{ */ +/* Physical memory group ID for normal usage. + */ +#define BASE_MEM_GROUP_DEFAULT (0) + /** * typedef base_mem_alloc_flags - Memory allocation, access/hint flags. 
* diff --git a/mali_kbase/mali_kbase_10969_workaround.c b/mali_kbase/mali_kbase_10969_workaround.c index 8d71926..118511a 100644 --- a/mali_kbase/mali_kbase_10969_workaround.c +++ b/mali_kbase/mali_kbase_10969_workaround.c @@ -65,7 +65,7 @@ int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom) kbase_gpu_vm_lock(katom->kctx); region = kbase_region_tracker_find_region_enclosing_address(katom->kctx, katom->jc); - if (!region || (region->flags & KBASE_REG_FREE)) + if (kbase_is_region_invalid_or_free(region)) goto out_unlock; page_array = kbase_get_cpu_phy_pages(region); diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h index bb2ab53..cfb9a41 100644 --- a/mali_kbase/mali_kbase_config_defaults.h +++ b/mali_kbase/mali_kbase_config_defaults.h @@ -248,20 +248,6 @@ enum { #define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */ /** - * Perform GPU power down using only platform specific code, skipping DDK power - * management. - * - * If this is non-zero then kbase will avoid powering down shader cores, the - * tiler, and the L2 cache, instead just powering down the entire GPU through - * platform specific code. This may be required for certain platform - * integrations. - * - * Note that as this prevents kbase from powering down shader cores, this limits - * the available power policies to coarse_demand and always_on. - */ -#define PLATFORM_POWER_DOWN_ONLY (0) - -/** * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms * this isn't available, so we simply define a dummy value here. If devfreq * is enabled the value will be read from there, otherwise this should be diff --git a/mali_kbase/mali_kbase_context.c b/mali_kbase/mali_kbase_context.c index 59609d7..6489a4f 100644 --- a/mali_kbase/mali_kbase_context.c +++ b/mali_kbase/mali_kbase_context.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2019 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,6 +31,7 @@ #include <mali_kbase_mem_linux.h> #include <mali_kbase_dma_fence.h> #include <mali_kbase_ctx_sched.h> +#include <mali_kbase_mem_pool_group.h> struct kbase_context * kbase_create_context(struct kbase_device *kbdev, bool is_compat) @@ -69,22 +70,11 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) kctx->tgid = current->tgid; kctx->pid = current->pid; - err = kbase_mem_pool_init(&kctx->mem_pool, - kbdev->mem_pool_max_size_default, - KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER, - kctx->kbdev, - &kbdev->mem_pool); + err = kbase_mem_pool_group_init(&kctx->mem_pools, kbdev, + &kbdev->mem_pool_defaults, &kbdev->mem_pools); if (err) goto free_kctx; - err = kbase_mem_pool_init(&kctx->lp_mem_pool, - (kbdev->mem_pool_max_size_default >> 9), - KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER, - kctx->kbdev, - &kbdev->lp_mem_pool); - if (err) - goto free_mem_pool; - err = kbase_mem_evictable_init(kctx); if (err) goto free_both_pools; @@ -103,7 +93,6 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (err) goto free_jd; - atomic_set(&kctx->drain_pending, 0); mutex_init(&kctx->reg_lock); @@ -115,13 +104,14 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) spin_lock_init(&kctx->waiting_soft_jobs_lock); err = kbase_dma_fence_init(kctx); if (err) - goto free_kcpu_wq; + goto free_event; err = kbase_mmu_init(kbdev, &kctx->mmu, kctx); if (err) goto term_dma_fence; - p = kbase_mem_alloc_page(&kctx->mem_pool); + p = kbase_mem_alloc_page( + &kctx->mem_pools.small[BASE_MEM_GROUP_DEFAULT]); if (!p) goto no_sink_page; kctx->aliasing_sink_page = as_tagged(page_to_phys(p)); @@ -130,7 +120,6 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) kctx->cookies = KBASE_COOKIE_MASK; - /* Make sure page 0 is not used... 
*/ err = kbase_region_tracker_init(kctx); if (err) @@ -143,6 +132,8 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) err = kbase_jit_init(kctx); if (err) goto no_jit; + + #ifdef CONFIG_GPU_TRACEPOINTS atomic_set(&kctx->jctx.work_id, 0); #endif @@ -163,12 +154,13 @@ no_jit: no_sticky: kbase_region_tracker_term(kctx); no_region_tracker: - kbase_mem_pool_free(&kctx->mem_pool, p, false); + kbase_mem_pool_free( + &kctx->mem_pools.small[BASE_MEM_GROUP_DEFAULT], p, false); no_sink_page: kbase_mmu_term(kbdev, &kctx->mmu); term_dma_fence: kbase_dma_fence_term(kctx); -free_kcpu_wq: +free_event: kbase_event_cleanup(kctx); free_jd: /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ @@ -177,9 +169,7 @@ free_jd: deinit_evictable: kbase_mem_evictable_deinit(kctx); free_both_pools: - kbase_mem_pool_term(&kctx->lp_mem_pool); -free_mem_pool: - kbase_mem_pool_term(&kctx->mem_pool); + kbase_mem_pool_group_term(&kctx->mem_pools); free_kctx: vfree(kctx); out: @@ -216,7 +206,7 @@ void kbase_destroy_context(struct kbase_context *kctx) * thread. 
*/ kbase_pm_context_active(kbdev); - kbase_mem_pool_mark_dying(&kctx->mem_pool); + kbase_mem_pool_group_mark_dying(&kctx->mem_pools); kbase_jd_zap_context(kctx); @@ -255,7 +245,8 @@ void kbase_destroy_context(struct kbase_context *kctx) /* drop the aliasing sink page now that it can't be mapped anymore */ p = as_page(kctx->aliasing_sink_page); - kbase_mem_pool_free(&kctx->mem_pool, p, false); + kbase_mem_pool_free(&kctx->mem_pools.small[BASE_MEM_GROUP_DEFAULT], + p, false); /* free pending region setups */ pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK; @@ -273,7 +264,6 @@ void kbase_destroy_context(struct kbase_context *kctx) kbase_region_tracker_term(kctx); kbase_gpu_vm_unlock(kctx); - /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ kbasep_js_kctx_term(kctx); @@ -292,8 +282,9 @@ void kbase_destroy_context(struct kbase_context *kctx) dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); kbase_mem_evictable_deinit(kctx); - kbase_mem_pool_term(&kctx->mem_pool); - kbase_mem_pool_term(&kctx->lp_mem_pool); + + kbase_mem_pool_group_term(&kctx->mem_pools); + WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); vfree(kctx); diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index 382285f..96f4d01 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,6 +40,7 @@ #include "mali_kbase_debug_mem_view.h" #include "mali_kbase_mem.h" #include "mali_kbase_mem_pool_debugfs.h" +#include "mali_kbase_debugfs_helper.h" #if !MALI_CUSTOMER_RELEASE #include "mali_kbase_regs_dump_debugfs.h" #endif /* !MALI_CUSTOMER_RELEASE */ @@ -94,6 +95,8 @@ #include <linux/opp.h> #endif +#include <linux/pm_runtime.h> + #include <mali_kbase_tlstream.h> #include <mali_kbase_as_fault_debugfs.h> @@ -443,7 +446,7 @@ static int kbase_open(struct inode *inode, struct file *filp) kbase_debug_job_fault_context_init(kctx); - kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool, &kctx->lp_mem_pool); + kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); kbase_jit_debugfs_init(kctx); #endif /* CONFIG_DEBUG_FS */ @@ -459,6 +462,7 @@ static int kbase_open(struct inode *inode, struct file *filp) element->kctx = kctx; list_add(&element->link, &kbdev->kctx_list); KBASE_TLSTREAM_TL_NEW_CTX( + kbdev, element->kctx, element->kctx->id, (u32)(element->kctx->tgid)); @@ -482,7 +486,7 @@ static int kbase_release(struct inode *inode, struct file *filp) struct kbasep_kctx_list_element *element, *tmp; bool found_element = false; - KBASE_TLSTREAM_TL_DEL_CTX(kctx); + KBASE_TLSTREAM_TL_DEL_CTX(kbdev, kctx); #ifdef CONFIG_DEBUG_FS kbasep_mem_profile_debugfs_remove(kctx); @@ -823,12 +827,12 @@ static int kbase_api_get_context_id(struct kbase_context *kctx, static int kbase_api_tlstream_acquire(struct kbase_context *kctx, struct kbase_ioctl_tlstream_acquire *acquire) { - return kbase_tlstream_acquire(kctx, acquire->flags); + return kbase_tlstream_acquire(kctx->kbdev, acquire->flags); } static int kbase_api_tlstream_flush(struct kbase_context *kctx) { - kbase_tlstream_flush_streams(); + kbase_tlstream_flush_streams(kctx->kbdev->timeline); return 0; } @@ -1055,6 +1059,7 @@ static int 
kbase_api_tlstream_test(struct kbase_context *kctx, struct kbase_ioctl_tlstream_test *test) { kbase_tlstream_test( + kctx->kbdev, test->tpw_count, test->msg_delay, test->msg_count, @@ -1066,7 +1071,7 @@ static int kbase_api_tlstream_test(struct kbase_context *kctx, static int kbase_api_tlstream_stats(struct kbase_context *kctx, struct kbase_ioctl_tlstream_stats *stats) { - kbase_tlstream_stats( + kbase_tlstream_stats(kctx->kbdev->timeline, &stats->bytes_collected, &stats->bytes_generated); @@ -2620,41 +2625,33 @@ static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout, set_reset_timeout); - static ssize_t show_mem_pool_size(struct device *dev, struct device_attribute *attr, char * const buf) { - struct kbase_device *kbdev; - ssize_t ret; + struct kbase_device *const kbdev = to_kbase_device(dev); - kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - ret = scnprintf(buf, PAGE_SIZE, "%zu\n", - kbase_mem_pool_size(&kbdev->mem_pool)); - - return ret; + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_size); } static ssize_t set_mem_pool_size(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct kbase_device *kbdev; - size_t new_size; + struct kbase_device *const kbdev = to_kbase_device(dev); int err; - kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - err = kstrtoul(buf, 0, (unsigned long *)&new_size); - if (err) - return err; - - kbase_mem_pool_trim(&kbdev->mem_pool, new_size); + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_trim); - return count; + return err ? 
err : count; } static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size, @@ -2663,37 +2660,30 @@ static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size, static ssize_t show_mem_pool_max_size(struct device *dev, struct device_attribute *attr, char * const buf) { - struct kbase_device *kbdev; - ssize_t ret; + struct kbase_device *const kbdev = to_kbase_device(dev); - kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - ret = scnprintf(buf, PAGE_SIZE, "%zu\n", - kbase_mem_pool_max_size(&kbdev->mem_pool)); - - return ret; + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_max_size); } static ssize_t set_mem_pool_max_size(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct kbase_device *kbdev; - size_t new_max_size; + struct kbase_device *const kbdev = to_kbase_device(dev); int err; - kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - err = kstrtoul(buf, 0, (unsigned long *)&new_max_size); - if (err) - return -EINVAL; + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); - kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size); - - return count; + return err ? 
err : count; } static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, @@ -2712,13 +2702,14 @@ static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, static ssize_t show_lp_mem_pool_size(struct device *dev, struct device_attribute *attr, char * const buf) { - struct kbase_device *kbdev; + struct kbase_device *const kbdev = to_kbase_device(dev); - kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_size(&kbdev->lp_mem_pool)); + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_size); } /** @@ -2736,21 +2727,17 @@ static ssize_t show_lp_mem_pool_size(struct device *dev, static ssize_t set_lp_mem_pool_size(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct kbase_device *kbdev; - unsigned long new_size; + struct kbase_device *const kbdev = to_kbase_device(dev); int err; - kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - err = kstrtoul(buf, 0, &new_size); - if (err) - return err; - - kbase_mem_pool_trim(&kbdev->lp_mem_pool, new_size); + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_trim); - return count; + return err ? 
err : count; } static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size, @@ -2769,13 +2756,14 @@ static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size, static ssize_t show_lp_mem_pool_max_size(struct device *dev, struct device_attribute *attr, char * const buf) { - struct kbase_device *kbdev; + struct kbase_device *const kbdev = to_kbase_device(dev); - kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_max_size(&kbdev->lp_mem_pool)); + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_max_size); } /** @@ -2792,21 +2780,17 @@ static ssize_t show_lp_mem_pool_max_size(struct device *dev, static ssize_t set_lp_mem_pool_max_size(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct kbase_device *kbdev; - unsigned long new_max_size; + struct kbase_device *const kbdev = to_kbase_device(dev); int err; - kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - err = kstrtoul(buf, 0, &new_max_size); - if (err) - return -EINVAL; - - kbase_mem_pool_set_max_size(&kbdev->lp_mem_pool, new_max_size); + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); - return count; + return err ? 
err : count; } static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size, @@ -3395,6 +3379,45 @@ static const struct file_operations fops_protected_debug_mode = { .llseek = default_llseek, }; +static int kbase_device_debugfs_mem_pool_max_size_show(struct seq_file *sfile, + void *data) +{ + CSTD_UNUSED(data); + return kbase_debugfs_helper_seq_read(sfile, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_config_debugfs_max_size); +} + +static ssize_t kbase_device_debugfs_mem_pool_max_size_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + int err = 0; + + CSTD_UNUSED(ppos); + err = kbase_debugfs_helper_seq_write(file, ubuf, count, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_config_debugfs_set_max_size); + + return err ? err : count; +} + +static int kbase_device_debugfs_mem_pool_max_size_open(struct inode *in, + struct file *file) +{ + return single_open(file, kbase_device_debugfs_mem_pool_max_size_show, + in->i_private); +} + +static const struct file_operations + kbase_device_debugfs_mem_pool_max_size_fops = { + .owner = THIS_MODULE, + .open = kbase_device_debugfs_mem_pool_max_size_open, + .read = seq_read, + .write = kbase_device_debugfs_mem_pool_max_size_write, + .llseek = seq_lseek, + .release = single_release, +}; + static int kbase_device_debugfs_init(struct kbase_device *kbdev) { struct dentry *debugfs_ctx_defaults_directory; @@ -3451,9 +3474,15 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) debugfs_ctx_defaults_directory, &kbdev->infinite_cache_active_default); - debugfs_create_size_t("mem_pool_max_size", 0644, + debugfs_create_file("mem_pool_max_size", 0644, + debugfs_ctx_defaults_directory, + &kbdev->mem_pool_defaults.small, + &kbase_device_debugfs_mem_pool_max_size_fops); + + debugfs_create_file("lp_mem_pool_max_size", 0644, debugfs_ctx_defaults_directory, - &kbdev->mem_pool_max_size_default); + &kbdev->mem_pool_defaults.large, + 
&kbase_device_debugfs_mem_pool_max_size_fops); if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { debugfs_create_file("protected_debug_mode", S_IRUGO, @@ -3686,7 +3715,7 @@ static int kbase_platform_device_remove(struct platform_device *pdev) } if (kbdev->inited_subsys & inited_tlstream) { - kbase_tlstream_term(); + kbase_tlstream_term(kbdev->timeline); kbdev->inited_subsys &= ~inited_tlstream; } @@ -3834,6 +3863,7 @@ static int kbase_platform_device_probe(struct platform_device *pdev) scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, kbase_dev_nr); + kbdev->id = kbase_dev_nr; kbase_disjoint_init(kbdev); @@ -3899,7 +3929,8 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->inited_subsys |= inited_js; - err = kbase_tlstream_init(); + atomic_set(&kbdev->timeline_is_enabled, 0); + err = kbase_tlstream_init(&kbdev->timeline, &kbdev->timeline_is_enabled); if (err) { dev_err(kbdev->dev, "Timeline stream initialization failed\n"); kbase_platform_device_remove(pdev); @@ -3930,7 +3961,9 @@ static int kbase_platform_device_probe(struct platform_device *pdev) kbdev->inited_subsys |= inited_hwcnt_gpu_ctx; err = kbase_hwcnt_virtualizer_init( - kbdev->hwcnt_gpu_ctx, &kbdev->hwcnt_gpu_virt); + kbdev->hwcnt_gpu_ctx, + KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, + &kbdev->hwcnt_gpu_virt); if (err) { dev_err(kbdev->dev, "GPU hwcnt virtualizer initialization failed\n"); @@ -4195,6 +4228,10 @@ static int kbase_device_runtime_idle(struct device *dev) if (kbdev->pm.backend.callback_power_runtime_idle) return kbdev->pm.backend.callback_power_runtime_idle(kbdev); + /* Just need to update the device's last busy mark. Kernel will respect + * the autosuspend delay and so won't suspend the device immediately. 
+ */ + pm_runtime_mark_last_busy(kbdev->dev); return 0; } #endif /* KBASE_PM_RUNTIME */ @@ -4281,51 +4318,30 @@ MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change); -void kbase_trace_mali_pm_status(u32 event, u64 value) -{ - trace_mali_pm_status(event, value); -} - -void kbase_trace_mali_pm_power_off(u32 event, u64 value) -{ - trace_mali_pm_power_off(event, value); -} - -void kbase_trace_mali_pm_power_on(u32 event, u64 value) -{ - trace_mali_pm_power_on(event, value); -} - -void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id) -{ - trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id); -} - -void kbase_trace_mali_page_fault_insert_pages(int event, u32 value) +void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value) { - trace_mali_page_fault_insert_pages(event, value); + trace_mali_pm_status(dev_id, event, value); } -void kbase_trace_mali_mmu_as_in_use(int event) +void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id) { - trace_mali_mmu_as_in_use(event); + trace_mali_job_slots_event(dev_id, event, + (kctx != NULL ? kctx->tgid : 0), + (kctx != NULL ? 
kctx->pid : 0), + atom_id); } -void kbase_trace_mali_mmu_as_released(int event) +void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value) { - trace_mali_mmu_as_released(event); + trace_mali_page_fault_insert_pages(dev_id, event, value); } -void kbase_trace_mali_total_alloc_pages_change(long long int event) +void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event) { - trace_mali_total_alloc_pages_change(event); + trace_mali_total_alloc_pages_change(dev_id, event); } #endif /* CONFIG_MALI_GATOR_SUPPORT */ #ifdef CONFIG_MALI_SYSTEM_TRACE diff --git a/mali_kbase/mali_kbase_debugfs_helper.c b/mali_kbase/mali_kbase_debugfs_helper.c new file mode 100644 index 0000000..747364d --- /dev/null +++ b/mali_kbase/mali_kbase_debugfs_helper.c @@ -0,0 +1,187 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/uaccess.h> + +#include "mali_kbase_debugfs_helper.h" + +#ifdef CONFIG_DEBUG_FS + +/* Arbitrary maximum size to prevent user space allocating too much kernel + * memory + */ +#define DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE (256u) + +/** + * set_attr_from_string - Parse a string to set elements of an array + * + * This is the core of the implementation of + * kbase_debugfs_helper_set_attr_from_string. The only difference between the + * two functions is that this one requires the input string to be writable. + * + * @buf: Input string to parse. Must be nul-terminated! + * @array: Address of an object that can be accessed like an array. + * @nelems: Number of elements in the array. + * @set_attr_fn: Function to be called back for each array element. + * + * Return: 0 if success, negative error code otherwise. + */ +static int set_attr_from_string( + char *const buf, + void *const array, size_t const nelems, + kbase_debugfs_helper_set_attr_fn const set_attr_fn) +{ + size_t index, err = 0; + char *ptr = buf; + + for (index = 0; index < nelems && *ptr; ++index) { + unsigned long new_size; + size_t len; + char sep; + + /* Drop leading spaces */ + while (*ptr == ' ') + ptr++; + + len = strcspn(ptr, "\n "); + if (len == 0) { + /* No more values (allow this) */ + break; + } + + /* Substitute a nul terminator for a space character + * to make the substring valid for kstrtoul. 
+ */ + sep = ptr[len]; + if (sep == ' ') + ptr[len++] = '\0'; + + err = kstrtoul(ptr, 0, &new_size); + if (err) + break; + + /* Skip the substring (including any premature nul terminator) + */ + ptr += len; + + set_attr_fn(array, index, new_size); + } + + return err; +} + +int kbase_debugfs_helper_set_attr_from_string( + const char *const buf, void *const array, size_t const nelems, + kbase_debugfs_helper_set_attr_fn const set_attr_fn) +{ + char *const wbuf = kstrdup(buf, GFP_KERNEL); + int err = 0; + + if (!wbuf) + return -ENOMEM; + + err = set_attr_from_string(wbuf, array, nelems, + set_attr_fn); + + kfree(wbuf); + return err; +} + +ssize_t kbase_debugfs_helper_get_attr_to_string( + char *const buf, size_t const size, + void *const array, size_t const nelems, + kbase_debugfs_helper_get_attr_fn const get_attr_fn) +{ + ssize_t total = 0; + size_t index; + + for (index = 0; index < nelems; ++index) { + const char *postfix = " "; + + if (index == (nelems-1)) + postfix = "\n"; + + total += scnprintf(buf + total, size - total, "%zu%s", + get_attr_fn(array, index), postfix); + } + + return total; +} + +int kbase_debugfs_helper_seq_write(struct file *const file, + const char __user *const ubuf, size_t const count, + size_t const nelems, + kbase_debugfs_helper_set_attr_fn const set_attr_fn) +{ + const struct seq_file *const sfile = file->private_data; + void *const array = sfile->private; + int err = 0; + char *buf; + + if (WARN_ON(!array)) + return -EINVAL; + + if (WARN_ON(count > DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE)) + return -EINVAL; + + buf = kmalloc(count + 1, GFP_KERNEL); + if (ZERO_OR_NULL_PTR(buf)) + return -ENOMEM; + + if (copy_from_user(buf, ubuf, count)) { + kfree(buf); + return -EFAULT; + } + + buf[count] = '\0'; + err = set_attr_from_string(buf, + array, nelems, set_attr_fn); + kfree(buf); + + return err; +} + +int kbase_debugfs_helper_seq_read(struct seq_file *const sfile, + size_t const nelems, + kbase_debugfs_helper_get_attr_fn const get_attr_fn) +{ + void 
*const array = sfile->private; + size_t index; + + if (WARN_ON(!array)) + return -EINVAL; + + for (index = 0; index < nelems; ++index) { + const char *postfix = " "; + + if (index == (nelems-1)) + postfix = "\n"; + + seq_printf(sfile, "%zu%s", get_attr_fn(array, index), postfix); + } + return 0; +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/mali_kbase/mali_kbase_debugfs_helper.h b/mali_kbase/mali_kbase_debugfs_helper.h new file mode 100644 index 0000000..c3c9efa --- /dev/null +++ b/mali_kbase/mali_kbase_debugfs_helper.h @@ -0,0 +1,141 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUGFS_HELPER_H_ +#define _KBASE_DEBUGFS_HELPER_H_ + +/** + * typedef kbase_debugfs_helper_set_attr_fn - Type of function to set an + * attribute value from an array + * + * @array: Address of an object that can be accessed like an array. + * @index: An element index. The valid range depends on the use-case. + * @value: Attribute value to be set. 
+ */ +typedef void (*kbase_debugfs_helper_set_attr_fn)( + void *array, size_t index, size_t value); + +/** + * kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an + * array + * + * The given function is called once for each attribute value found in the + * input string. It is not an error if the string specifies fewer attribute + * values than the specified number of array elements. + * + * The number base of each attribute value is detected automatically + * according to the standard rules (e.g. prefix "0x" for hexadecimal). + * Attribute values are separated by one or more space characters. + * Additional leading and trailing spaces are ignored. + * + * @buf: Input string to parse. Must be nul-terminated! + * @array: Address of an object that can be accessed like an array. + * @nelems: Number of elements in the array. + * @set_attr_fn: Function to be called back for each array element. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_debugfs_helper_set_attr_from_string( + const char *buf, void *array, size_t nelems, + kbase_debugfs_helper_set_attr_fn set_attr_fn); + +/** + * typedef kbase_debugfs_helper_get_attr_fn - Type of function to get an + * attribute value from an array + * + * @array: Address of an object that can be accessed like an array. + * @index: An element index. The valid range depends on the use-case. + * + * Return: Value of attribute. + */ +typedef size_t (*kbase_debugfs_helper_get_attr_fn)( + void *array, size_t index); + +/** + * kbase_debugfs_helper_get_attr_to_string - Construct a formatted string + * from elements in an array + * + * The given function is called once for each array element to get the + * value of the attribute to be inspected. The attribute values are + * written to the buffer as a formatted string of decimal numbers + * separated by spaces and terminated by a linefeed. + * + * @buf: Buffer in which to store the formatted output string. 
+ * @size: The size of the buffer, in bytes. + * @array: Address of an object that can be accessed like an array. + * @nelems: Number of elements in the array. + * @get_attr_fn: Function to be called back for each array element. + * + * Return: Number of characters written excluding the nul terminator. + */ +ssize_t kbase_debugfs_helper_get_attr_to_string( + char *buf, size_t size, void *array, size_t nelems, + kbase_debugfs_helper_get_attr_fn get_attr_fn); + +/** + * kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an + * array + * + * The virtual file must have been opened by calling single_open and passing + * the address of an object that can be accessed like an array. + * + * The given function is called once for each array element to get the + * value of the attribute to be inspected. The attribute values are + * written to the virtual file as a formatted string of decimal numbers + * separated by spaces and terminated by a linefeed. + * + * @sfile: A virtual file previously opened by calling single_open. + * @nelems: Number of elements in the array. + * @get_attr_fn: Function to be called back for each array element. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_debugfs_helper_seq_read( + struct seq_file *const sfile, size_t const nelems, + kbase_debugfs_helper_get_attr_fn const get_attr_fn); + +/** + * kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an + * array + * + * The virtual file must have been opened by calling single_open and passing + * the address of an object that can be accessed like an array. + * + * The given function is called once for each attribute value found in the + * data written to the virtual file. For further details, refer to the + * description of set_attr_from_string. + * + * @file: A virtual file previously opened by calling single_open. + * @ubuf: Source address in user space. + * @count: Number of bytes written to the virtual file. 
+ * @nelems: Number of elements in the array. + * @set_attr_fn: Function to be called back for each array element. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_debugfs_helper_seq_write(struct file *const file, + const char __user *const ubuf, size_t const count, + size_t const nelems, + kbase_debugfs_helper_set_attr_fn const set_attr_fn); + +#endif /*_KBASE_DEBUGFS_HELPER_H_ */ + diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index a135742..cbb406a 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,7 +43,6 @@ #include <mali_kbase_hwcnt_backend_gpu.h> #include <protected_mode_switcher.h> - #include <linux/atomic.h> #include <linux/mempool.h> #include <linux/slab.h> @@ -70,6 +69,7 @@ #include <linux/clk.h> #include <linux/regulator/consumer.h> +#include <linux/memory_group_manager.h> #if defined(CONFIG_PM_RUNTIME) || \ (defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) @@ -230,6 +230,14 @@ /* Reset the GPU after each atom completion */ #define KBASE_SERIALIZE_RESET (1 << 2) +/* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer + * clients, to reduce undesired system load. + * If a virtualizer client requests a dump within this threshold period after + * some other client has performed a dump, a new dump won't be performed and + * the accumulated counter values for that client will be returned instead. 
+ */ +#define KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS (200 * NSEC_PER_USEC) + /* Forward declarations */ struct kbase_context; struct kbase_device; @@ -1080,6 +1088,9 @@ struct kbase_pm_device_data { * @max_size: Maximum number of free pages in the pool * @order: order = 0 refers to a pool of 4 KB pages * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB) + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager, if present. Immutable. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). * @pool_lock: Lock protecting the pool - must be held when modifying * @cur_size and @page_list * @page_list: List of free pages in the pool @@ -1096,7 +1107,8 @@ struct kbase_mem_pool { struct kbase_device *kbdev; size_t cur_size; size_t max_size; - size_t order; + u8 order; + u8 group_id; spinlock_t pool_lock; struct list_head page_list; struct shrinker reclaim; @@ -1108,6 +1120,50 @@ struct kbase_mem_pool { }; /** + * struct kbase_mem_pool_group - a complete set of physical memory pools. + * + * Memory pools are used to allow efficient reallocation of previously-freed + * physical pages. A pair of memory pools is initialized for each physical + * memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays + * should be indexed by physical memory group ID, the meaning of which is + * defined by the systems integrator. + * + * @small: Array of objects containing the state for pools of 4 KiB size + * physical pages. + * @large: Array of objects containing the state for pools of 2 MiB size + * physical pages. + */ +struct kbase_mem_pool_group { + struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS]; + struct kbase_mem_pool large[MEMORY_GROUP_MANAGER_NR_GROUPS]; +}; + +/** + * struct kbase_mem_pool_config - Initial configuration for a physical memory + * pool + * + * @max_size: Maximum number of free pages that the pool can hold. 
+ */ +struct kbase_mem_pool_config { + size_t max_size; +}; + +/** + * struct kbase_mem_pool_group_config - Initial configuration for a complete + * set of physical memory pools + * + * This array should be indexed by physical memory group ID, the meaning + * of which is defined by the systems integrator. + * + * @small: Array of initial configuration for pools of 4 KiB pages. + * @large: Array of initial configuration for pools of 2 MiB pages. + */ +struct kbase_mem_pool_group_config { + struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS]; + struct kbase_mem_pool_config large[MEMORY_GROUP_MANAGER_NR_GROUPS]; +}; + +/** * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP * frequency, and real frequency and core mask * @opp_freq: Nominal OPP frequency @@ -1163,8 +1219,8 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); -#define DEVNAME_SIZE 16 +#define DEVNAME_SIZE 16 /** * struct kbase_device - Object representing an instance of GPU platform device, @@ -1199,6 +1255,8 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * for GPU device * @devname: string containing the name used for GPU device instance, * miscellaneous device is registered using the same name. + * @id: Unique identifier for the device, indicates the number of + * devices which have been created so far. * @model: Pointer, valid only when Driver is compiled to not access * the real GPU Hw, to the dummy model which tries to mimic * to some extent the state & behavior of GPU Hw in response @@ -1223,14 +1281,16 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * Job Scheduler, which is global to the device and is not * tied to any particular struct kbase_context running on * the device - * @mem_pool: Object containing the state for global pool of 4KB size - * physical pages which can be used by all the contexts. 
- * @lp_mem_pool: Object containing the state for global pool of 2MB size - * physical pages which can be used by all the contexts. + * @mem_pools: Global pools of free physical memory pages which can + * be used by all the contexts. * @memdev: keeps track of the in use physical pages allocated by * the Driver. * @mmu_mode: Pointer to the object containing methods for programming * the MMU, depending on the type of MMU supported by Hw. + * @mgm_dev: Pointer to the memory group manager device attached + * to the GPU device. This points to an internal memory + * group manager if no platform-specific memory group + * manager was retrieved through device tree. * @as: Array of objects representing address spaces of GPU. * @as_free: Bitpattern of free/available GPU address spaces. * @as_to_kctx: Array of pointers to struct kbase_context, having @@ -1254,7 +1314,10 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * @hwaccess_lock must be held when calling * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx. * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters. - * @vinstr_ctx: vinstr context created per device + * @vinstr_ctx: vinstr context created per device. + * @timeline_is_enabled: Non zero, if there is at least one timeline client, + * zero otherwise. + * @timeline: Timeline context created per device. * @trace_lock: Lock to serialize the access to trace buffer. * @trace_first_out: Index/offset in the trace buffer at which the first * unread message is present. @@ -1312,7 +1375,6 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * previously entered protected mode. * @ipa: Top level structure for IPA, containing pointers to both * configured & fallback models. - * @timeline: Stores the global timeline tracking information. * @job_fault_debug: Flag to control the dumping of debug data for job faults, * set when the 'job_fault' debugfs file is opened. 
* @mali_debugfs_directory: Root directory for the debugfs files created by the driver @@ -1358,8 +1420,8 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * power on for GPU is started. * @infinite_cache_active_default: Set to enable using infinite cache for all the * allocations of a new context. - * @mem_pool_max_size_default: Initial/default value for the maximum size of both - * types of pool created for a new context. + * @mem_pool_defaults: Default configuration for the group of memory pools + * created for a new context. * @current_gpu_coherency_mode: coherency mode in use, which can be different * from @system_coherency, when using protected mode. * @system_coherency: coherency mode as retrieved from the device tree. @@ -1427,6 +1489,7 @@ struct kbase_device { struct regulator *regulator; #endif char devname[DEVNAME_SIZE]; + u32 id; #ifdef CONFIG_MALI_NO_MALI void *model; @@ -1440,11 +1503,12 @@ struct kbase_device { struct kbase_pm_device_data pm; struct kbasep_js_device_data js_data; - struct kbase_mem_pool mem_pool; - struct kbase_mem_pool lp_mem_pool; + struct kbase_mem_pool_group mem_pools; struct kbasep_mem_device memdev; struct kbase_mmu_mode const *mmu_mode; + struct memory_group_manager_device *mgm_dev; + struct kbase_as as[BASE_MAX_NR_AS]; u16 as_free; /* Bitpattern of free Address Spaces */ struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; @@ -1480,6 +1544,9 @@ struct kbase_device { struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt; struct kbase_vinstr_context *vinstr_ctx; + atomic_t timeline_is_enabled; + struct kbase_timeline *timeline; + #if KBASE_TRACE_ENABLE spinlock_t trace_lock; u16 trace_first_out; @@ -1584,7 +1651,7 @@ struct kbase_device { #else u32 infinite_cache_active_default; #endif - size_t mem_pool_max_size_default; + struct kbase_mem_pool_group_config mem_pool_defaults; u32 current_gpu_coherency_mode; u32 system_coherency; @@ -1735,7 +1802,6 @@ struct kbase_sub_alloc { DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K); }; - 
/** * struct kbase_context - Object representing an entity, among which GPU is * scheduled and gets its own GPU address space. @@ -1745,7 +1811,7 @@ struct kbase_sub_alloc { * @kbdev: Pointer to the Kbase device for which the context is created. * @mmu: Structure holding details of the MMU tables for this * context - * @id: Unique indentifier for the context, indicates the number of + * @id: Unique identifier for the context, indicates the number of * contexts which have been created for the device so far. * @api_version: contains the version number for User/kernel interface, * used for compatibility check. @@ -1819,10 +1885,7 @@ struct kbase_sub_alloc { * when special tracking page is freed by userspace where it * is reset to 0. * @permanent_mapped_pages: Usage count of permanently mapped memory - * @mem_pool: Object containing the state for the context specific pool of - * 4KB size physical pages. - * @lp_mem_pool: Object containing the state for the context specific pool of - * 2MB size physical pages. + * @mem_pools: Context-specific pools of free physical memory pages. * @reclaim: Shrinker object registered with the kernel containing * the pointer to callback function which is invoked under * low memory conditions. In the callback function Driver @@ -1869,8 +1932,6 @@ struct kbase_sub_alloc { * or U64_MAX if the EXEC_VA zone is uninitialized. * @gpu_va_end: End address of the GPU va space (in 4KB page units) * @jit_va: Indicates if a JIT_VA zone has been created. - * @timeline: Object tracking the number of atoms currently in flight for - * the context and thread group id of the process, i.e. @tgid. * @mem_profile_data: Buffer containing the profiling information provided by * Userspace, can be read through the mem_profile debugfs file. * @mem_profile_size: Size of the @mem_profile_data. 
@@ -2007,8 +2068,7 @@ struct kbase_context { atomic_t nonmapped_pages; unsigned long permanent_mapped_pages; - struct kbase_mem_pool mem_pool; - struct kbase_mem_pool lp_mem_pool; + struct kbase_mem_pool_group mem_pools; struct shrinker reclaim; struct list_head evict_list; @@ -2026,7 +2086,6 @@ struct kbase_context { atomic_t refcount; - /* NOTE: * * Flags are in jctx.sched_info.ctx.flags diff --git a/mali_kbase/mali_kbase_event.c b/mali_kbase/mali_kbase_event.c index 3c9cef3..874170d 100644 --- a/mali_kbase/mali_kbase_event.c +++ b/mali_kbase/mali_kbase_event.c @@ -29,6 +29,7 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) { struct base_jd_udata data; + struct kbase_device *kbdev; lockdep_assert_held(&kctx->jctx.lock); @@ -36,10 +37,11 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru KBASE_DEBUG_ASSERT(katom != NULL); KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + kbdev = kctx->kbdev; data = katom->udata; - KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx); - KBASE_TLSTREAM_TL_DEL_ATOM(katom); + KBASE_TLSTREAM_TL_NRET_ATOM_CTX(kbdev, katom, kctx); + KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom); katom->status = KBASE_JD_ATOM_STATE_UNUSED; @@ -170,6 +172,8 @@ static int kbase_event_coalesce(struct kbase_context *kctx) void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) { + struct kbase_device *kbdev = ctx->kbdev; + if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { if (atom->event_code == BASE_JD_EVENT_DONE) { /* Don't report the event */ @@ -183,7 +187,7 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) kbase_event_process_noreport(ctx, atom); return; } - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_POSTED); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, TL_ATOM_STATE_POSTED); if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { /* Don't report the event until other event(s) have 
completed */ mutex_lock(&ctx->event_mutex); diff --git a/mali_kbase/mali_kbase_gator.h b/mali_kbase/mali_kbase_gator.h index 4f54817..3e3fb06 100644 --- a/mali_kbase/mali_kbase_gator.h +++ b/mali_kbase/mali_kbase_gator.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,20 +30,18 @@ #ifndef _KBASE_GATOR_H_ #define _KBASE_GATOR_H_ -#ifdef CONFIG_MALI_GATOR_SUPPORT -#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16)) #define GATOR_JOB_SLOT_START 1 #define GATOR_JOB_SLOT_STOP 2 #define GATOR_JOB_SLOT_SOFT_STOPPED 3 -void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id); -void kbase_trace_mali_pm_status(u32 event, u64 value); -void kbase_trace_mali_pm_power_off(u32 event, u64 value); -void kbase_trace_mali_pm_power_on(u32 event, u64 value); -void kbase_trace_mali_page_fault_insert_pages(int event, u32 value); -void kbase_trace_mali_mmu_as_in_use(int event); -void kbase_trace_mali_mmu_as_released(int event); -void kbase_trace_mali_total_alloc_pages_change(long long int event); +#ifdef CONFIG_MALI_GATOR_SUPPORT + +#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16)) + +void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id); +void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value); +void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value); +void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event); #endif /* CONFIG_MALI_GATOR_SUPPORT */ diff --git a/mali_kbase/mali_kbase_gator_api.c b/mali_kbase/mali_kbase_gator_api.c index 1719edf..1d97662 100644 --- a/mali_kbase/mali_kbase_gator_api.c +++ b/mali_kbase/mali_kbase_gator_api.c @@ 
-89,14 +89,18 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) hardware_counters = hardware_counters_mali_tGOx; count = ARRAY_SIZE(hardware_counters_mali_tGOx); break; - case GPU_ID2_PRODUCT_TKAX: - hardware_counters = hardware_counters_mali_tKAx; - count = ARRAY_SIZE(hardware_counters_mali_tKAx); - break; case GPU_ID2_PRODUCT_TTRX: hardware_counters = hardware_counters_mali_tTRx; count = ARRAY_SIZE(hardware_counters_mali_tTRx); break; + case GPU_ID2_PRODUCT_TNAX: + hardware_counters = hardware_counters_mali_tNAx; + count = ARRAY_SIZE(hardware_counters_mali_tNAx); + break; + case GPU_ID2_PRODUCT_TBEX: + hardware_counters = hardware_counters_mali_tBEx; + count = ARRAY_SIZE(hardware_counters_mali_tBEx); + break; default: hardware_counters = NULL; count = 0; diff --git a/mali_kbase/mali_kbase_gator_api.h b/mali_kbase/mali_kbase_gator_api.h index bd0589e..d57a418 100644 --- a/mali_kbase/mali_kbase_gator_api.h +++ b/mali_kbase/mali_kbase_gator_api.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,9 +30,10 @@ /* This define is used by the gator kernel module compile to select which DDK * API calling convention to use. If not defined (legacy DDK) gator assumes * version 1. The version to DDK release mapping is: - * Version 1 API: DDK versions r1px, r2px - * Version 2 API: DDK versions r3px, r4px - * Version 3 API: DDK version r5p0 and newer + * Version 1 API: DDK versions m_r1px, m_r2px + * Version 2 API: DDK versions m_r3px, m_r4px + * Version 3 API: DDK versions m_r5px-m_r28px and b_r1px-b_r16px + * Version 4 API: DDK versions b_r17p0 and newer * * API Usage * ========= @@ -115,7 +116,7 @@ * init_names() returned a non-NULL value. 
**/ -#define MALI_DDK_GATOR_API_VERSION 3 +#define MALI_DDK_GATOR_API_VERSION 4 enum hwc_type { JM_BLOCK = 0, diff --git a/mali_kbase/mali_kbase_gator_hwcnt_names.h b/mali_kbase/mali_kbase_gator_hwcnt_names.h index 5d38c7b..764c9d1 100644 --- a/mali_kbase/mali_kbase_gator_hwcnt_names.h +++ b/mali_kbase/mali_kbase_gator_hwcnt_names.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -2171,8 +2171,10 @@ static const char * const hardware_counters_mali_t88x[] = { #include "mali_kbase_gator_hwcnt_names_tgox.h" -#include "mali_kbase_gator_hwcnt_names_tkax.h" - #include "mali_kbase_gator_hwcnt_names_ttrx.h" +#include "mali_kbase_gator_hwcnt_names_tnax.h" + +#include "mali_kbase_gator_hwcnt_names_tbex.h" + #endif diff --git a/mali_kbase/mali_kbase_gator_hwcnt_names_tbex.h b/mali_kbase/mali_kbase_gator_hwcnt_names_tbex.h new file mode 100644 index 0000000..592bb2e --- /dev/null +++ b/mali_kbase/mali_kbase_gator_hwcnt_names_tbex.h @@ -0,0 +1,296 @@ +/* + * + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * This header was autogenerated, it should not be edited. + */ + +#ifndef _KBASE_GATOR_HWCNT_NAMES_TBEX_H_ +#define _KBASE_GATOR_HWCNT_NAMES_TBEX_H_ + +static const char * const hardware_counters_mali_tBEx[] = { + /* Performance counters for the Job Manager */ + "", + "", + "", + "", + "TBEx_MESSAGES_SENT", + "TBEx_MESSAGES_RECEIVED", + "TBEx_GPU_ACTIVE", + "TBEx_IRQ_ACTIVE", + "TBEx_JS0_JOBS", + "TBEx_JS0_TASKS", + "TBEx_JS0_ACTIVE", + "", + "TBEx_JS0_WAIT_READ", + "TBEx_JS0_WAIT_ISSUE", + "TBEx_JS0_WAIT_DEPEND", + "TBEx_JS0_WAIT_FINISH", + "TBEx_JS1_JOBS", + "TBEx_JS1_TASKS", + "TBEx_JS1_ACTIVE", + "", + "TBEx_JS1_WAIT_READ", + "TBEx_JS1_WAIT_ISSUE", + "TBEx_JS1_WAIT_DEPEND", + "TBEx_JS1_WAIT_FINISH", + "TBEx_JS2_JOBS", + "TBEx_JS2_TASKS", + "TBEx_JS2_ACTIVE", + "", + "TBEx_JS2_WAIT_READ", + "TBEx_JS2_WAIT_ISSUE", + "TBEx_JS2_WAIT_DEPEND", + "TBEx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /* Performance counters for the Tiler */ + "", + "", + "", + "", + "TBEx_TILER_ACTIVE", + "TBEx_JOBS_PROCESSED", + "TBEx_TRIANGLES", + "TBEx_LINES", + "TBEx_POINTS", + "TBEx_FRONT_FACING", + "TBEx_BACK_FACING", + "TBEx_PRIM_VISIBLE", + "TBEx_PRIM_CULLED", + "TBEx_PRIM_CLIPPED", + "TBEx_PRIM_SAT_CULLED", + "TBEx_BIN_ALLOC_INIT", + "TBEx_BIN_ALLOC_OVERFLOW", + "TBEx_BUS_READ", + "", + "TBEx_BUS_WRITE", + "TBEx_LOADING_DESC", + "TBEx_IDVS_POS_SHAD_REQ", + "TBEx_IDVS_POS_SHAD_WAIT", + "TBEx_IDVS_POS_SHAD_STALL", + "TBEx_IDVS_POS_FIFO_FULL", + "TBEx_PREFETCH_STALL", + "TBEx_VCACHE_HIT", + "TBEx_VCACHE_MISS", + "TBEx_VCACHE_LINE_WAIT", + "TBEx_VFETCH_POS_READ_WAIT", + "TBEx_VFETCH_VERTEX_WAIT", + "TBEx_VFETCH_STALL", + "TBEx_PRIMASSY_STALL", + "TBEx_BBOX_GEN_STALL", + "TBEx_IDVS_VBU_HIT", + "TBEx_IDVS_VBU_MISS", + "TBEx_IDVS_VBU_LINE_DEALLOCATE", + 
"TBEx_IDVS_VAR_SHAD_REQ", + "TBEx_IDVS_VAR_SHAD_STALL", + "TBEx_BINNER_STALL", + "TBEx_ITER_STALL", + "TBEx_COMPRESS_MISS", + "TBEx_COMPRESS_STALL", + "TBEx_PCACHE_HIT", + "TBEx_PCACHE_MISS", + "TBEx_PCACHE_MISS_STALL", + "TBEx_PCACHE_EVICT_STALL", + "TBEx_PMGR_PTR_WR_STALL", + "TBEx_PMGR_PTR_RD_STALL", + "TBEx_PMGR_CMD_WR_STALL", + "TBEx_WRBUF_ACTIVE", + "TBEx_WRBUF_HIT", + "TBEx_WRBUF_MISS", + "TBEx_WRBUF_NO_FREE_LINE_STALL", + "TBEx_WRBUF_NO_AXI_ID_STALL", + "TBEx_WRBUF_AXI_STALL", + "", + "", + "", + "TBEx_UTLB_TRANS", + "TBEx_UTLB_TRANS_HIT", + "TBEx_UTLB_TRANS_STALL", + "TBEx_UTLB_TRANS_MISS_DELAY", + "TBEx_UTLB_MMU_REQ", + + /* Performance counters for the Shader Core */ + "", + "", + "", + "", + "TBEx_FRAG_ACTIVE", + "TBEx_FRAG_PRIMITIVES", + "TBEx_FRAG_PRIM_RAST", + "TBEx_FRAG_FPK_ACTIVE", + "TBEx_FRAG_STARVING", + "TBEx_FRAG_WARPS", + "TBEx_FRAG_PARTIAL_WARPS", + "TBEx_FRAG_QUADS_RAST", + "TBEx_FRAG_QUADS_EZS_TEST", + "TBEx_FRAG_QUADS_EZS_UPDATE", + "TBEx_FRAG_QUADS_EZS_KILL", + "TBEx_FRAG_LZS_TEST", + "TBEx_FRAG_LZS_KILL", + "TBEx_WARP_REG_SIZE_64", + "TBEx_FRAG_PTILES", + "TBEx_FRAG_TRANS_ELIM", + "TBEx_QUAD_FPK_KILLER", + "TBEx_FULL_QUAD_WARPS", + "TBEx_COMPUTE_ACTIVE", + "TBEx_COMPUTE_TASKS", + "TBEx_COMPUTE_WARPS", + "TBEx_COMPUTE_STARVING", + "TBEx_EXEC_CORE_ACTIVE", + "TBEx_EXEC_INSTR_FMA", + "TBEx_EXEC_INSTR_CVT", + "TBEx_EXEC_INSTR_SFU", + "TBEx_EXEC_INSTR_MSG", + "TBEx_EXEC_INSTR_DIVERGED", + "TBEx_EXEC_ICACHE_MISS", + "TBEx_EXEC_STARVE_ARITH", + "TBEx_CALL_BLEND_SHADER", + "TBEx_TEX_MSGI_NUM_FLITS", + "TBEx_TEX_DFCH_CLK_STALLED", + "TBEx_TEX_TFCH_CLK_STALLED", + "TBEx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", + "TBEx_TEX_FILT_NUM_OPERATIONS", + "TBEx_TEX_FILT_NUM_FXR_OPERATIONS", + "TBEx_TEX_FILT_NUM_FST_OPERATIONS", + "TBEx_TEX_MSGO_NUM_MSG", + "TBEx_TEX_MSGO_NUM_FLITS", + "TBEx_LS_MEM_READ_FULL", + "TBEx_LS_MEM_READ_SHORT", + "TBEx_LS_MEM_WRITE_FULL", + "TBEx_LS_MEM_WRITE_SHORT", + "TBEx_LS_MEM_ATOMIC", + "TBEx_VARY_INSTR", + "TBEx_VARY_SLOT_32", 
+ "TBEx_VARY_SLOT_16", + "TBEx_ATTR_INSTR", + "TBEx_ARITH_INSTR_FP_MUL", + "TBEx_BEATS_RD_FTC", + "TBEx_BEATS_RD_FTC_EXT", + "TBEx_BEATS_RD_LSC", + "TBEx_BEATS_RD_LSC_EXT", + "TBEx_BEATS_RD_TEX", + "TBEx_BEATS_RD_TEX_EXT", + "TBEx_BEATS_RD_OTHER", + "TBEx_BEATS_WR_LSC_OTHER", + "TBEx_BEATS_WR_TIB", + "TBEx_BEATS_WR_LSC_WB", + + /* Performance counters for the Memory System */ + "", + "", + "", + "", + "TBEx_MMU_REQUESTS", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TBEx_L2_RD_MSG_IN", + "TBEx_L2_RD_MSG_IN_STALL", + "TBEx_L2_WR_MSG_IN", + "TBEx_L2_WR_MSG_IN_STALL", + "TBEx_L2_SNP_MSG_IN", + "TBEx_L2_SNP_MSG_IN_STALL", + "TBEx_L2_RD_MSG_OUT", + "TBEx_L2_RD_MSG_OUT_STALL", + "TBEx_L2_WR_MSG_OUT", + "TBEx_L2_ANY_LOOKUP", + "TBEx_L2_READ_LOOKUP", + "TBEx_L2_WRITE_LOOKUP", + "TBEx_L2_EXT_SNOOP_LOOKUP", + "TBEx_L2_EXT_READ", + "TBEx_L2_EXT_READ_NOSNP", + "TBEx_L2_EXT_READ_UNIQUE", + "TBEx_L2_EXT_READ_BEATS", + "TBEx_L2_EXT_AR_STALL", + "TBEx_L2_EXT_AR_CNT_Q1", + "TBEx_L2_EXT_AR_CNT_Q2", + "TBEx_L2_EXT_AR_CNT_Q3", + "TBEx_L2_EXT_RRESP_0_127", + "TBEx_L2_EXT_RRESP_128_191", + "TBEx_L2_EXT_RRESP_192_255", + "TBEx_L2_EXT_RRESP_256_319", + "TBEx_L2_EXT_RRESP_320_383", + "TBEx_L2_EXT_WRITE", + "TBEx_L2_EXT_WRITE_NOSNP_FULL", + "TBEx_L2_EXT_WRITE_NOSNP_PTL", + "TBEx_L2_EXT_WRITE_SNP_FULL", + "TBEx_L2_EXT_WRITE_SNP_PTL", + "TBEx_L2_EXT_WRITE_BEATS", + "TBEx_L2_EXT_W_STALL", + "TBEx_L2_EXT_AW_CNT_Q1", + "TBEx_L2_EXT_AW_CNT_Q2", + "TBEx_L2_EXT_AW_CNT_Q3", + "TBEx_L2_EXT_SNOOP", + "TBEx_L2_EXT_SNOOP_STALL", + "TBEx_L2_EXT_SNOOP_RESP_CLEAN", + "TBEx_L2_EXT_SNOOP_RESP_DATA", + "TBEx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", +}; + +#endif /* _KBASE_GATOR_HWCNT_NAMES_TBEX_H_ */ diff --git a/mali_kbase/mali_kbase_gator_hwcnt_names_tkax.h b/mali_kbase/mali_kbase_gator_hwcnt_names_tkax.h deleted file mode 100644 index 73db45c..0000000 --- a/mali_kbase/mali_kbase_gator_hwcnt_names_tkax.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * - * (C) 
COPYRIGHT 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * This header was autogenerated, it should not be edited. - */ - -#ifndef _KBASE_GATOR_HWCNT_NAMES_TKAX_H_ -#define _KBASE_GATOR_HWCNT_NAMES_TKAX_H_ - -static const char * const hardware_counters_mali_tKAx[] = { - /* Performance counters for the Job Manager */ - "", - "", - "", - "", - "TKAx_MESSAGES_SENT", - "TKAx_MESSAGES_RECEIVED", - "TKAx_GPU_ACTIVE", - "TKAx_IRQ_ACTIVE", - "TKAx_JS0_JOBS", - "TKAx_JS0_TASKS", - "TKAx_JS0_ACTIVE", - "", - "TKAx_JS0_WAIT_READ", - "TKAx_JS0_WAIT_ISSUE", - "TKAx_JS0_WAIT_DEPEND", - "TKAx_JS0_WAIT_FINISH", - "TKAx_JS1_JOBS", - "TKAx_JS1_TASKS", - "TKAx_JS1_ACTIVE", - "", - "TKAx_JS1_WAIT_READ", - "TKAx_JS1_WAIT_ISSUE", - "TKAx_JS1_WAIT_DEPEND", - "TKAx_JS1_WAIT_FINISH", - "TKAx_JS2_JOBS", - "TKAx_JS2_TASKS", - "TKAx_JS2_ACTIVE", - "", - "TKAx_JS2_WAIT_READ", - "TKAx_JS2_WAIT_ISSUE", - "TKAx_JS2_WAIT_DEPEND", - "TKAx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Performance counters for the Tiler */ - "", - "", - "", - "", - "TKAx_TILER_ACTIVE", - "TKAx_JOBS_PROCESSED", - 
"TKAx_TRIANGLES", - "TKAx_LINES", - "TKAx_POINTS", - "TKAx_FRONT_FACING", - "TKAx_BACK_FACING", - "TKAx_PRIM_VISIBLE", - "TKAx_PRIM_CULLED", - "TKAx_PRIM_CLIPPED", - "TKAx_PRIM_SAT_CULLED", - "TKAx_BIN_ALLOC_INIT", - "TKAx_BIN_ALLOC_OVERFLOW", - "TKAx_BUS_READ", - "", - "TKAx_BUS_WRITE", - "TKAx_LOADING_DESC", - "TKAx_IDVS_POS_SHAD_REQ", - "TKAx_IDVS_POS_SHAD_WAIT", - "TKAx_IDVS_POS_SHAD_STALL", - "TKAx_IDVS_POS_FIFO_FULL", - "TKAx_PREFETCH_STALL", - "TKAx_VCACHE_HIT", - "TKAx_VCACHE_MISS", - "TKAx_VCACHE_LINE_WAIT", - "TKAx_VFETCH_POS_READ_WAIT", - "TKAx_VFETCH_VERTEX_WAIT", - "TKAx_VFETCH_STALL", - "TKAx_PRIMASSY_STALL", - "TKAx_BBOX_GEN_STALL", - "TKAx_IDVS_VBU_HIT", - "TKAx_IDVS_VBU_MISS", - "TKAx_IDVS_VBU_LINE_DEALLOCATE", - "TKAx_IDVS_VAR_SHAD_REQ", - "TKAx_IDVS_VAR_SHAD_STALL", - "TKAx_BINNER_STALL", - "TKAx_ITER_STALL", - "TKAx_COMPRESS_MISS", - "TKAx_COMPRESS_STALL", - "TKAx_PCACHE_HIT", - "TKAx_PCACHE_MISS", - "TKAx_PCACHE_MISS_STALL", - "TKAx_PCACHE_EVICT_STALL", - "TKAx_PMGR_PTR_WR_STALL", - "TKAx_PMGR_PTR_RD_STALL", - "TKAx_PMGR_CMD_WR_STALL", - "TKAx_WRBUF_ACTIVE", - "TKAx_WRBUF_HIT", - "TKAx_WRBUF_MISS", - "TKAx_WRBUF_NO_FREE_LINE_STALL", - "TKAx_WRBUF_NO_AXI_ID_STALL", - "TKAx_WRBUF_AXI_STALL", - "", - "", - "", - "TKAx_UTLB_TRANS", - "TKAx_UTLB_TRANS_HIT", - "TKAx_UTLB_TRANS_STALL", - "TKAx_UTLB_TRANS_MISS_DELAY", - "TKAx_UTLB_MMU_REQ", - - /* Performance counters for the Shader Core */ - "", - "", - "", - "", - "TKAx_FRAG_ACTIVE", - "TKAx_FRAG_PRIMITIVES", - "TKAx_FRAG_PRIM_RAST", - "TKAx_FRAG_FPK_ACTIVE", - "TKAx_FRAG_STARVING", - "TKAx_FRAG_WARPS", - "TKAx_FRAG_PARTIAL_WARPS", - "TKAx_FRAG_QUADS_RAST", - "TKAx_FRAG_QUADS_EZS_TEST", - "TKAx_FRAG_QUADS_EZS_UPDATE", - "TKAx_FRAG_QUADS_EZS_KILL", - "TKAx_FRAG_LZS_TEST", - "TKAx_FRAG_LZS_KILL", - "TKAx_WARP_REG_SIZE_64", - "TKAx_FRAG_PTILES", - "TKAx_FRAG_TRANS_ELIM", - "TKAx_QUAD_FPK_KILLER", - "TKAx_FULL_QUAD_WARPS", - "TKAx_COMPUTE_ACTIVE", - "TKAx_COMPUTE_TASKS", - "TKAx_COMPUTE_WARPS", - 
"TKAx_COMPUTE_STARVING", - "TKAx_EXEC_CORE_ACTIVE", - "TKAx_EXEC_ACTIVE", - "TKAx_EXEC_INSTR_COUNT", - "TKAx_EXEC_INSTR_DIVERGED", - "TKAx_EXEC_INSTR_STARVING", - "TKAx_ARITH_INSTR_SINGLE_FMA", - "TKAx_ARITH_INSTR_DOUBLE", - "TKAx_ARITH_INSTR_MSG", - "TKAx_ARITH_INSTR_MSG_ONLY", - "TKAx_TEX_MSGI_NUM_QUADS", - "TKAx_TEX_DFCH_NUM_PASSES", - "TKAx_TEX_DFCH_NUM_PASSES_MISS", - "TKAx_TEX_DFCH_NUM_PASSES_MIP_MAP", - "TKAx_TEX_TIDX_NUM_SPLIT_MIP_MAP", - "TKAx_TEX_TFCH_NUM_LINES_FETCHED", - "TKAx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK", - "TKAx_TEX_TFCH_NUM_OPERATIONS", - "TKAx_TEX_FILT_NUM_OPERATIONS", - "TKAx_LS_MEM_READ_FULL", - "TKAx_LS_MEM_READ_SHORT", - "TKAx_LS_MEM_WRITE_FULL", - "TKAx_LS_MEM_WRITE_SHORT", - "TKAx_LS_MEM_ATOMIC", - "TKAx_VARY_INSTR", - "TKAx_VARY_SLOT_32", - "TKAx_VARY_SLOT_16", - "TKAx_ATTR_INSTR", - "TKAx_ARITH_INSTR_FP_MUL", - "TKAx_BEATS_RD_FTC", - "TKAx_BEATS_RD_FTC_EXT", - "TKAx_BEATS_RD_LSC", - "TKAx_BEATS_RD_LSC_EXT", - "TKAx_BEATS_RD_TEX", - "TKAx_BEATS_RD_TEX_EXT", - "TKAx_BEATS_RD_OTHER", - "TKAx_BEATS_WR_LSC_OTHER", - "TKAx_BEATS_WR_TIB", - "TKAx_BEATS_WR_LSC_WB", - - /* Performance counters for the Memory System */ - "", - "", - "", - "", - "TKAx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TKAx_L2_RD_MSG_IN", - "TKAx_L2_RD_MSG_IN_STALL", - "TKAx_L2_WR_MSG_IN", - "TKAx_L2_WR_MSG_IN_STALL", - "TKAx_L2_SNP_MSG_IN", - "TKAx_L2_SNP_MSG_IN_STALL", - "TKAx_L2_RD_MSG_OUT", - "TKAx_L2_RD_MSG_OUT_STALL", - "TKAx_L2_WR_MSG_OUT", - "TKAx_L2_ANY_LOOKUP", - "TKAx_L2_READ_LOOKUP", - "TKAx_L2_WRITE_LOOKUP", - "TKAx_L2_EXT_SNOOP_LOOKUP", - "TKAx_L2_EXT_READ", - "TKAx_L2_EXT_READ_NOSNP", - "TKAx_L2_EXT_READ_UNIQUE", - "TKAx_L2_EXT_READ_BEATS", - "TKAx_L2_EXT_AR_STALL", - "TKAx_L2_EXT_AR_CNT_Q1", - "TKAx_L2_EXT_AR_CNT_Q2", - "TKAx_L2_EXT_AR_CNT_Q3", - "TKAx_L2_EXT_RRESP_0_127", - "TKAx_L2_EXT_RRESP_128_191", - "TKAx_L2_EXT_RRESP_192_255", - "TKAx_L2_EXT_RRESP_256_319", - "TKAx_L2_EXT_RRESP_320_383", - "TKAx_L2_EXT_WRITE", 
- "TKAx_L2_EXT_WRITE_NOSNP_FULL", - "TKAx_L2_EXT_WRITE_NOSNP_PTL", - "TKAx_L2_EXT_WRITE_SNP_FULL", - "TKAx_L2_EXT_WRITE_SNP_PTL", - "TKAx_L2_EXT_WRITE_BEATS", - "TKAx_L2_EXT_W_STALL", - "TKAx_L2_EXT_AW_CNT_Q1", - "TKAx_L2_EXT_AW_CNT_Q2", - "TKAx_L2_EXT_AW_CNT_Q3", - "TKAx_L2_EXT_SNOOP", - "TKAx_L2_EXT_SNOOP_STALL", - "TKAx_L2_EXT_SNOOP_RESP_CLEAN", - "TKAx_L2_EXT_SNOOP_RESP_DATA", - "TKAx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", -}; - -#endif /* _KBASE_GATOR_HWCNT_NAMES_TKAX_H_ */ diff --git a/mali_kbase/mali_kbase_gator_hwcnt_names_tnax.h b/mali_kbase/mali_kbase_gator_hwcnt_names_tnax.h new file mode 100644 index 0000000..a90db76 --- /dev/null +++ b/mali_kbase/mali_kbase_gator_hwcnt_names_tnax.h @@ -0,0 +1,296 @@ +/* + * + * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * This header was autogenerated, it should not be edited. 
+ */ + +#ifndef _KBASE_GATOR_HWCNT_NAMES_TNAX_H_ +#define _KBASE_GATOR_HWCNT_NAMES_TNAX_H_ + +static const char * const hardware_counters_mali_tNAx[] = { + /* Performance counters for the Job Manager */ + "", + "", + "", + "", + "TNAx_MESSAGES_SENT", + "TNAx_MESSAGES_RECEIVED", + "TNAx_GPU_ACTIVE", + "TNAx_IRQ_ACTIVE", + "TNAx_JS0_JOBS", + "TNAx_JS0_TASKS", + "TNAx_JS0_ACTIVE", + "", + "TNAx_JS0_WAIT_READ", + "TNAx_JS0_WAIT_ISSUE", + "TNAx_JS0_WAIT_DEPEND", + "TNAx_JS0_WAIT_FINISH", + "TNAx_JS1_JOBS", + "TNAx_JS1_TASKS", + "TNAx_JS1_ACTIVE", + "", + "TNAx_JS1_WAIT_READ", + "TNAx_JS1_WAIT_ISSUE", + "TNAx_JS1_WAIT_DEPEND", + "TNAx_JS1_WAIT_FINISH", + "TNAx_JS2_JOBS", + "TNAx_JS2_TASKS", + "TNAx_JS2_ACTIVE", + "", + "TNAx_JS2_WAIT_READ", + "TNAx_JS2_WAIT_ISSUE", + "TNAx_JS2_WAIT_DEPEND", + "TNAx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /* Performance counters for the Tiler */ + "", + "", + "", + "", + "TNAx_TILER_ACTIVE", + "TNAx_JOBS_PROCESSED", + "TNAx_TRIANGLES", + "TNAx_LINES", + "TNAx_POINTS", + "TNAx_FRONT_FACING", + "TNAx_BACK_FACING", + "TNAx_PRIM_VISIBLE", + "TNAx_PRIM_CULLED", + "TNAx_PRIM_CLIPPED", + "TNAx_PRIM_SAT_CULLED", + "TNAx_BIN_ALLOC_INIT", + "TNAx_BIN_ALLOC_OVERFLOW", + "TNAx_BUS_READ", + "", + "TNAx_BUS_WRITE", + "TNAx_LOADING_DESC", + "TNAx_IDVS_POS_SHAD_REQ", + "TNAx_IDVS_POS_SHAD_WAIT", + "TNAx_IDVS_POS_SHAD_STALL", + "TNAx_IDVS_POS_FIFO_FULL", + "TNAx_PREFETCH_STALL", + "TNAx_VCACHE_HIT", + "TNAx_VCACHE_MISS", + "TNAx_VCACHE_LINE_WAIT", + "TNAx_VFETCH_POS_READ_WAIT", + "TNAx_VFETCH_VERTEX_WAIT", + "TNAx_VFETCH_STALL", + "TNAx_PRIMASSY_STALL", + "TNAx_BBOX_GEN_STALL", + "TNAx_IDVS_VBU_HIT", + "TNAx_IDVS_VBU_MISS", + "TNAx_IDVS_VBU_LINE_DEALLOCATE", + "TNAx_IDVS_VAR_SHAD_REQ", + "TNAx_IDVS_VAR_SHAD_STALL", + "TNAx_BINNER_STALL", + "TNAx_ITER_STALL", + "TNAx_COMPRESS_MISS", + 
"TNAx_COMPRESS_STALL", + "TNAx_PCACHE_HIT", + "TNAx_PCACHE_MISS", + "TNAx_PCACHE_MISS_STALL", + "TNAx_PCACHE_EVICT_STALL", + "TNAx_PMGR_PTR_WR_STALL", + "TNAx_PMGR_PTR_RD_STALL", + "TNAx_PMGR_CMD_WR_STALL", + "TNAx_WRBUF_ACTIVE", + "TNAx_WRBUF_HIT", + "TNAx_WRBUF_MISS", + "TNAx_WRBUF_NO_FREE_LINE_STALL", + "TNAx_WRBUF_NO_AXI_ID_STALL", + "TNAx_WRBUF_AXI_STALL", + "", + "", + "", + "TNAx_UTLB_TRANS", + "TNAx_UTLB_TRANS_HIT", + "TNAx_UTLB_TRANS_STALL", + "TNAx_UTLB_TRANS_MISS_DELAY", + "TNAx_UTLB_MMU_REQ", + + /* Performance counters for the Shader Core */ + "", + "", + "", + "", + "TNAx_FRAG_ACTIVE", + "TNAx_FRAG_PRIMITIVES_OUT", + "TNAx_FRAG_PRIM_RAST", + "TNAx_FRAG_FPK_ACTIVE", + "TNAx_FRAG_STARVING", + "TNAx_FRAG_WARPS", + "TNAx_FRAG_PARTIAL_QUADS_RAST", + "TNAx_FRAG_QUADS_RAST", + "TNAx_FRAG_QUADS_EZS_TEST", + "TNAx_FRAG_QUADS_EZS_UPDATE", + "TNAx_FRAG_QUADS_EZS_KILL", + "TNAx_FRAG_LZS_TEST", + "TNAx_FRAG_LZS_KILL", + "TNAx_WARP_REG_SIZE_64", + "TNAx_FRAG_PTILES", + "TNAx_FRAG_TRANS_ELIM", + "TNAx_QUAD_FPK_KILLER", + "TNAx_FULL_QUAD_WARPS", + "TNAx_COMPUTE_ACTIVE", + "TNAx_COMPUTE_TASKS", + "TNAx_COMPUTE_WARPS", + "TNAx_COMPUTE_STARVING", + "TNAx_EXEC_CORE_ACTIVE", + "TNAx_EXEC_INSTR_FMA", + "TNAx_EXEC_INSTR_CVT", + "TNAx_EXEC_INSTR_SFU", + "TNAx_EXEC_INSTR_MSG", + "TNAx_EXEC_INSTR_DIVERGED", + "TNAx_EXEC_ICACHE_MISS", + "TNAx_EXEC_STARVE_ARITH", + "TNAx_CALL_BLEND_SHADER", + "TNAx_TEX_MSGI_NUM_QUADS", + "TNAx_TEX_DFCH_NUM_PASSES", + "TNAx_TEX_DFCH_NUM_PASSES_MISS", + "TNAx_TEX_DFCH_NUM_PASSES_MIP_MAP", + "TNAx_TEX_TIDX_NUM_SPLIT_MIP_MAP", + "TNAx_TEX_TFCH_NUM_LINES_FETCHED", + "TNAx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK", + "TNAx_TEX_TFCH_NUM_OPERATIONS", + "TNAx_TEX_FILT_NUM_OPERATIONS", + "TNAx_LS_MEM_READ_FULL", + "TNAx_LS_MEM_READ_SHORT", + "TNAx_LS_MEM_WRITE_FULL", + "TNAx_LS_MEM_WRITE_SHORT", + "TNAx_LS_MEM_ATOMIC", + "TNAx_VARY_INSTR", + "TNAx_VARY_SLOT_32", + "TNAx_VARY_SLOT_16", + "TNAx_ATTR_INSTR", + "TNAx_ARITH_INSTR_FP_MUL", + "TNAx_BEATS_RD_FTC", + 
"TNAx_BEATS_RD_FTC_EXT", + "TNAx_BEATS_RD_LSC", + "TNAx_BEATS_RD_LSC_EXT", + "TNAx_BEATS_RD_TEX", + "TNAx_BEATS_RD_TEX_EXT", + "TNAx_BEATS_RD_OTHER", + "TNAx_BEATS_WR_LSC_OTHER", + "TNAx_BEATS_WR_TIB", + "TNAx_BEATS_WR_LSC_WB", + + /* Performance counters for the Memory System */ + "", + "", + "", + "", + "TNAx_MMU_REQUESTS", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TNAx_L2_RD_MSG_IN", + "TNAx_L2_RD_MSG_IN_STALL", + "TNAx_L2_WR_MSG_IN", + "TNAx_L2_WR_MSG_IN_STALL", + "TNAx_L2_SNP_MSG_IN", + "TNAx_L2_SNP_MSG_IN_STALL", + "TNAx_L2_RD_MSG_OUT", + "TNAx_L2_RD_MSG_OUT_STALL", + "TNAx_L2_WR_MSG_OUT", + "TNAx_L2_ANY_LOOKUP", + "TNAx_L2_READ_LOOKUP", + "TNAx_L2_WRITE_LOOKUP", + "TNAx_L2_EXT_SNOOP_LOOKUP", + "TNAx_L2_EXT_READ", + "TNAx_L2_EXT_READ_NOSNP", + "TNAx_L2_EXT_READ_UNIQUE", + "TNAx_L2_EXT_READ_BEATS", + "TNAx_L2_EXT_AR_STALL", + "TNAx_L2_EXT_AR_CNT_Q1", + "TNAx_L2_EXT_AR_CNT_Q2", + "TNAx_L2_EXT_AR_CNT_Q3", + "TNAx_L2_EXT_RRESP_0_127", + "TNAx_L2_EXT_RRESP_128_191", + "TNAx_L2_EXT_RRESP_192_255", + "TNAx_L2_EXT_RRESP_256_319", + "TNAx_L2_EXT_RRESP_320_383", + "TNAx_L2_EXT_WRITE", + "TNAx_L2_EXT_WRITE_NOSNP_FULL", + "TNAx_L2_EXT_WRITE_NOSNP_PTL", + "TNAx_L2_EXT_WRITE_SNP_FULL", + "TNAx_L2_EXT_WRITE_SNP_PTL", + "TNAx_L2_EXT_WRITE_BEATS", + "TNAx_L2_EXT_W_STALL", + "TNAx_L2_EXT_AW_CNT_Q1", + "TNAx_L2_EXT_AW_CNT_Q2", + "TNAx_L2_EXT_AW_CNT_Q3", + "TNAx_L2_EXT_SNOOP", + "TNAx_L2_EXT_SNOOP_STALL", + "TNAx_L2_EXT_SNOOP_RESP_CLEAN", + "TNAx_L2_EXT_SNOOP_RESP_DATA", + "TNAx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", +}; + +#endif /* _KBASE_GATOR_HWCNT_NAMES_TNAX_H_ */ diff --git a/mali_kbase/mali_kbase_gator_hwcnt_names_ttrx.h b/mali_kbase/mali_kbase_gator_hwcnt_names_ttrx.h index c1e315b..4226a9f 100644 --- a/mali_kbase/mali_kbase_gator_hwcnt_names_ttrx.h +++ b/mali_kbase/mali_kbase_gator_hwcnt_names_ttrx.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -166,12 +166,12 @@ static const char * const hardware_counters_mali_tTRx[] = { "", "", "TTRx_FRAG_ACTIVE", - "TTRx_FRAG_PRIMITIVES", + "TTRx_FRAG_PRIMITIVES_OUT", "TTRx_FRAG_PRIM_RAST", "TTRx_FRAG_FPK_ACTIVE", "TTRx_FRAG_STARVING", "TTRx_FRAG_WARPS", - "TTRx_FRAG_PARTIAL_WARPS", + "TTRx_FRAG_PARTIAL_QUADS_RAST", "TTRx_FRAG_QUADS_RAST", "TTRx_FRAG_QUADS_EZS_TEST", "TTRx_FRAG_QUADS_EZS_UPDATE", @@ -196,15 +196,15 @@ static const char * const hardware_counters_mali_tTRx[] = { "TTRx_EXEC_ICACHE_MISS", "TTRx_EXEC_STARVE_ARITH", "TTRx_CALL_BLEND_SHADER", - "TTRx_TEX_MSGI_NUM_QUADS", - "TTRx_TEX_DFCH_NUM_PASSES", - "TTRx_TEX_DFCH_NUM_PASSES_MISS", - "TTRx_TEX_DFCH_NUM_PASSES_MIP_MAP", - "TTRx_TEX_TIDX_NUM_SPLIT_MIP_MAP", - "TTRx_TEX_TFCH_NUM_LINES_FETCHED", - "TTRx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK", - "TTRx_TEX_TFCH_NUM_OPERATIONS", + "TTRx_TEX_MSGI_NUM_FLITS", + "TTRx_TEX_DFCH_CLK_STALLED", + "TTRx_TEX_TFCH_CLK_STALLED", + "TTRx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", "TTRx_TEX_FILT_NUM_OPERATIONS", + "TTRx_TEX_FILT_NUM_FXR_OPERATIONS", + "TTRx_TEX_FILT_NUM_FST_OPERATIONS", + "TTRx_TEX_MSGO_NUM_MSG", + "TTRx_TEX_MSGO_NUM_FLITS", "TTRx_LS_MEM_READ_FULL", "TTRx_LS_MEM_READ_SHORT", "TTRx_LS_MEM_WRITE_FULL", diff --git a/mali_kbase/mali_kbase_gpu_id.h b/mali_kbase/mali_kbase_gpu_id.h index d432f8e..f9f6703 100644 --- a/mali_kbase/mali_kbase_gpu_id.h +++ b/mali_kbase/mali_kbase_gpu_id.h @@ -116,6 +116,7 @@ #define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1) #define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) #define GPU_ID2_PRODUCT_TULX GPU_ID2_MODEL_MAKE(10, 0) +#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1) #define GPU_ID2_PRODUCT_TIDX GPU_ID2_MODEL_MAKE(10, 3) #define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) diff --git 
a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c index 0481f80..2d1263d 100644 --- a/mali_kbase/mali_kbase_gwt.c +++ b/mali_kbase/mali_kbase_gwt.c @@ -35,7 +35,7 @@ static inline void kbase_gpu_gwt_setup_page_permission( int err = 0; reg = rb_entry(rbnode, struct kbase_va_region, rblink); - if (reg->nr_pages && !(reg->flags & KBASE_REG_FREE) && + if (reg->nr_pages && !kbase_is_region_invalid_or_free(reg) && (reg->flags & KBASE_REG_GPU_WR)) { err = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c index 450926c..1503469 100644 --- a/mali_kbase/mali_kbase_hw.c +++ b/mali_kbase/mali_kbase_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -80,6 +80,9 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TULX: features = base_hw_features_tULx; break; + case GPU_ID2_PRODUCT_TDUX: + features = base_hw_features_tDUx; + break; case GPU_ID2_PRODUCT_TBOX: features = base_hw_features_tBOx; break; @@ -126,6 +129,19 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) for (; *features != BASE_HW_FEATURE_END; features++) set_bit(*features, &kbdev->hw_features_mask[0]); + +#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP) + /* When dumping is enabled, need to disable flush reduction optimization + * for GPUs on which it is safe to have only cache clean operation at + * the end of job chain. + * This is required to make job dumping work. There is some discrepancy + * in the implementation of flush reduction optimization due to + * unclear or ambiguous ARCH spec. 
+ */ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) + clear_bit(BASE_HW_FEATURE_FLUSH_REDUCTION, + &kbdev->hw_features_mask[0]); +#endif } /** @@ -207,10 +223,12 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( {GPU_ID2_PRODUCT_TTRX, {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0}, {U32_MAX, NULL} } }, {GPU_ID2_PRODUCT_TNAX, {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0}, {U32_MAX, NULL} } }, {GPU_ID2_PRODUCT_TBEX, @@ -221,6 +239,10 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tULx_r0p0}, {U32_MAX, NULL} } }, + {GPU_ID2_PRODUCT_TDUX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0}, + {U32_MAX, NULL} } }, + {GPU_ID2_PRODUCT_TBOX, {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBOx_r0p0}, {U32_MAX, NULL} } }, @@ -479,6 +501,9 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TULX: issues = base_hw_issues_model_tULx; break; + case GPU_ID2_PRODUCT_TDUX: + issues = base_hw_issues_model_tDUx; + break; case GPU_ID2_PRODUCT_TBOX: issues = base_hw_issues_model_tBOx; break; diff --git a/mali_kbase/mali_kbase_hwcnt.c b/mali_kbase/mali_kbase_hwcnt.c index efbac6f..265fc21 100644 --- a/mali_kbase/mali_kbase_hwcnt.c +++ b/mali_kbase/mali_kbase_hwcnt.c @@ -794,3 +794,14 @@ int kbase_hwcnt_accumulator_dump( return errcode; } KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_dump); + +u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum) +{ + struct kbase_hwcnt_context *hctx; + + if (WARN_ON(!accum)) + return 0; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + return hctx->iface->timestamp_ns(accum->backend); +} diff --git a/mali_kbase/mali_kbase_hwcnt_accumulator.h b/mali_kbase/mali_kbase_hwcnt_accumulator.h index fc45743..eb82ea4 100644 --- 
a/mali_kbase/mali_kbase_hwcnt_accumulator.h +++ b/mali_kbase/mali_kbase_hwcnt_accumulator.h @@ -134,4 +134,13 @@ int kbase_hwcnt_accumulator_dump( u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf); +/** + * kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend + * timestamp. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * + * Return: Accumulator backend timestamp in nanoseconds. + */ +u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum); + #endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_backend_gpu.c b/mali_kbase/mali_kbase_hwcnt_backend_gpu.c index 4bc8916..b68607a 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_gpu.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_gpu.c @@ -25,6 +25,7 @@ #include "mali_kbase_hwcnt_types.h" #include "mali_kbase.h" #include "mali_kbase_pm_policy.h" +#include "mali_kbase_pm_ca.h" #include "mali_kbase_hwaccess_instr.h" #include "mali_kbase_tlstream.h" #ifdef CONFIG_MALI_NO_MALI @@ -58,6 +59,7 @@ struct kbase_hwcnt_backend_gpu_info { * @cpu_dump_va: CPU mapping of gpu_dump_va. * @vmap: Dump buffer vmap. * @enabled: True if dumping has been enabled, else false. + * @pm_core_mask: PM state sync-ed shaders core mask for the enabled dumping. 
*/ struct kbase_hwcnt_backend_gpu { const struct kbase_hwcnt_backend_gpu_info *info; @@ -67,6 +69,7 @@ struct kbase_hwcnt_backend_gpu { void *cpu_dump_va; struct kbase_vmap_struct *vmap; bool enabled; + u64 pm_core_mask; }; /* GPU backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ @@ -116,6 +119,7 @@ static int kbasep_hwcnt_backend_gpu_dump_enable_nolock( if (errcode) goto error; + backend_gpu->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); backend_gpu->enabled = true; return 0; @@ -225,7 +229,8 @@ static int kbasep_hwcnt_backend_gpu_dump_get( backend_gpu->kctx, backend_gpu->vmap, KBASE_SYNC_TO_CPU); return kbase_hwcnt_gpu_dump_get( - dst, backend_gpu->cpu_dump_va, dst_enable_map, accumulate); + dst, backend_gpu->cpu_dump_va, dst_enable_map, + backend_gpu->pm_core_mask, accumulate); } /** @@ -308,7 +313,7 @@ static void kbasep_hwcnt_backend_gpu_destroy( if (backend->kctx_element) { mutex_lock(&kbdev->kctx_list_lock); - KBASE_TLSTREAM_TL_DEL_CTX(kctx); + KBASE_TLSTREAM_TL_DEL_CTX(kbdev, kctx); list_del(&backend->kctx_element->link); mutex_unlock(&kbdev->kctx_list_lock); @@ -368,8 +373,10 @@ static int kbasep_hwcnt_backend_gpu_create( /* Fire tracepoint while lock is held, to ensure tracepoint is not * created in both body and summary stream */ - KBASE_TLSTREAM_TL_NEW_CTX( - backend->kctx, backend->kctx->id, (u32)(backend->kctx->tgid)); + KBASE_TLSTREAM_TL_NEW_CTX(kbdev, + backend->kctx, + backend->kctx->id, + (u32)(backend->kctx->tgid)); mutex_unlock(&kbdev->kctx_list_lock); diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c index 647d3ec..8581fe9 100644 --- a/mali_kbase/mali_kbase_hwcnt_gpu.c +++ b/mali_kbase/mali_kbase_hwcnt_gpu.c @@ -367,15 +367,50 @@ void kbase_hwcnt_gpu_metadata_destroy( } KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_destroy); +static bool is_block_type_shader( + const u64 grp_type, + const u64 blk_type, + const size_t blk) +{ + bool is_shader = false; + + switch (grp_type) { + case 
KBASE_HWCNT_GPU_GROUP_TYPE_V4: + /* blk-value in [0, KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP-1] + * corresponds to a shader, or its implementation + * reserved. As such, here we use the blk index value to + * tell the reserved case. + */ + if (blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER || + (blk < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP && + blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED)) + is_shader = true; + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2) + is_shader = true; + break; + default: + /* Warn on unknown group type */ + WARN_ON(true); + } + + return is_shader; +} + int kbase_hwcnt_gpu_dump_get( struct kbase_hwcnt_dump_buffer *dst, void *src, const struct kbase_hwcnt_enable_map *dst_enable_map, + u64 pm_core_mask, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u32 *dump_src; size_t src_offset, grp, blk, blk_inst; + size_t grp_prev = 0; + u64 core_mask = pm_core_mask; if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) @@ -393,6 +428,23 @@ int kbase_hwcnt_gpu_dump_get( const size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count( metadata, grp, blk); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); + const bool is_shader_core = is_block_type_shader( + kbase_hwcnt_metadata_group_type(metadata, grp), + blk_type, blk); + + if (grp != grp_prev) { + /* grp change would only happen with V4. V5 and + * further are envisaged to be single group + * scenario only. Here needs to drop the lower + * group core-mask by shifting right with + * KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP. 
+ */ + core_mask = pm_core_mask >> + KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; + grp_prev = grp; + } /* Early out if no values in the dest block are enabled */ if (kbase_hwcnt_enable_map_block_enabled( @@ -401,16 +453,25 @@ int kbase_hwcnt_gpu_dump_get( dst, grp, blk, blk_inst); const u32 *src_blk = dump_src + src_offset; - if (accumulate) { - kbase_hwcnt_dump_buffer_block_accumulate( - dst_blk, src_blk, hdr_cnt, ctr_cnt); - } else { - kbase_hwcnt_dump_buffer_block_copy( - dst_blk, src_blk, (hdr_cnt + ctr_cnt)); + if (!is_shader_core || (core_mask & 1)) { + if (accumulate) { + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, + ctr_cnt); + } else { + kbase_hwcnt_dump_buffer_block_copy( + dst_blk, src_blk, + (hdr_cnt + ctr_cnt)); + } + } else if (!accumulate) { + kbase_hwcnt_dump_buffer_block_zero( + dst_blk, (hdr_cnt + ctr_cnt)); } } src_offset += (hdr_cnt + ctr_cnt); + if (is_shader_core) + core_mask = core_mask >> 1; } return 0; diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h index 509608a..12891e0 100644 --- a/mali_kbase/mali_kbase_hwcnt_gpu.h +++ b/mali_kbase/mali_kbase_hwcnt_gpu.h @@ -178,6 +178,7 @@ void kbase_hwcnt_gpu_metadata_destroy( * as returned in out_dump_bytes parameter of * kbase_hwcnt_gpu_metadata_create. * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @pm_core_mask: PM state synchronized shaders core mask with the dump. * @accumulate: True if counters in src should be accumulated into dst, * rather than copied. 
* @@ -191,6 +192,7 @@ int kbase_hwcnt_gpu_dump_get( struct kbase_hwcnt_dump_buffer *dst, void *src, const struct kbase_hwcnt_enable_map *dst_enable_map, + const u64 pm_core_mask, bool accumulate); /** diff --git a/mali_kbase/mali_kbase_hwcnt_virtualizer.c b/mali_kbase/mali_kbase_hwcnt_virtualizer.c index 26e9852..917e47c 100644 --- a/mali_kbase/mali_kbase_hwcnt_virtualizer.c +++ b/mali_kbase/mali_kbase_hwcnt_virtualizer.c @@ -33,17 +33,24 @@ /** * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure. - * @hctx: Hardware counter context being virtualized. - * @metadata: Hardware counter metadata. - * @lock: Lock acquired at all entrypoints, to protect mutable state. - * @client_count: Current number of virtualizer clients. - * @clients: List of virtualizer clients. - * @accum: Hardware counter accumulator. NULL if no clients. - * @scratch_map: Enable map used as scratch space during counter changes. - * @scratch_buf: Dump buffer used as scratch space during dumps. + * @hctx: Hardware counter context being virtualized. + * @dump_threshold_ns: Minimum threshold period for dumps between different + * clients where a new accumulator dump will not be + * performed, and instead accumulated values will be used. + * If 0, rate limiting is disabled. + * @metadata: Hardware counter metadata. + * @lock: Lock acquired at all entrypoints, to protect mutable + * state. + * @client_count: Current number of virtualizer clients. + * @clients: List of virtualizer clients. + * @accum: Hardware counter accumulator. NULL if no clients. + * @scratch_map: Enable map used as scratch space during counter changes. + * @scratch_buf: Dump buffer used as scratch space during dumps. + * @ts_last_dump_ns: End time of most recent dump across all clients. 
*/ struct kbase_hwcnt_virtualizer { struct kbase_hwcnt_context *hctx; + u64 dump_threshold_ns; const struct kbase_hwcnt_metadata *metadata; struct mutex lock; size_t client_count; @@ -51,6 +58,7 @@ struct kbase_hwcnt_virtualizer { struct kbase_hwcnt_accumulator *accum; struct kbase_hwcnt_enable_map scratch_map; struct kbase_hwcnt_dump_buffer scratch_buf; + u64 ts_last_dump_ns; }; /** @@ -287,6 +295,9 @@ static int kbasep_hwcnt_virtualizer_client_add( hvcli->has_accum = false; hvcli->ts_start_ns = ts_end_ns; + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = ts_end_ns; + return 0; error: hvirt->client_count -= 1; @@ -336,6 +347,9 @@ static void kbasep_hwcnt_virtualizer_client_remove( list_for_each_entry(pos, &hvirt->clients, node) kbasep_hwcnt_virtualizer_client_accumulate( pos, &hvirt->scratch_buf); + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = ts_end_ns; } WARN_ON(errcode); } @@ -421,6 +435,9 @@ static int kbasep_hwcnt_virtualizer_client_set_counters( *ts_start_ns = hvcli->ts_start_ns; hvcli->ts_start_ns = *ts_end_ns; + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = *ts_end_ns; + return errcode; } @@ -464,6 +481,9 @@ int kbase_hwcnt_virtualizer_client_set_counters( /* Fix up the timestamps */ *ts_start_ns = hvcli->ts_start_ns; hvcli->ts_start_ns = *ts_end_ns; + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = *ts_end_ns; } } else { /* Otherwise, do the full virtualize */ @@ -539,9 +559,86 @@ static int kbasep_hwcnt_virtualizer_client_dump( *ts_start_ns = hvcli->ts_start_ns; hvcli->ts_start_ns = *ts_end_ns; + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = *ts_end_ns; + return errcode; } +/** + * kbasep_hwcnt_virtualizer_client_dump_rate_limited - Perform a dump of the + * client's currently enabled counters + * if it hasn't been rate limited, + * otherwise return the client's most + * 
recent accumulation. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +static int kbasep_hwcnt_virtualizer_client_dump_rate_limited( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + bool rate_limited = true; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); + lockdep_assert_held(&hvirt->lock); + + if (hvirt->dump_threshold_ns == 0) { + /* Threshold == 0, so rate limiting disabled */ + rate_limited = false; + } else if (hvirt->ts_last_dump_ns == hvcli->ts_start_ns) { + /* Last dump was performed by this client, and dumps from an + * individual client are never rate limited + */ + rate_limited = false; + } else { + const u64 ts_ns = + kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum); + const u64 time_since_last_dump_ns = + ts_ns - hvirt->ts_last_dump_ns; + + /* Dump period equals or exceeds the threshold */ + if (time_since_last_dump_ns >= hvirt->dump_threshold_ns) + rate_limited = false; + } + + if (!rate_limited) + return kbasep_hwcnt_virtualizer_client_dump( + hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf); + + /* If we've gotten this far, the client must have something accumulated + * otherwise it is a logic error + */ + WARN_ON(!hvcli->has_accum); + + if (dump_buf) + kbase_hwcnt_dump_buffer_copy( + dump_buf, 
&hvcli->accum_buf, &hvcli->enable_map); + hvcli->has_accum = false; + + *ts_start_ns = hvcli->ts_start_ns; + *ts_end_ns = hvirt->ts_last_dump_ns; + hvcli->ts_start_ns = hvirt->ts_last_dump_ns; + + return 0; +} + int kbase_hwcnt_virtualizer_client_dump( struct kbase_hwcnt_virtualizer_client *hvcli, u64 *ts_start_ns, @@ -575,10 +672,13 @@ int kbase_hwcnt_virtualizer_client_dump( /* Fix up the timestamps */ *ts_start_ns = hvcli->ts_start_ns; hvcli->ts_start_ns = *ts_end_ns; + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = *ts_end_ns; } } else { /* Otherwise, do the full virtualize */ - errcode = kbasep_hwcnt_virtualizer_client_dump( + errcode = kbasep_hwcnt_virtualizer_client_dump_rate_limited( hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf); } @@ -639,6 +739,7 @@ KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_destroy); int kbase_hwcnt_virtualizer_init( struct kbase_hwcnt_context *hctx, + u64 dump_threshold_ns, struct kbase_hwcnt_virtualizer **out_hvirt) { struct kbase_hwcnt_virtualizer *virt; @@ -656,6 +757,7 @@ int kbase_hwcnt_virtualizer_init( return -ENOMEM; virt->hctx = hctx; + virt->dump_threshold_ns = dump_threshold_ns; virt->metadata = metadata; mutex_init(&virt->lock); diff --git a/mali_kbase/mali_kbase_hwcnt_virtualizer.h b/mali_kbase/mali_kbase_hwcnt_virtualizer.h index 1efa81d..8f628c3 100644 --- a/mali_kbase/mali_kbase_hwcnt_virtualizer.h +++ b/mali_kbase/mali_kbase_hwcnt_virtualizer.h @@ -40,14 +40,20 @@ struct kbase_hwcnt_dump_buffer; /** * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer. - * @hctx: Non-NULL pointer to the hardware counter context to virtualize. - * @out_hvirt: Non-NULL pointer to where the pointer to the created virtualizer - * will be stored on success. + * @hctx: Non-NULL pointer to the hardware counter context to + * virtualize. 
+ * @dump_threshold_ns: Minimum threshold period for dumps between different + * clients where a new accumulator dump will not be + * performed, and instead accumulated values will be used. + * If 0, rate limiting will be disabled. + * @out_hvirt: Non-NULL pointer to where the pointer to the created + * virtualizer will be stored on success. * * Return: 0 on success, else error code. */ int kbase_hwcnt_virtualizer_init( struct kbase_hwcnt_context *hctx, + u64 dump_threshold_ns, struct kbase_hwcnt_virtualizer **out_hvirt); /** diff --git a/mali_kbase/mali_kbase_ioctl.h b/mali_kbase/mali_kbase_ioctl.h index ccf67df..033a1bd 100644 --- a/mali_kbase/mali_kbase_ioctl.h +++ b/mali_kbase/mali_kbase_ioctl.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c index 97d7b43..d459ad2 100644 --- a/mali_kbase/mali_kbase_jd.c +++ b/mali_kbase/mali_kbase_jd.c @@ -199,7 +199,9 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st u32 res_no; #ifdef CONFIG_MALI_DMA_FENCE struct kbase_dma_fence_resv_info info = { + .resv_objs = NULL, .dma_fence_resv_count = 0, + .dma_fence_excl_bitmap = NULL }; #if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) /* @@ -223,10 +225,8 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st return -EINVAL; katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL); - if (NULL == katom->extres) { - err_ret_val = -ENOMEM; - goto early_err_out; - } + if (!katom->extres) + return -ENOMEM; /* copy user buffer to the end of our real buffer. 
* Make sure the struct sizes haven't changed in a way @@ -282,7 +282,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st katom->kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); /* did we find a matching region object? */ - if (NULL == reg || (reg->flags & KBASE_REG_FREE)) { + if (kbase_is_region_invalid_or_free(reg)) { /* roll back */ goto failed_loop; } @@ -782,6 +782,7 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom) { + struct kbase_device *kbdev = kctx->kbdev; struct kbase_jd_context *jctx = &kctx->jctx; int queued = 0; int i; @@ -845,11 +846,15 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us * back to user space. Do not record any * dependencies. */ KBASE_TLSTREAM_TL_NEW_ATOM( + kbdev, katom, kbase_jd_atom_id(kctx, katom)); KBASE_TLSTREAM_TL_RET_ATOM_CTX( + kbdev, katom, kctx); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( + kbdev, + katom, TL_ATOM_STATE_IDLE); ret = jd_done_nolock(katom, NULL); @@ -892,10 +897,11 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us * will be sent back to user space. Do not record any * dependencies. */ KBASE_TLSTREAM_TL_NEW_ATOM( + kbdev, katom, kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, + KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) @@ -967,11 +973,12 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us /* Create a new atom. 
*/ KBASE_TLSTREAM_TL_NEW_ATOM( + kbdev, katom, kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_IDLE); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(katom, katom->sched_priority); - KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, katom->sched_priority); + KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { @@ -1236,7 +1243,7 @@ void kbase_jd_done_worker(struct work_struct *data) * Begin transaction on JD context and JS context */ mutex_lock(&jctx->lock); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_DONE); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_DONE); mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c index 80b6d77..4f44644 100644 --- a/mali_kbase/mali_kbase_js.c +++ b/mali_kbase/mali_kbase_js.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,9 +27,6 @@ */ #include <mali_kbase.h> #include <mali_kbase_js.h> -#if defined(CONFIG_MALI_GATOR_SUPPORT) -#include <mali_kbase_gator.h> -#endif #include <mali_kbase_tlstream.h> #include <mali_kbase_hw.h> #include <mali_kbase_ctx_sched.h> @@ -365,6 +362,7 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) static void jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) { + struct kbase_device *kbdev = kctx->kbdev; int prio = katom->sched_priority; int js = katom->slot_nr; struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; @@ -387,7 +385,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) rb_link_node(&katom->runnable_tree_node, parent, new); rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_READY); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_READY); } /** @@ -1482,10 +1480,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, kbasep_js_is_submit_allowed(js_devdata, kctx)); -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_mmu_as_released(kctx->as_nr); -#endif - KBASE_TLSTREAM_TL_NRET_AS_CTX(&kbdev->as[kctx->as_nr], kctx); + KBASE_TLSTREAM_TL_NRET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); kbase_backend_release_ctx_irq(kbdev, kctx); @@ -1763,10 +1758,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, kbdev->hwaccess.active_kctx[js] = kctx; -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_mmu_as_in_use(kctx->as_nr); -#endif - KBASE_TLSTREAM_TL_RET_AS_CTX(&kbdev->as[kctx->as_nr], kctx); + KBASE_TLSTREAM_TL_RET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); /* Cause any future waiter-on-termination to wait until the context is * 
descheduled */ @@ -2260,7 +2252,7 @@ static void js_return_worker(struct work_struct *data) unsigned long flags; base_jd_core_req core_req = katom->core_req; - KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom); + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom); kbase_backend_complete_wq(kbdev, katom); @@ -2491,10 +2483,8 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, katom->sched_priority); } -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP, - katom->slot_nr), NULL, 0); -#endif + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL, + katom->slot_nr, 0, TL_JS_EVENT_STOP); kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index 3d0de90..5ea54a0 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,6 +34,9 @@ #include <linux/compat.h> #include <linux/version.h> #include <linux/log2.h> +#ifdef CONFIG_OF +#include <linux/of_platform.h> +#endif #include <mali_kbase_config.h> #include <mali_kbase.h> @@ -41,6 +44,8 @@ #include <mali_kbase_cache_policy.h> #include <mali_kbase_hw.h> #include <mali_kbase_tlstream.h> +#include <mali_kbase_native_mgm.h> +#include <mali_kbase_mem_pool_group.h> /* Forward declarations */ static void free_partial_locked(struct kbase_context *kctx, @@ -368,7 +373,7 @@ int kbase_remove_va_region(struct kbase_va_region *reg) merged_back = 1; if (merged_front) { /* We already merged with prev, free it */ - kbase_free_alloced_region(reg); + kfree(reg); } } } @@ -428,7 +433,7 @@ static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg, if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) { rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), reg_rbtree); - kbase_free_alloced_region(at_reg); + kfree(at_reg); } /* New region replaces the start of the old one, so insert before. */ else if (at_reg->start_pfn == start_pfn) { @@ -493,8 +498,11 @@ int kbase_add_va_region(struct kbase_context *kctx, /* The executable allocation from the SAME_VA zone would already have an * appropriately aligned GPU VA chosen for it. + * Also the executable allocation from EXEC_VA zone doesn't need the + * special alignment. 
*/ - if (!(reg->flags & KBASE_REG_GPU_NX) && !addr) { + if (!(reg->flags & KBASE_REG_GPU_NX) && !addr && + ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) { if (cpu_va_bits > gpu_pc_bits) { align = max(align, (size_t)((1ULL << gpu_pc_bits) >> PAGE_SHIFT)); @@ -539,6 +547,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) { + struct device *const dev = kbdev->dev; struct rb_root *rbtree = NULL; struct kbase_va_region *tmp; u64 gpu_pfn = addr >> PAGE_SHIFT; @@ -557,18 +566,15 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev, * which *must* be free. */ if (gpu_pfn) { - struct device *dev = kbdev->dev; - KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1))); tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn, nr_pages); - if (!tmp) { - dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages); + if (kbase_is_region_invalid(tmp)) { + dev_warn(dev, "Enclosing region not found or invalid: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages); err = -ENOMEM; goto exit; - } - if (!(tmp->flags & KBASE_REG_FREE)) { + } else if (!kbase_is_region_free(tmp)) { dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages); @@ -602,7 +608,13 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev, if (tmp) { err = kbase_insert_va_region_nolock(reg, tmp, start_pfn, nr_pages); + if (unlikely(err)) { + dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages", + start_pfn, nr_pages); + } } else { + dev_dbg(dev, "Failed to find a suitable region: %zu nr_pages, %zu align_offset, %zu align_mask\n", + nr_pages, align_offset, align_mask); err = -ENOMEM; } } @@ -645,6 +657,16 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) if (rbnode) { rb_erase(rbnode, rbtree); reg 
= rb_entry(rbnode, struct kbase_va_region, rblink); + WARN_ON(reg->va_refcnt != 1); + /* Reset the start_pfn - as the rbtree is being + * destroyed and we've already erased this region, there + * is no further need to attempt to remove it. + * This won't affect the cleanup if the region was + * being used as a sticky resource as the cleanup + * related to sticky resources anyways need to be + * performed before the term of region tracker. + */ + reg->start_pfn = 0; kbase_free_alloced_region(reg); } } while (rbnode); @@ -749,6 +771,8 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, u64 same_va_bits = kbase_get_same_va_bits(kctx); u64 total_va_size; + lockdep_assert_held(&kctx->reg_lock); + total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; /* First verify that a JIT_VA zone has not been created already. */ @@ -901,34 +925,63 @@ exit_unlock: int kbase_mem_init(struct kbase_device *kbdev) { + int err = 0; struct kbasep_mem_device *memdev; - int ret; +#ifdef CONFIG_OF + struct device_node *mgm_node = NULL; +#endif KBASE_DEBUG_ASSERT(kbdev); memdev = &kbdev->memdev; - kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX; + + kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults, + KBASE_MEM_POOL_MAX_SIZE_KCTX); /* Initialize memory usage */ atomic_set(&memdev->used_pages, 0); - ret = kbase_mem_pool_init(&kbdev->mem_pool, - KBASE_MEM_POOL_MAX_SIZE_KBDEV, - KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER, - kbdev, - NULL); - if (ret) - return ret; + kbdev->mgm_dev = &kbase_native_mgm_dev; - ret = kbase_mem_pool_init(&kbdev->lp_mem_pool, - (KBASE_MEM_POOL_MAX_SIZE_KBDEV >> 9), - KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER, - kbdev, - NULL); - if (ret) - kbase_mem_pool_term(&kbdev->mem_pool); +#ifdef CONFIG_OF + /* Check to see whether or not a platform-specific memory group manager + * is configured and available. 
+ */ + mgm_node = of_parse_phandle(kbdev->dev->of_node, + "physical-memory-group-manager", 0); + if (!mgm_node) { + dev_info(kbdev->dev, + "No memory group manager is configured\n"); + } else { + struct platform_device *const pdev = + of_find_device_by_node(mgm_node); - return ret; + if (!pdev) { + dev_err(kbdev->dev, + "The configured memory group manager was not found\n"); + } else { + kbdev->mgm_dev = platform_get_drvdata(pdev); + if (!kbdev->mgm_dev) { + dev_info(kbdev->dev, + "Memory group manager is not ready\n"); + err = -EPROBE_DEFER; + } + } + of_node_put(mgm_node); + } +#endif + + if (likely(!err)) { + struct kbase_mem_pool_group_config mem_pool_defaults; + + kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults, + KBASE_MEM_POOL_MAX_SIZE_KBDEV); + + err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, + &mem_pool_defaults, NULL); + } + + return err; } void kbase_mem_halt(struct kbase_device *kbdev) @@ -949,8 +1002,7 @@ void kbase_mem_term(struct kbase_device *kbdev) if (pages != 0) dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); - kbase_mem_pool_term(&kbdev->mem_pool); - kbase_mem_pool_term(&kbdev->lp_mem_pool); + kbase_mem_pool_group_term(&kbdev->mem_pools); } KBASE_EXPORT_TEST_API(kbase_mem_term); @@ -985,6 +1037,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, if (!new_reg) return NULL; + new_reg->va_refcnt = 1; new_reg->cpu_alloc = NULL; /* no alloc bound yet */ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ new_reg->rbtree = rbtree; @@ -1047,6 +1100,9 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) if (WARN_ON(!kctx)) return; + if (WARN_ON(kbase_is_region_invalid(reg))) + return; + mutex_lock(&kctx->jit_evict_lock); @@ -1095,10 +1151,12 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); - /* To detect use-after-free in debug builds */ - KBASE_DEBUG_CODE(reg->flags |= 
KBASE_REG_FREE); + + reg->flags |= KBASE_REG_VA_FREED; + kbase_va_region_alloc_put(kctx, reg); + } else { + kfree(reg); } - kfree(reg); } KBASE_EXPORT_TEST_API(kbase_free_alloced_region); @@ -1204,7 +1262,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) { - int err; + int err = 0; if (reg->start_pfn == 0) return 0; @@ -1218,7 +1276,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++) if (reg->gpu_alloc->imported.alias.aliased[i].alloc) kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); - } else { + } else if (reg->gpu_alloc) { err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_reg_current_backed_size(reg), kctx->as_nr); @@ -1242,7 +1300,6 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) if (err) return err; - err = kbase_remove_va_region(reg); return err; } @@ -1413,8 +1470,8 @@ static int kbase_do_syncset(struct kbase_context *kctx, /* find the region where the virtual address is contained */ reg = kbase_region_tracker_find_region_enclosing_address(kctx, sset->mem_handle.basep.handle); - if (!reg) { - dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX", + if (kbase_is_region_invalid_or_free(reg)) { + dev_warn(kctx->kbdev->dev, "Can't find a valid region at VA 0x%016llX", sset->mem_handle.basep.handle); err = -EINVAL; goto out_unlock; @@ -1523,8 +1580,8 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_DEBUG_ASSERT(NULL != reg); lockdep_assert_held(&kctx->reg_lock); - if (reg->flags & KBASE_REG_JIT) { - dev_warn(kctx->kbdev->dev, "Attempt to free JIT memory!\n"); + if (reg->flags & KBASE_REG_NO_USER_FREE) { + dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); return -EINVAL; } @@ -1608,7 
+1665,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) /* A real GPU va */ /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); - if (!reg || (reg->flags & KBASE_REG_FREE)) { + if (kbase_is_region_invalid_or_free(reg)) { dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX", gpu_addr); err = -EINVAL; @@ -1715,10 +1772,14 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_left = nr_pages_requested; int res; struct kbase_context *kctx; + struct kbase_device *kbdev; struct tagged_addr *tp; - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); - KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || + WARN_ON(alloc->imported.native.kctx == NULL) || + WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { + return -EINVAL; + } if (alloc->reg) { if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) @@ -1726,14 +1787,15 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, } kctx = alloc->imported.native.kctx; + kbdev = kctx->kbdev; if (nr_pages_requested == 0) goto done; /*nothing to do*/ - new_page_count = kbase_atomic_add_pages( - nr_pages_requested, &kctx->used_pages); - kbase_atomic_add_pages(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); + new_page_count = atomic_add_return( + nr_pages_requested, &kctx->used_pages); + atomic_add(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); /* Increase mm counters before we allocate pages so that this * allocation is visible to the OOM killer */ @@ -1748,10 +1810,11 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, if (nr_left >= (SZ_2M / SZ_4K)) { int nr_lp = nr_left / (SZ_2M / SZ_4K); - res = kbase_mem_pool_alloc_pages(&kctx->lp_mem_pool, - nr_lp * (SZ_2M / SZ_4K), - tp, - true); + res = kbase_mem_pool_alloc_pages( + &kctx->mem_pools.large[alloc->group_id], + nr_lp * (SZ_2M / SZ_4K), + tp, 
+ true); if (res > 0) { nr_left -= res; @@ -1797,10 +1860,15 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, do { int err; - np = kbase_mem_pool_alloc(&kctx->lp_mem_pool); + np = kbase_mem_pool_alloc( + &kctx->mem_pools.large[ + alloc->group_id]); if (np) break; - err = kbase_mem_pool_grow(&kctx->lp_mem_pool, 1); + + err = kbase_mem_pool_grow( + &kctx->mem_pools.large[alloc->group_id], + 1); if (err) break; } while (1); @@ -1812,7 +1880,11 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, sa = kmalloc(sizeof(*sa), GFP_KERNEL); if (!sa) { - kbase_mem_pool_free(&kctx->lp_mem_pool, np, false); + kbase_mem_pool_free( + &kctx->mem_pools.large[ + alloc->group_id], + np, + false); goto no_new_partial; } @@ -1841,15 +1913,15 @@ no_new_partial: #endif if (nr_left) { - res = kbase_mem_pool_alloc_pages(&kctx->mem_pool, - nr_left, - tp, - false); + res = kbase_mem_pool_alloc_pages( + &kctx->mem_pools.small[alloc->group_id], + nr_left, tp, false); if (res <= 0) goto alloc_failed; } KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, kctx->id, (u64)new_page_count); @@ -1865,17 +1937,17 @@ alloc_failed: alloc->nents += nr_pages_to_free; kbase_process_page_usage_inc(kctx, nr_pages_to_free); - kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages); - kbase_atomic_add_pages(nr_pages_to_free, - &kctx->kbdev->memdev.used_pages); + atomic_add(nr_pages_to_free, &kctx->used_pages); + atomic_add(nr_pages_to_free, + &kctx->kbdev->memdev.used_pages); kbase_free_phy_pages_helper(alloc, nr_pages_to_free); } kbase_process_page_usage_dec(kctx, nr_pages_requested); - kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages); - kbase_atomic_sub_pages(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); + atomic_sub(nr_pages_requested, &kctx->used_pages); + atomic_sub(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); invalid_request: return -ENOMEM; @@ -1890,6 +1962,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( size_t nr_left = 
nr_pages_requested; int res; struct kbase_context *kctx; + struct kbase_device *kbdev; struct tagged_addr *tp; struct tagged_addr *new_pages = NULL; @@ -1908,16 +1981,17 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( } kctx = alloc->imported.native.kctx; + kbdev = kctx->kbdev; lockdep_assert_held(&kctx->mem_partials_lock); if (nr_pages_requested == 0) goto done; /*nothing to do*/ - new_page_count = kbase_atomic_add_pages( - nr_pages_requested, &kctx->used_pages); - kbase_atomic_add_pages(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); + new_page_count = atomic_add_return( + nr_pages_requested, &kctx->used_pages); + atomic_add(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); /* Increase mm counters before we allocate pages so that this * allocation is visible to the OOM killer @@ -2023,6 +2097,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( #endif KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, kctx->id, (u64)new_page_count); @@ -2068,9 +2143,8 @@ alloc_failed: } kbase_process_page_usage_dec(kctx, nr_pages_requested); - kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages); - kbase_atomic_sub_pages(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); + atomic_sub(nr_pages_requested, &kctx->used_pages); + atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages); invalid_request: return NULL; @@ -2088,7 +2162,10 @@ static void free_partial(struct kbase_context *kctx, struct tagged_addr tp) clear_bit(p - head_page, sa->sub_pages); if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { list_del(&sa->link); - kbase_mem_pool_free(&kctx->lp_mem_pool, head_page, true); + kbase_mem_pool_free( + &kctx->mem_pools.large[BASE_MEM_GROUP_DEFAULT], + head_page, + true); kfree(sa); } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == SZ_2M / SZ_4K - 1) { @@ -2103,15 +2180,19 @@ int kbase_free_phy_pages_helper( size_t nr_pages_to_free) { struct kbase_context *kctx = alloc->imported.native.kctx; + struct kbase_device *kbdev = 
kctx->kbdev; bool syncback; bool reclaimed = (alloc->evicted != 0); struct tagged_addr *start_free; int new_page_count __maybe_unused; size_t freed = 0; - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); - KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); - KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || + WARN_ON(alloc->imported.native.kctx == NULL) || + WARN_ON(alloc->nents < nr_pages_to_free) || + WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { + return -EINVAL; + } /* early out if nothing to do */ if (0 == nr_pages_to_free) @@ -2133,11 +2214,12 @@ int kbase_free_phy_pages_helper( /* This is a 2MB entry, so free all the 512 pages that * it points to */ - kbase_mem_pool_free_pages(&kctx->lp_mem_pool, - 512, - start_free, - syncback, - reclaimed); + kbase_mem_pool_free_pages( + &kctx->mem_pools.large[alloc->group_id], + 512, + start_free, + syncback, + reclaimed); nr_pages_to_free -= 512; start_free += 512; freed += 512; @@ -2151,16 +2233,17 @@ int kbase_free_phy_pages_helper( local_end_free = start_free; while (nr_pages_to_free && - !is_huge(*local_end_free) && - !is_partial(*local_end_free)) { + !is_huge(*local_end_free) && + !is_partial(*local_end_free)) { local_end_free++; nr_pages_to_free--; } - kbase_mem_pool_free_pages(&kctx->mem_pool, - local_end_free - start_free, - start_free, - syncback, - reclaimed); + kbase_mem_pool_free_pages( + &kctx->mem_pools.small[alloc->group_id], + local_end_free - start_free, + start_free, + syncback, + reclaimed); freed += local_end_free - start_free; start_free += local_end_free - start_free; } @@ -2174,14 +2257,15 @@ int kbase_free_phy_pages_helper( */ if (!reclaimed) { kbase_process_page_usage_dec(kctx, freed); - new_page_count = kbase_atomic_sub_pages(freed, - &kctx->used_pages); - kbase_atomic_sub_pages(freed, - &kctx->kbdev->memdev.used_pages); + new_page_count = atomic_sub_return(freed, + &kctx->used_pages); + atomic_sub(freed, + 
&kctx->kbdev->memdev.used_pages); KBASE_TLSTREAM_AUX_PAGESALLOC( - kctx->id, - (u64)new_page_count); + kbdev, + kctx->id, + (u64)new_page_count); } return 0; @@ -2216,6 +2300,7 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free) { struct kbase_context *kctx = alloc->imported.native.kctx; + struct kbase_device *kbdev = kctx->kbdev; bool syncback; bool reclaimed = (alloc->evicted != 0); struct tagged_addr *start_free; @@ -2294,12 +2379,13 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, int new_page_count; kbase_process_page_usage_dec(kctx, freed); - new_page_count = kbase_atomic_sub_pages(freed, - &kctx->used_pages); - kbase_atomic_sub_pages(freed, - &kctx->kbdev->memdev.used_pages); + new_page_count = atomic_sub_return(freed, + &kctx->used_pages); + atomic_sub(freed, + &kctx->kbdev->memdev.used_pages); KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, kctx->id, (u64)new_page_count); } @@ -2824,7 +2910,7 @@ static void kbase_jit_destroy_worker(struct work_struct *work) mutex_unlock(&kctx->jit_evict_lock); kbase_gpu_vm_lock(kctx); - reg->flags &= ~KBASE_REG_JIT; + reg->flags &= ~KBASE_REG_NO_USER_FREE; kbase_mem_free_region(kctx, reg); kbase_gpu_vm_unlock(kctx); } while (1); @@ -2914,13 +3000,13 @@ static int kbase_jit_grow(struct kbase_context *kctx, } if (pages_required >= (SZ_2M / SZ_4K)) { - pool = &kctx->lp_mem_pool; + pool = &kctx->mem_pools.large[BASE_MEM_GROUP_DEFAULT]; /* Round up to number of 2 MB pages required */ pages_required += ((SZ_2M / SZ_4K) - 1); pages_required /= (SZ_2M / SZ_4K); } else { #endif - pool = &kctx->mem_pool; + pool = &kctx->mem_pools.small[BASE_MEM_GROUP_DEFAULT]; #ifdef CONFIG_MALI_2MB_ALLOC } #endif @@ -3005,6 +3091,7 @@ static void trace_jit_stats(struct kbase_context *kctx, { const u32 alloc_count = kctx->jit_current_allocations_per_bin[bin_id]; + struct kbase_device *kbdev = kctx->kbdev; struct kbase_va_region *walker; u32 va_pages = 0; @@ -3020,8 +3107,8 @@ 
static void trace_jit_stats(struct kbase_context *kctx, } mutex_unlock(&kctx->jit_evict_lock); - KBASE_TLSTREAM_AUX_JIT_STATS(kctx->id, bin_id, max_allocations, - alloc_count, va_pages, ph_pages); + KBASE_TLSTREAM_AUX_JIT_STATS(kbdev, kctx->id, bin_id, + max_allocations, alloc_count, va_pages, ph_pages); } struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, @@ -3031,12 +3118,21 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { /* Too many current allocations */ + dev_dbg(kctx->kbdev->dev, + "Max JIT allocations limit reached: active allocations %d, max allocations %d\n", + kctx->jit_current_allocations, + kctx->jit_max_allocations); return NULL; } if (info->max_allocations > 0 && kctx->jit_current_allocations_per_bin[info->bin_id] >= info->max_allocations) { /* Too many current allocations in this bin */ + dev_dbg(kctx->kbdev->dev, + "Per bin limit of max JIT allocations reached: bin_id %d, active allocations %d, max allocations %d\n", + info->bin_id, + kctx->jit_current_allocations_per_bin[info->bin_id], + info->max_allocations); return NULL; } @@ -3147,6 +3243,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, * better so return the allocation to the pool and * return the function with failure. */ + dev_dbg(kctx->kbdev->dev, + "JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n", + info->va_pages, info->commit_pages); goto update_failed_unlocked; } } else { @@ -3163,10 +3262,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extent, &flags, &gpu_addr); - if (!reg) + if (!reg) { + /* Most likely not enough GPU virtual space left for + * the new JIT allocation. 
+ */ + dev_dbg(kctx->kbdev->dev, + "Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n", + info->va_pages, info->commit_pages); goto out_unlocked; + } - reg->flags |= KBASE_REG_JIT; + reg->flags |= KBASE_REG_NO_USER_FREE; mutex_lock(&kctx->jit_evict_lock); list_add(®->jit_node, &kctx->jit_active_head); @@ -3288,7 +3394,7 @@ bool kbase_jit_evict(struct kbase_context *kctx) mutex_unlock(&kctx->jit_evict_lock); if (reg) { - reg->flags &= ~KBASE_REG_JIT; + reg->flags &= ~KBASE_REG_NO_USER_FREE; kbase_mem_free_region(kctx, reg); } @@ -3310,7 +3416,7 @@ void kbase_jit_term(struct kbase_context *kctx) list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_JIT; + walker->flags &= ~KBASE_REG_NO_USER_FREE; kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -3322,7 +3428,7 @@ void kbase_jit_term(struct kbase_context *kctx) list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_JIT; + walker->flags &= ~KBASE_REG_NO_USER_FREE; kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -3376,8 +3482,14 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages, +#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ +KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE + reg->flags & KBASE_REG_GPU_WR ? 
FOLL_WRITE : 0, + pages, NULL); +#else reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); +#endif #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) pinned_pages = get_user_pages_remote(NULL, mm, address, @@ -3719,7 +3831,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( /* Find the region */ reg = kbase_region_tracker_find_region_enclosing_address( kctx, gpu_addr); - if (NULL == reg || (reg->flags & KBASE_REG_FREE)) + if (kbase_is_region_invalid_or_free(reg)) goto failed; /* Allocate the metadata object */ diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index a873bb1..de5550f 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,9 +39,6 @@ #include <mali_kbase_hw.h> #include "mali_kbase_pm.h" #include "mali_kbase_defs.h" -#if defined(CONFIG_MALI_GATOR_SUPPORT) -#include "mali_kbase_gator.h" -#endif /* Required for kbase_mem_evictable_unmake */ #include "mali_kbase_mem_linux.h" @@ -94,49 +91,54 @@ struct kbase_aliased { /** * @brief Physical pages tracking object properties */ -#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1ul << 0) -#define KBASE_MEM_PHY_ALLOC_LARGE (1ul << 1) +#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1u << 0) +#define KBASE_MEM_PHY_ALLOC_LARGE (1u << 1) -/* physical pages tracking object. +/* struct kbase_mem_phy_alloc - Physical pages tracking object. + * * Set up to track N pages. * N not stored here, the creator holds that info. * This object only tracks how many elements are actually valid (present). - * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not - * shared with another region or client. 
CPU mappings are OK to exist when changing, as - * long as the tracked mappings objects are updated as part of the change. + * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc + * is not shared with another region or client. CPU mappings are OK to + * exist when changing, as long as the tracked mappings objects are + * updated as part of the change. + * + * @kref: number of users of this alloc + * @gpu_mappings: count number of times mapped on the GPU + * @nents: 0..N + * @pages: N elements, only 0..nents are valid + * @mappings: List of CPU mappings of this physical memory allocation. + * @evict_node: Node used to store this allocation on the eviction list + * @evicted: Physical backing size when the pages where evicted + * @reg: Back reference to the region structure which created this + * allocation, or NULL if it has been freed. + * @type: type of buffer + * @permanent_map: Kernel side mapping of the alloc, shall never be + * referred directly. kbase_phy_alloc_mapping_get() & + * kbase_phy_alloc_mapping_put() pair should be used + * around access to the kernel-side CPU mapping so that + * mapping doesn't disappear whilst it is being accessed. + * @properties: Bitmask of properties, e.g. KBASE_MEM_PHY_ALLOC_LARGE. + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager, if present. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). 
+ * @imported: member in union valid based on @a type */ struct kbase_mem_phy_alloc { - struct kref kref; /* number of users of this alloc */ + struct kref kref; atomic_t gpu_mappings; - size_t nents; /* 0..N */ - struct tagged_addr *pages; /* N elements, only 0..nents are valid */ - - /* kbase_cpu_mappings */ + size_t nents; + struct tagged_addr *pages; struct list_head mappings; - - /* Node used to store this allocation on the eviction list */ struct list_head evict_node; - /* Physical backing size when the pages where evicted */ size_t evicted; - /* - * Back reference to the region structure which created this - * allocation, or NULL if it has been freed. - */ struct kbase_va_region *reg; - - /* type of buffer */ enum kbase_memory_type type; - - /* Kernel side mapping of the alloc, shall never be referred directly. - * kbase_phy_alloc_mapping_get() & kbase_phy_alloc_mapping_put() pair - * should be used around access to the kernel-side CPU mapping so that - * mapping doesn't disappear whilst it is being accessed. - */ struct kbase_vmap_struct *permanent_map; + u8 properties; + u8 group_id; - unsigned long properties; - - /* member in union valid based on @a type */ union { #if defined(CONFIG_DMA_SHARED_BUFFER) struct { @@ -306,12 +308,31 @@ struct kbase_va_region { * Extent must be a power of 2 */ #define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) -/* Memory is handled by JIT - user space should not be able to free it */ -#define KBASE_REG_JIT (1ul << 24) +/* Whilst this flag is set the GPU allocation is not supposed to be freed by + * user space. The flag will remain set for the lifetime of JIT allocations. + */ +#define KBASE_REG_NO_USER_FREE (1ul << 24) /* Memory has permanent kernel side mapping */ #define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) +/* GPU VA region has been freed by the userspace, but still remains allocated + * due to the reference held by CPU mappings created on the GPU VA region. 
+ * + * A region with this flag set has had kbase_gpu_munmap() called on it, but can + * still be looked-up in the region tracker as a non-free region. Hence must + * not create or update any more GPU mappings on such regions because they will + * not be unmapped when the region is finally destroyed. + * + * Since such regions are still present in the region tracker, new allocations + * attempted with BASE_MEM_SAME_VA might fail if their address intersects with + * a region with this flag set. + * + * In addition, this flag indicates the gpu_alloc member might no longer valid + * e.g. in infinite cache simulation. + */ +#define KBASE_REG_VA_FREED (1ul << 26) + #define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) /* only used with 32-bit clients */ @@ -349,8 +370,69 @@ struct kbase_va_region { u16 jit_usage_id; /* The JIT bin this allocation came from */ u8 jit_bin_id; + + int va_refcnt; /* number of users of this va */ }; +static inline bool kbase_is_region_free(struct kbase_va_region *reg) +{ + return (!reg || reg->flags & KBASE_REG_FREE); +} + +static inline bool kbase_is_region_invalid(struct kbase_va_region *reg) +{ + return (!reg || reg->flags & KBASE_REG_VA_FREED); +} + +static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg) +{ + /* Possibly not all functions that find regions would be using this + * helper, so they need to be checked when maintaining this function. 
+ */ + return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg)); +} + +int kbase_remove_va_region(struct kbase_va_region *reg); +static inline void kbase_region_refcnt_free(struct kbase_va_region *reg) +{ + /* If region was mapped then remove va region*/ + if (reg->start_pfn) + kbase_remove_va_region(reg); + + /* To detect use-after-free in debug builds */ + KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE); + kfree(reg); +} + +static inline struct kbase_va_region *kbase_va_region_alloc_get( + struct kbase_context *kctx, struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(!region->va_refcnt); + + /* non-atomic as kctx->reg_lock is held */ + region->va_refcnt++; + + return region; +} + +static inline struct kbase_va_region *kbase_va_region_alloc_put( + struct kbase_context *kctx, struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(region->va_refcnt <= 0); + WARN_ON(region->flags & KBASE_REG_FREE); + + /* non-atomic as kctx->reg_lock is held */ + region->va_refcnt--; + if (!region->va_refcnt) + kbase_region_refcnt_free(region); + + return NULL; +} + /* Common functions */ static inline struct tagged_addr *kbase_get_cpu_phy_pages( struct kbase_va_region *reg) @@ -392,7 +474,7 @@ static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg) static inline struct kbase_mem_phy_alloc *kbase_alloc_create( struct kbase_context *kctx, size_t nr_pages, - enum kbase_memory_type type) + enum kbase_memory_type type, int group_id) { struct kbase_mem_phy_alloc *alloc; size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages; @@ -439,6 +521,7 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create( alloc->pages = (void *)(alloc + 1); INIT_LIST_HEAD(&alloc->mappings); alloc->type = type; + alloc->group_id = group_id; if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) alloc->imported.user_buf.dma_addrs = @@ -448,7 +531,7 @@ static inline struct kbase_mem_phy_alloc 
*kbase_alloc_create( } static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, - struct kbase_context *kctx) + struct kbase_context *kctx, int group_id) { KBASE_DEBUG_ASSERT(reg); KBASE_DEBUG_ASSERT(!reg->cpu_alloc); @@ -456,7 +539,7 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE); reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, - KBASE_MEM_TYPE_NATIVE); + KBASE_MEM_TYPE_NATIVE, group_id); if (IS_ERR(reg->cpu_alloc)) return PTR_ERR(reg->cpu_alloc); else if (!reg->cpu_alloc) @@ -466,7 +549,7 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) && (reg->flags & KBASE_REG_CPU_CACHED)) { reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, - KBASE_MEM_TYPE_NATIVE); + KBASE_MEM_TYPE_NATIVE, group_id); if (IS_ERR_OR_NULL(reg->gpu_alloc)) { kbase_mem_phy_alloc_put(reg->cpu_alloc); return -ENOMEM; @@ -486,24 +569,6 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, return 0; } -static inline u32 kbase_atomic_add_pages(u32 num_pages, atomic_t *used_pages) -{ - int new_val = atomic_add_return(num_pages, used_pages); -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_total_alloc_pages_change((long long int)new_val); -#endif - return new_val; -} - -static inline u32 kbase_atomic_sub_pages(u32 num_pages, atomic_t *used_pages) -{ - int new_val = atomic_sub_return(num_pages, used_pages); -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_total_alloc_pages_change((long long int)new_val); -#endif - return new_val; -} - /* * Max size for kbdev memory pool (in pages) */ @@ -525,10 +590,42 @@ static inline u32 kbase_atomic_sub_pages(u32 num_pages, atomic_t *used_pages) #define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER 0 /** + * kbase_mem_pool_config_set_max_size - Set maximum number of free pages in + * initial configuration of a memory pool + * + * @config: Initial configuration 
for a physical memory pool + * @max_size: Maximum number of free pages that a pool created from + * @config can hold + */ +static inline void kbase_mem_pool_config_set_max_size( + struct kbase_mem_pool_config *const config, size_t const max_size) +{ + WRITE_ONCE(config->max_size, max_size); +} + +/** + * kbase_mem_pool_config_get_max_size - Get maximum number of free pages from + * initial configuration of a memory pool + * + * @config: Initial configuration for a physical memory pool + * + * Return: Maximum number of free pages that a pool created from @config + * can hold + */ +static inline size_t kbase_mem_pool_config_get_max_size( + const struct kbase_mem_pool_config *const config) +{ + return READ_ONCE(config->max_size); +} + +/** * kbase_mem_pool_init - Create a memory pool for a kbase device * @pool: Memory pool to initialize - * @max_size: Maximum number of free pages the pool can hold + * @config: Initial configuration for the memory pool * @order: Page order for physical page size (order=0=>4kB, order=9=>2MB) + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager, if present. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). * @kbdev: Kbase device where memory is used * @next_pool: Pointer to the next pool or NULL. * @@ -539,7 +636,7 @@ static inline u32 kbase_atomic_sub_pages(u32 num_pages, atomic_t *used_pages) * certain corner cases grow above @max_size. * * If @next_pool is not NULL, we will allocate from @next_pool before going to - * the kernel allocator. Similarily pages can spill over to @next_pool when + * the memory group manager. Similarly pages can spill over to @next_pool when * @pool is full. Pages are zeroed before they spill over to another pool, to * prevent leaking information between applications. 
* @@ -549,8 +646,9 @@ static inline u32 kbase_atomic_sub_pages(u32 num_pages, atomic_t *used_pages) * Return: 0 on success, negative -errno on error */ int kbase_mem_pool_init(struct kbase_mem_pool *pool, - size_t max_size, - size_t order, + const struct kbase_mem_pool_config *config, + unsigned int order, + int group_id, struct kbase_device *kbdev, struct kbase_mem_pool *next_pool); @@ -867,7 +965,6 @@ int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, int kbase_add_va_region_rbtree(struct kbase_device *kbdev, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); -int kbase_remove_va_region(struct kbase_va_region *reg); bool kbase_check_alloc_flags(unsigned long flags); bool kbase_check_import_flags(unsigned long flags); @@ -1218,6 +1315,8 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( * * @param[in] alloc allocation object to free pages from * @param[in] nr_pages_to_free number of physical pages to free +* +* Return: 0 on success, otherwise a negative error code */ int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free); diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index c70112d..70da90c 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,6 +50,23 @@ #include <mali_kbase_tlstream.h> #include <mali_kbase_ioctl.h> +#if (KERNEL_VERSION(4, 17, 0) > LINUX_VERSION_CODE) +#define vm_fault_t int + +static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn) +{ + int err = vm_insert_pfn(vma, addr, pfn); + + if (unlikely(err == -ENOMEM)) + return VM_FAULT_OOM; + if (unlikely(err < 0 && err != -EBUSY)) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} +#endif + static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, u64 offset_bytes, size_t size, @@ -188,7 +205,7 @@ void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, kctx, gpu_addr); } - if (reg == NULL || (reg->flags & KBASE_REG_FREE) != 0) + if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; kern_mapping = reg->cpu_alloc->permanent_map; @@ -241,6 +258,9 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, KBASE_DEBUG_ASSERT(gpu_va); dev = kctx->kbdev->dev; + dev_dbg(dev, "Allocating %lld va_pages, %lld commit_pages, %lld extent, 0x%llX flags\n", + va_pages, commit_pages, extent, *flags); + *gpu_va = 0; /* return 0 on failure */ if (!kbase_check_alloc_flags(*flags)) { @@ -279,6 +299,12 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extent)) goto bad_sizes; +#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED + /* Ensure that memory is fully physically-backed. 
*/ + if (*flags & BASE_MEM_GROW_ON_GPF) + commit_pages = va_pages; +#endif + /* find out which VA zone to use */ if (*flags & BASE_MEM_SAME_VA) { rbtree = &kctx->reg_rbtree_same; @@ -300,7 +326,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, if (kbase_update_region_flags(kctx, reg, *flags) != 0) goto invalid_flags; - if (kbase_reg_prepare_native(reg, kctx) != 0) { + if (kbase_reg_prepare_native(reg, kctx, BASE_MEM_GROUP_DEFAULT) != 0) { dev_err(dev, "Failed to prepare region"); goto prepare_failed; } @@ -438,7 +464,7 @@ int kbase_mem_query(struct kbase_context *kctx, /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); - if (!reg || (reg->flags & KBASE_REG_FREE)) + if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; switch (query) { @@ -656,14 +682,16 @@ void kbase_mem_evictable_deinit(struct kbase_context *kctx) void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) { struct kbase_context *kctx = alloc->imported.native.kctx; + struct kbase_device *kbdev = kctx->kbdev; int __maybe_unused new_page_count; kbase_process_page_usage_dec(kctx, alloc->nents); - new_page_count = kbase_atomic_sub_pages(alloc->nents, - &kctx->used_pages); - kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); + new_page_count = atomic_sub_return(alloc->nents, + &kctx->used_pages); + atomic_sub(alloc->nents, &kctx->kbdev->memdev.used_pages); KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, kctx->id, (u64)new_page_count); } @@ -676,11 +704,12 @@ static void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) { struct kbase_context *kctx = alloc->imported.native.kctx; + struct kbase_device *kbdev = kctx->kbdev; int __maybe_unused new_page_count; - new_page_count = kbase_atomic_add_pages(alloc->nents, - &kctx->used_pages); - kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); + new_page_count = atomic_add_return(alloc->nents, + &kctx->used_pages); + 
atomic_add(alloc->nents, &kctx->kbdev->memdev.used_pages); /* Increase mm counters so that the allocation is accounted for * against the process and thus is visible to the OOM killer, @@ -688,6 +717,7 @@ void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) kbase_process_page_usage_inc(kctx, alloc->nents); KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, kctx->id, (u64)new_page_count); } @@ -804,7 +834,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); - if (!reg || (reg->flags & KBASE_REG_FREE)) + if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; /* Is the region being transitioning between not needed and needed? */ @@ -931,7 +961,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, goto invalid_flags; reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, - KBASE_MEM_TYPE_IMPORTED_UMM); + KBASE_MEM_TYPE_IMPORTED_UMM, BASE_MEM_GROUP_DEFAULT); if (IS_ERR_OR_NULL(reg->gpu_alloc)) goto no_alloc_obj; @@ -1056,8 +1086,9 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( if (!reg) goto no_region; - reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, - KBASE_MEM_TYPE_IMPORTED_USER_BUF); + reg->gpu_alloc = kbase_alloc_create( + kctx, *va_pages, KBASE_MEM_TYPE_IMPORTED_USER_BUF, + BASE_MEM_GROUP_DEFAULT); if (IS_ERR_OR_NULL(reg->gpu_alloc)) goto no_alloc_obj; @@ -1106,7 +1137,13 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) faulted_pages = get_user_pages(current, current->mm, address, *va_pages, +#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ +KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE + reg->flags & KBASE_REG_GPU_WR ? 
FOLL_WRITE : 0, + pages, NULL); +#else reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); +#endif #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) faulted_pages = get_user_pages(address, *va_pages, reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); @@ -1242,7 +1279,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, goto no_reg; /* zero-sized page array, as we don't need one/can support one */ - reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS); + reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS, + BASE_MEM_GROUP_DEFAULT); if (IS_ERR_OR_NULL(reg->gpu_alloc)) goto no_alloc_obj; @@ -1280,10 +1318,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); /* validate found region */ - if (!aliasing_reg) - goto bad_handle; /* Not found */ - if (aliasing_reg->flags & KBASE_REG_FREE) - goto bad_handle; /* Free region */ + if (kbase_is_region_invalid_or_free(aliasing_reg)) + goto bad_handle; /* Not found/already free */ if (aliasing_reg->flags & KBASE_REG_DONT_NEED) goto bad_handle; /* Ephemeral region */ if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED)) @@ -1580,7 +1616,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); - if (!reg || (reg->flags & KBASE_REG_FREE)) + if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; KBASE_DEBUG_ASSERT(reg->cpu_alloc); @@ -1603,6 +1639,12 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (reg->flags & KBASE_REG_DONT_NEED) goto out_unlock; +#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED + /* Reject resizing commit size */ + if (reg->flags & KBASE_REG_PF_GROW) + new_pages = reg->nr_pages; +#endif + if (new_pages == reg->gpu_alloc->nents) { /* no change */ res = 0; @@ -1718,6 +1760,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) 
list_del(&map->mappings_list); + kbase_va_region_alloc_put(map->kctx, map->region); kbase_gpu_vm_unlock(map->kctx); kbase_mem_phy_alloc_put(map->alloc); @@ -1726,58 +1769,110 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) KBASE_EXPORT_TEST_API(kbase_cpu_vm_close); +static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma, + struct kbase_va_region *reg, + pgoff_t *start_off, + size_t nr_pages) +{ + struct kbase_aliased *aliased = + reg->cpu_alloc->imported.alias.aliased; + + if (!reg->cpu_alloc->imported.alias.stride || + reg->nr_pages < (*start_off + nr_pages)) { + return NULL; + } + + while (*start_off >= reg->cpu_alloc->imported.alias.stride) { + aliased++; + *start_off -= reg->cpu_alloc->imported.alias.stride; + } -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)) -static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) + if (!aliased->alloc) { + /* sink page not available for dumping map */ + return NULL; + } + + if ((*start_off + nr_pages) > aliased->length) { + /* not fully backed by physical pages */ + return NULL; + } + + return aliased; +} + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) +static vm_fault_t kbase_cpu_vm_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) { #else -static int kbase_cpu_vm_fault(struct vm_fault *vmf) +static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; #endif struct kbase_cpu_mapping *map = vma->vm_private_data; - pgoff_t rel_pgoff; + pgoff_t map_start_pgoff; + pgoff_t fault_pgoff; size_t i; pgoff_t addr; + size_t nents; + struct tagged_addr *pages; + vm_fault_t ret = VM_FAULT_SIGBUS; KBASE_DEBUG_ASSERT(map); KBASE_DEBUG_ASSERT(map->count > 0); KBASE_DEBUG_ASSERT(map->kctx); KBASE_DEBUG_ASSERT(map->alloc); - rel_pgoff = vmf->pgoff - map->region->start_pfn; + map_start_pgoff = vma->vm_pgoff - map->region->start_pfn; kbase_gpu_vm_lock(map->kctx); - if (rel_pgoff >= map->alloc->nents) - goto locked_bad_fault; + if 
(unlikely(map->region->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) { + struct kbase_aliased *aliased = + get_aliased_alloc(vma, map->region, &map_start_pgoff, 1); + + if (!aliased) + goto exit; + nents = aliased->length; + pages = aliased->alloc->pages + aliased->offset; + } else { + nents = map->alloc->nents; + pages = map->alloc->pages; + } + + fault_pgoff = map_start_pgoff + (vmf->pgoff - vma->vm_pgoff); + + if (fault_pgoff >= nents) + goto exit; + + /* Apparently when job dumping is enabled, there are accesses to the GPU + * memory (JIT) that has been marked as reclaimable. Disallowing that + * causes job dumping to fail. + */ +#if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP) /* Fault on access to DONT_NEED regions */ if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED)) - goto locked_bad_fault; - - /* insert all valid pages from the fault location */ - i = rel_pgoff; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - addr = (pgoff_t)((uintptr_t)vmf->virtual_address >> PAGE_SHIFT); -#else - addr = (pgoff_t)(vmf->address >> PAGE_SHIFT); + goto exit; #endif - while (i < map->alloc->nents && (addr < vma->vm_end >> PAGE_SHIFT)) { - int ret = vm_insert_pfn(vma, addr << PAGE_SHIFT, - PFN_DOWN(as_phys_addr_t(map->alloc->pages[i]))); - if (ret < 0 && ret != -EBUSY) - goto locked_bad_fault; + + /* We are inserting all valid pages from the start of CPU mapping and + * not from the fault location (the mmap handler was previously doing + * the same). 
+ */ + i = map_start_pgoff; + addr = (pgoff_t)(vma->vm_start >> PAGE_SHIFT); + while (i < nents && (addr < vma->vm_end >> PAGE_SHIFT)) { + ret = vmf_insert_pfn(vma, addr << PAGE_SHIFT, + PFN_DOWN(as_phys_addr_t(pages[i]))); + if (ret != VM_FAULT_NOPAGE) + goto exit; i++; addr++; } +exit: kbase_gpu_vm_unlock(map->kctx); - /* we resolved it, nothing for VM to do */ - return VM_FAULT_NOPAGE; - -locked_bad_fault: - kbase_gpu_vm_unlock(map->kctx); - return VM_FAULT_SIGBUS; + return ret; } const struct vm_operations_struct kbase_vm_ops = { @@ -1795,10 +1890,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, int free_on_close) { struct kbase_cpu_mapping *map; - struct tagged_addr *page_array; int err = 0; - int i; - u64 start_off; map = kzalloc(sizeof(*map), GFP_KERNEL); @@ -1830,38 +1922,16 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, vma->vm_ops = &kbase_vm_ops; vma->vm_private_data = map; - page_array = kbase_get_cpu_phy_pages(reg); - start_off = vma->vm_pgoff - reg->start_pfn + - (aligned_offset >> PAGE_SHIFT); if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS && nr_pages) { + pgoff_t rel_pgoff = vma->vm_pgoff - reg->start_pfn + + (aligned_offset >> PAGE_SHIFT); struct kbase_aliased *aliased = - reg->cpu_alloc->imported.alias.aliased; + get_aliased_alloc(vma, reg, &rel_pgoff, nr_pages); - if (!reg->cpu_alloc->imported.alias.stride || - reg->nr_pages < (start_off + nr_pages)) { + if (!aliased) { err = -EINVAL; goto out; } - - while (start_off >= reg->cpu_alloc->imported.alias.stride) { - aliased++; - start_off -= reg->cpu_alloc->imported.alias.stride; - } - - if (!aliased->alloc) { - /* sink page not available for dumping map */ - err = -EINVAL; - goto out; - } - - if ((start_off + nr_pages) > aliased->length) { - /* not fully backed by physical pages */ - err = -EINVAL; - goto out; - } - - /* ready the pages for dumping map */ - page_array = aliased->alloc->pages + aliased->offset; } if (!(reg->flags & KBASE_REG_CPU_CACHED) && @@ -1876,19 +1946,7 @@ 
static int kbase_cpu_mmap(struct kbase_context *kctx, } if (!kaddr) { - unsigned long addr = vma->vm_start + aligned_offset; - vma->vm_flags |= VM_PFNMAP; - for (i = 0; i < nr_pages; i++) { - phys_addr_t phys; - - phys = as_phys_addr_t(page_array[i + start_off]); - err = vm_insert_pfn(vma, addr, PFN_DOWN(phys)); - if (WARN_ON(err)) - break; - - addr += PAGE_SIZE; - } } else { WARN_ON(aligned_offset); /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ @@ -1903,7 +1961,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, goto out; } - map->region = reg; + map->region = kbase_va_region_alloc_get(kctx, reg); map->free_on_close = free_on_close; map->kctx = kctx; map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); @@ -1945,7 +2003,8 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct goto out; } - new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW); + new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW, + BASE_MEM_GROUP_DEFAULT); if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { err = -ENOMEM; new_reg->cpu_alloc = NULL; @@ -2134,7 +2193,7 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma) reg = kbase_region_tracker_find_region_enclosing_address(kctx, (u64)vma->vm_pgoff << PAGE_SHIFT); - if (reg && !(reg->flags & KBASE_REG_FREE)) { + if (!kbase_is_region_invalid_or_free(reg)) { /* will this mapping overflow the size of the region? 
*/ if (nr_pages > (reg->nr_pages - (vma->vm_pgoff - reg->start_pfn))) { @@ -2334,7 +2393,7 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); - if (!reg || (reg->flags & KBASE_REG_FREE)) + if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; /* check access permissions can be satisfied diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c index 0f91be1..0723e32 100644 --- a/mali_kbase/mali_kbase_mem_pool.c +++ b/mali_kbase/mali_kbase_mem_pool.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -155,7 +155,8 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) { struct page *p; gfp_t gfp; - struct device *dev = pool->kbdev->dev; + struct kbase_device *const kbdev = pool->kbdev; + struct device *const dev = kbdev->dev; dma_addr_t dma_addr; int i; @@ -167,18 +168,21 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) gfp = GFP_HIGHUSER | __GFP_ZERO; #endif - /* don't warn on higer order failures */ + /* don't warn on higher order failures */ if (pool->order) gfp |= __GFP_NOWARN; - p = alloc_pages(gfp, pool->order); + p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev, + pool->group_id, gfp, pool->order); if (!p) return NULL; dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, dma_addr)) { - __free_pages(p, pool->order); + kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, + pool->group_id, p, pool->order); return NULL; } @@ -192,7 +196,8 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p) { - struct device 
*dev = pool->kbdev->dev; + struct kbase_device *const kbdev = pool->kbdev; + struct device *const dev = kbdev->dev; dma_addr_t dma_addr = kbase_dma_addr(p); int i; @@ -200,7 +205,9 @@ static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, DMA_BIDIRECTIONAL); for (i = 0; i < (1u << pool->order); i++) kbase_clear_dma_addr(p+i); - __free_pages(p, pool->order); + + kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, + pool->group_id, p, pool->order); pool_dbg(pool, "freed page to kernel\n"); } @@ -369,14 +376,21 @@ static int kbase_mem_pool_reclaim_shrink(struct shrinker *s, #endif int kbase_mem_pool_init(struct kbase_mem_pool *pool, - size_t max_size, - size_t order, + const struct kbase_mem_pool_config *config, + unsigned int order, + int group_id, struct kbase_device *kbdev, struct kbase_mem_pool *next_pool) { + if (WARN_ON(group_id < 0) || + WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { + return -EINVAL; + } + pool->cur_size = 0; - pool->max_size = max_size; + pool->max_size = kbase_mem_pool_config_get_max_size(config); pool->order = order; + pool->group_id = group_id; pool->kbdev = kbdev; pool->next_pool = next_pool; pool->dying = false; diff --git a/mali_kbase/mali_kbase_mem_pool_debugfs.c b/mali_kbase/mali_kbase_mem_pool_debugfs.c index 4b4eeb3..9896202 100644 --- a/mali_kbase/mali_kbase_mem_pool_debugfs.c +++ b/mali_kbase/mali_kbase_mem_pool_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,71 +23,165 @@ #include <linux/debugfs.h> #include <linux/seq_file.h> -#include <mali_kbase_mem_pool_debugfs.h> +#include "mali_kbase_mem_pool_debugfs.h" +#include "mali_kbase_debugfs_helper.h" #ifdef CONFIG_DEBUG_FS -static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val) +void kbase_mem_pool_debugfs_trim(void *const array, size_t const index, + size_t const value) { - struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; + struct kbase_mem_pool *const mem_pools = array; - *val = kbase_mem_pool_size(pool); + if (WARN_ON(!mem_pools) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return; - return 0; + kbase_mem_pool_trim(&mem_pools[index], value); } -static int kbase_mem_pool_debugfs_size_set(void *data, u64 val) +void kbase_mem_pool_debugfs_set_max_size(void *const array, + size_t const index, size_t const value) { - struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; + struct kbase_mem_pool *const mem_pools = array; - kbase_mem_pool_trim(pool, val); + if (WARN_ON(!mem_pools) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return; - return 0; + kbase_mem_pool_set_max_size(&mem_pools[index], value); } -DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops, - kbase_mem_pool_debugfs_size_get, - kbase_mem_pool_debugfs_size_set, - "%llu\n"); +size_t kbase_mem_pool_debugfs_size(void *const array, size_t const index) +{ + struct kbase_mem_pool *const mem_pools = array; + + if (WARN_ON(!mem_pools) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return 0; + + return kbase_mem_pool_size(&mem_pools[index]); +} -static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val) +size_t kbase_mem_pool_debugfs_max_size(void *const array, size_t const index) { - struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; + struct kbase_mem_pool *const mem_pools = 
array; - *val = kbase_mem_pool_max_size(pool); + if (WARN_ON(!mem_pools) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return 0; - return 0; + return kbase_mem_pool_max_size(&mem_pools[index]); } -static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val) +void kbase_mem_pool_config_debugfs_set_max_size(void *const array, + size_t const index, size_t const value) { - struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; + struct kbase_mem_pool_config *const configs = array; - kbase_mem_pool_set_max_size(pool, val); + if (WARN_ON(!configs) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return; + + kbase_mem_pool_config_set_max_size(&configs[index], value); +} + +size_t kbase_mem_pool_config_debugfs_max_size(void *const array, + size_t const index) +{ + struct kbase_mem_pool_config *const configs = array; - return 0; + if (WARN_ON(!configs) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return 0; + + return kbase_mem_pool_config_get_max_size(&configs[index]); +} + +static int kbase_mem_pool_debugfs_size_show(struct seq_file *sfile, void *data) +{ + CSTD_UNUSED(data); + return kbase_debugfs_helper_seq_read(sfile, + MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_size); +} + +static ssize_t kbase_mem_pool_debugfs_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + int err; + + CSTD_UNUSED(ppos); + err = kbase_debugfs_helper_seq_write(file, ubuf, count, + MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_trim); + return err ? 
err : count; +} + +static int kbase_mem_pool_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbase_mem_pool_debugfs_size_show, + in->i_private); +} + +static const struct file_operations kbase_mem_pool_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbase_mem_pool_debugfs_open, + .read = seq_read, + .write = kbase_mem_pool_debugfs_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int kbase_mem_pool_debugfs_max_size_show(struct seq_file *sfile, + void *data) +{ + CSTD_UNUSED(data); + return kbase_debugfs_helper_seq_read(sfile, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_max_size); +} + +static ssize_t kbase_mem_pool_debugfs_max_size_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + int err; + + CSTD_UNUSED(ppos); + err = kbase_debugfs_helper_seq_write(file, ubuf, count, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); + return err ? err : count; +} + +static int kbase_mem_pool_debugfs_max_size_open(struct inode *in, + struct file *file) +{ + return single_open(file, kbase_mem_pool_debugfs_max_size_show, + in->i_private); } -DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops, - kbase_mem_pool_debugfs_max_size_get, - kbase_mem_pool_debugfs_max_size_set, - "%llu\n"); +static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = { + .owner = THIS_MODULE, + .open = kbase_mem_pool_debugfs_max_size_open, + .read = seq_read, + .write = kbase_mem_pool_debugfs_max_size_write, + .llseek = seq_lseek, + .release = single_release, +}; void kbase_mem_pool_debugfs_init(struct dentry *parent, - struct kbase_mem_pool *pool, - struct kbase_mem_pool *lp_pool) + struct kbase_context *kctx) { debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent, - pool, &kbase_mem_pool_debugfs_size_fops); + &kctx->mem_pools.small, &kbase_mem_pool_debugfs_fops); debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent, - pool, 
&kbase_mem_pool_debugfs_max_size_fops); + &kctx->mem_pools.small, &kbase_mem_pool_debugfs_max_size_fops); debugfs_create_file("lp_mem_pool_size", S_IRUGO | S_IWUSR, parent, - lp_pool, &kbase_mem_pool_debugfs_size_fops); + &kctx->mem_pools.large, &kbase_mem_pool_debugfs_fops); debugfs_create_file("lp_mem_pool_max_size", S_IRUGO | S_IWUSR, parent, - lp_pool, &kbase_mem_pool_debugfs_max_size_fops); + &kctx->mem_pools.large, &kbase_mem_pool_debugfs_max_size_fops); } #endif /* CONFIG_DEBUG_FS */ diff --git a/mali_kbase/mali_kbase_mem_pool_debugfs.h b/mali_kbase/mali_kbase_mem_pool_debugfs.h index 990d91c..2932945 100644 --- a/mali_kbase/mali_kbase_mem_pool_debugfs.h +++ b/mali_kbase/mali_kbase_mem_pool_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,26 +20,104 @@ * */ -#ifndef _KBASE_MEM_POOL_DEBUGFS_H -#define _KBASE_MEM_POOL_DEBUGFS_H +#ifndef _KBASE_MEM_POOL_DEBUGFS_H_ +#define _KBASE_MEM_POOL_DEBUGFS_H_ #include <mali_kbase.h> /** * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool * @parent: Parent debugfs dentry - * @pool: Memory pool of small pages to control - * @lp_pool: Memory pool of large pages to control + * @kctx: The kbase context * * Adds four debugfs files under @parent: - * - mem_pool_size: get/set the current size of @pool - * - mem_pool_max_size: get/set the max size of @pool - * - lp_mem_pool_size: get/set the current size of @lp_pool - * - lp_mem_pool_max_size: get/set the max size of @lp_pool + * - mem_pool_size: get/set the current sizes of @kctx: mem_pools + * - mem_pool_max_size: get/set the max sizes of @kctx: mem_pools + * - lp_mem_pool_size: get/set the current sizes of @kctx: lp_mem_pool + * - lp_mem_pool_max_size: get/set the max sizes of @kctx:lp_mem_pool */ void 
kbase_mem_pool_debugfs_init(struct dentry *parent, - struct kbase_mem_pool *pool, - struct kbase_mem_pool *lp_pool); + struct kbase_context *kctx); -#endif /*_KBASE_MEM_POOL_DEBUGFS_H*/ +/** + * kbase_mem_pool_debugfs_trim - Grow or shrink a memory pool to a new size + * + * @array: Address of the first in an array of physical memory pools. + * @index: A memory group ID to be used as an index into the array of memory + * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @value: New number of pages in the pool. + * + * If @value > current size, fill the pool with new pages from the kernel, but + * not above the max_size for the pool. + * If @value < current size, shrink the pool by freeing pages to the kernel. + */ +void kbase_mem_pool_debugfs_trim(void *array, size_t index, size_t value); + +/** + * kbase_mem_pool_debugfs_set_max_size - Set maximum number of free pages in + * memory pool + * + * @array: Address of the first in an array of physical memory pools. + * @index: A memory group ID to be used as an index into the array of memory + * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @value: Maximum number of free pages the pool can hold. + * + * If the maximum size is reduced, the pool will be shrunk to adhere to the + * new limit. For details see kbase_mem_pool_shrink(). + */ +void kbase_mem_pool_debugfs_set_max_size(void *array, size_t index, + size_t value); + +/** + * kbase_mem_pool_debugfs_size - Get number of free pages in a memory pool + * + * @array: Address of the first in an array of physical memory pools. + * @index: A memory group ID to be used as an index into the array of memory + * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Note: the size of the pool may in certain corner cases exceed @max_size! 
+ * + * Return: Number of free pages in the pool + */ +size_t kbase_mem_pool_debugfs_size(void *array, size_t index); + +/** + * kbase_mem_pool_debugfs_max_size - Get maximum number of free pages in a + * memory pool + * + * @array: Address of the first in an array of physical memory pools. + * @index: A memory group ID to be used as an index into the array of memory + * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Return: Maximum number of free pages in the pool + */ +size_t kbase_mem_pool_debugfs_max_size(void *array, size_t index); + +/** + * kbase_mem_pool_config_debugfs_set_max_size - Set maximum number of free pages + * in initial configuration of pool + * + * @array: Array of initial configurations for a set of physical memory pools. + * @index: A memory group ID to be used as an index into the array. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @value : Maximum number of free pages that a memory pool created from the + * selected configuration can hold. + */ +void kbase_mem_pool_config_debugfs_set_max_size(void *array, size_t index, + size_t value); + +/** + * kbase_mem_pool_config_debugfs_max_size - Get maximum number of free pages + * from initial configuration of pool + * + * @array: Array of initial configurations for a set of physical memory pools. + * @index: A memory group ID to be used as an index into the array. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Return: Maximum number of free pages that a memory pool created from the + * selected configuration can hold. + */ +size_t kbase_mem_pool_config_debugfs_max_size(void *array, size_t index); + +#endif /*_KBASE_MEM_POOL_DEBUGFS_H_ */ diff --git a/mali_kbase/mali_kbase_mem_pool_group.c b/mali_kbase/mali_kbase_mem_pool_group.c new file mode 100644 index 0000000..aa25548 --- /dev/null +++ b/mali_kbase/mali_kbase_mem_pool_group.c @@ -0,0 +1,115 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include <mali_kbase.h> +#include <mali_kbase_mem.h> +#include <mali_kbase_mem_pool_group.h> + +#include <linux/memory_group_manager.h> + +void kbase_mem_pool_group_config_set_max_size( + struct kbase_mem_pool_group_config *const configs, + size_t const max_size) +{ + size_t const large_max_size = max_size >> + (KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER - + KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER); + int gid; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { + kbase_mem_pool_config_set_max_size(&configs->small[gid], + max_size); + + kbase_mem_pool_config_set_max_size(&configs->large[gid], + large_max_size); + } +} + +int kbase_mem_pool_group_init( + struct kbase_mem_pool_group *const mem_pools, + struct kbase_device *const kbdev, + const struct kbase_mem_pool_group_config *const configs, + struct kbase_mem_pool_group *next_pools) +{ + int gid, err = 0; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { + err = kbase_mem_pool_init(&mem_pools->small[gid], + &configs->small[gid], + KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER, + gid, + kbdev, + next_pools ? 
&next_pools->small[gid] : NULL); + + if (!err) { + err = kbase_mem_pool_init(&mem_pools->large[gid], + &configs->large[gid], + KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER, + gid, + kbdev, + next_pools ? &next_pools->large[gid] : NULL); + if (err) + kbase_mem_pool_term(&mem_pools->small[gid]); + } + + /* Break out of the loop early to avoid incrementing the count + * of memory pool pairs successfully initialized. + */ + if (err) + break; + } + + if (err) { + /* gid gives the number of memory pool pairs successfully + * initialized, which is one greater than the array index of the + * last group. + */ + while (gid-- > 0) { + kbase_mem_pool_term(&mem_pools->small[gid]); + kbase_mem_pool_term(&mem_pools->large[gid]); + } + } + + return err; +} + +void kbase_mem_pool_group_mark_dying( + struct kbase_mem_pool_group *const mem_pools) +{ + int gid; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { + kbase_mem_pool_mark_dying(&mem_pools->small[gid]); + kbase_mem_pool_mark_dying(&mem_pools->large[gid]); + } +} + +void kbase_mem_pool_group_term( + struct kbase_mem_pool_group *const mem_pools) +{ + int gid; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { + kbase_mem_pool_term(&mem_pools->small[gid]); + kbase_mem_pool_term(&mem_pools->large[gid]); + } +} diff --git a/mali_kbase/mali_kbase_mem_pool_group.h b/mali_kbase/mali_kbase_mem_pool_group.h new file mode 100644 index 0000000..0484f59 --- /dev/null +++ b/mali_kbase/mali_kbase_mem_pool_group.h @@ -0,0 +1,92 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_MEM_POOL_GROUP_H_ +#define _KBASE_MEM_POOL_GROUP_H_ + +#include <mali_kbase_defs.h> + +/** + * kbase_mem_pool_group_config_set_max_size - Set the initial configuration for + * a set of memory pools + * + * This function sets the initial configuration for every memory pool so that + * the maximum amount of free memory that each pool can hold is identical. + * The equivalent number of 2 MiB pages is calculated automatically for the + * purpose of configuring the large page pools. + * + * @configs: Initial configuration for the set of memory pools + * @max_size: Maximum number of free 4 KiB pages each pool can hold + */ +void kbase_mem_pool_group_config_set_max_size( + struct kbase_mem_pool_group_config *configs, size_t max_size); + +/** + * kbase_mem_pool_group_init - Initialize a set of memory pools + * + * Initializes a complete set of physical memory pools. Memory pools are used to + * allow efficient reallocation of previously-freed physical pages. A pair of + * memory pools is initialized for each physical memory group: one for 4 KiB + * pages and one for 2 MiB pages. + * + * If @next_pools is not NULL then a request to allocate memory from an + * empty pool in @mem_pools will attempt to allocate from the equivalent pool + * in @next_pools before going to the memory group manager. Similarly + * pages can spill over to the equivalent pool in @next_pools when a pool + * is full in @mem_pools.
Pages are zeroed before they spill over to another + * pool, to prevent leaking information between applications. + * + * @mem_pools: Set of memory pools to initialize + * @kbdev: Kbase device where memory is used + * @configs: Initial configuration for the set of memory pools + * @next_pools: Set of memory pools from which to allocate memory if there + * is no free memory in one of the @mem_pools + * + * Return: 0 on success, otherwise a negative error code + */ +int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools, + struct kbase_device *kbdev, + const struct kbase_mem_pool_group_config *configs, + struct kbase_mem_pool_group *next_pools); + +/** + * kbase_mem_pool_group_mark_dying - Mark a set of memory pools as dying + * + * Marks a complete set of physical memory pools previously initialized by + * @kbase_mem_pool_group_init as dying. This will cause any ongoing allocation + * operations (e.g. growing on page fault) to be terminated. + * + * @mem_pools: Set of memory pools to mark + */ +void kbase_mem_pool_group_mark_dying(struct kbase_mem_pool_group *mem_pools); + +/** + * kbase_mem_pool_group_term - Terminate a set of memory pools + * + * Terminates a complete set of physical memory pools previously initialized by + * @kbase_mem_pool_group_init. + * + * @mem_pools: Set of memory pools to terminate + */ +void kbase_mem_pool_group_term(struct kbase_mem_pool_group *mem_pools); + +#endif /* _KBASE_MEM_POOL_GROUP_H_ */ diff --git a/mali_kbase/mali_kbase_mmu.c b/mali_kbase/mali_kbase_mmu.c index 84341ca..8192bc8 100644 --- a/mali_kbase/mali_kbase_mmu.c +++ b/mali_kbase/mali_kbase_mmu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,9 +32,6 @@ #include <linux/dma-mapping.h> #include <mali_kbase.h> #include <mali_midg_regmap.h> -#if defined(CONFIG_MALI_GATOR_SUPPORT) -#include <mali_kbase_gator.h> -#endif #include <mali_kbase_tlstream.h> #include <mali_kbase_instr_defs.h> #include <mali_kbase_debug.h> @@ -230,7 +227,7 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, /* Find region and check if it should be writable. */ region = kbase_region_tracker_find_region_enclosing_address(kctx, fault->addr); - if (!region || region->flags & KBASE_REG_FREE) { + if (kbase_is_region_invalid_or_free(region)) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Memory is not mapped on the GPU", @@ -358,13 +355,20 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, bool alloc_failed = false; size_t pages_still_required; + if (WARN_ON(region->gpu_alloc->group_id >= + MEMORY_GROUP_MANAGER_NR_GROUPS)) { + /* Do not try to grow the memory pool */ + *pages_to_grow = 0; + return false; + } + #ifdef CONFIG_MALI_2MB_ALLOC if (new_pages >= (SZ_2M / SZ_4K)) { - root_pool = &kctx->lp_mem_pool; + root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id]; *grow_2mb_pool = true; } else { #endif - root_pool = &kctx->mem_pool; + root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id]; *grow_2mb_pool = false; #ifdef CONFIG_MALI_2MB_ALLOC } @@ -637,7 +641,7 @@ page_fault_retry: region = kbase_region_tracker_find_region_enclosing_address(kctx, fault->addr); - if (!region || region->flags & KBASE_REG_FREE) { + if (kbase_is_region_invalid_or_free(region)) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Memory is not mapped on the GPU", fault); @@ -651,6 +655,13 @@ page_fault_retry: goto fault_done; } + if (region->gpu_alloc->group_id >= 
MEMORY_GROUP_MANAGER_NR_GROUPS) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Bad physical memory group ID", fault); + goto fault_done; + } + if ((region->flags & GROWABLE_FLAGS_REQUIRED) != GROWABLE_FLAGS_REQUIRED) { kbase_gpu_vm_unlock(kctx); @@ -763,10 +774,7 @@ page_fault_retry: "Page table update failure", fault); goto fault_done; } -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_page_fault_insert_pages(as_no, new_pages); -#endif - KBASE_TLSTREAM_AUX_PAGEFAULT(kctx->id, (u64)new_pages); + KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, (u64)new_pages); /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); @@ -831,15 +839,24 @@ page_fault_retry: #ifdef CONFIG_MALI_2MB_ALLOC if (grow_2mb_pool) { /* Round page requirement up to nearest 2 MB */ + struct kbase_mem_pool *const lp_mem_pool = + &kctx->mem_pools.large[ + region->gpu_alloc->group_id]; + pages_to_grow = (pages_to_grow + - ((1 << kctx->lp_mem_pool.order) - 1)) - >> kctx->lp_mem_pool.order; - ret = kbase_mem_pool_grow(&kctx->lp_mem_pool, - pages_to_grow); + ((1 << lp_mem_pool->order) - 1)) + >> lp_mem_pool->order; + + ret = kbase_mem_pool_grow(lp_mem_pool, + pages_to_grow); } else { #endif - ret = kbase_mem_pool_grow(&kctx->mem_pool, - pages_to_grow); + struct kbase_mem_pool *const mem_pool = + &kctx->mem_pools.small[ + region->gpu_alloc->group_id]; + + ret = kbase_mem_pool_grow(mem_pool, + pages_to_grow); #ifdef CONFIG_MALI_2MB_ALLOC } #endif @@ -873,7 +890,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, int i; struct page *p; - p = kbase_mem_pool_alloc(&kbdev->mem_pool); + p = kbase_mem_pool_alloc( + &kbdev->mem_pools.small[BASE_MEM_GROUP_DEFAULT]); if (!p) return 0; @@ -888,15 +906,16 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, if (mmut->kctx) { int new_page_count; - new_page_count = kbase_atomic_add_pages(1, - &mmut->kctx->used_pages); + new_page_count = atomic_add_return(1, + 
&mmut->kctx->used_pages); KBASE_TLSTREAM_AUX_PAGESALLOC( - mmut->kctx->id, - (u64)new_page_count); + kbdev, + mmut->kctx->id, + (u64)new_page_count); kbase_process_page_usage_inc(mmut->kctx, 1); } - kbase_atomic_add_pages(1, &kbdev->memdev.used_pages); + atomic_add(1, &kbdev->memdev.used_pages); for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) kbdev->mmu_mode->entry_invalidate(&page[i]); @@ -907,7 +926,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, return page_to_phys(p); alloc_free: - kbase_mem_pool_free(&kbdev->mem_pool, p, false); + kbase_mem_pool_free(&kbdev->mem_pools.small[BASE_MEM_GROUP_DEFAULT], p, + false); return 0; } @@ -1129,8 +1149,10 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, * the page walk to succeed */ mutex_unlock(&kctx->mmu.mmu_lock); - err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool, - MIDGARD_MMU_BOTTOMLEVEL); + err = kbase_mem_pool_grow( + &kctx->kbdev->mem_pools.small[ + BASE_MEM_GROUP_DEFAULT], + MIDGARD_MMU_BOTTOMLEVEL); mutex_lock(&kctx->mmu.mmu_lock); } while (!err); if (err) { @@ -1205,16 +1227,17 @@ static inline void cleanup_empty_pte(struct kbase_device *kbdev, tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte); tmp_p = phys_to_page(tmp_pgd); - kbase_mem_pool_free(&kbdev->mem_pool, tmp_p, false); + kbase_mem_pool_free(&kbdev->mem_pools.small[BASE_MEM_GROUP_DEFAULT], + tmp_p, false); /* If the MMU tables belong to a context then we accounted the memory * usage to that context, so decrement here. 
*/ if (mmut->kctx) { kbase_process_page_usage_dec(mmut->kctx, 1); - kbase_atomic_sub_pages(1, &mmut->kctx->used_pages); + atomic_sub(1, &mmut->kctx->used_pages); } - kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages); + atomic_sub(1, &kbdev->memdev.used_pages); } int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, @@ -1273,8 +1296,9 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, * the page walk to succeed */ mutex_unlock(&mmut->mmu_lock); - err = kbase_mem_pool_grow(&kbdev->mem_pool, - cur_level); + err = kbase_mem_pool_grow( + &kbdev->mem_pools.small[BASE_MEM_GROUP_DEFAULT], + cur_level); mutex_lock(&mmut->mmu_lock); } while (!err); @@ -1756,8 +1780,10 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, * the page walk to succeed */ mutex_unlock(&kctx->mmu.mmu_lock); - err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool, - MIDGARD_MMU_BOTTOMLEVEL); + err = kbase_mem_pool_grow( + &kctx->kbdev->mem_pools.small[ + BASE_MEM_GROUP_DEFAULT], + MIDGARD_MMU_BOTTOMLEVEL); mutex_lock(&kctx->mmu.mmu_lock); } while (!err); if (err) { @@ -1846,15 +1872,18 @@ static void mmu_teardown_level(struct kbase_device *kbdev, } p = pfn_to_page(PFN_DOWN(pgd)); - kbase_mem_pool_free(&kbdev->mem_pool, p, true); - kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages); + + kbase_mem_pool_free(&kbdev->mem_pools.small[BASE_MEM_GROUP_DEFAULT], + p, true); + + atomic_sub(1, &kbdev->memdev.used_pages); /* If MMU tables belong to a context then pages will have been accounted * against it, so we must decrement the usage counts here. 
*/ if (mmut->kctx) { kbase_process_page_usage_dec(mmut->kctx, 1); - kbase_atomic_sub_pages(1, &mmut->kctx->used_pages); + atomic_sub(1, &mmut->kctx->used_pages); } } @@ -1878,8 +1907,9 @@ int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, while (!mmut->pgd) { int err; - err = kbase_mem_pool_grow(&kbdev->mem_pool, - MIDGARD_MMU_BOTTOMLEVEL); + err = kbase_mem_pool_grow( + &kbdev->mem_pools.small[BASE_MEM_GROUP_DEFAULT], + MIDGARD_MMU_BOTTOMLEVEL); if (err) { kbase_mmu_term(kbdev, mmut); return -ENOMEM; @@ -1902,7 +1932,7 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) mutex_unlock(&mmut->mmu_lock); if (mmut->kctx) - KBASE_TLSTREAM_AUX_PAGESALLOC(mmut->kctx->id, 0); + KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0); } kfree(mmut->mmu_teardown_pages); diff --git a/mali_kbase/mali_kbase_native_mgm.c b/mali_kbase/mali_kbase_native_mgm.c new file mode 100644 index 0000000..8c4a7fd --- /dev/null +++ b/mali_kbase/mali_kbase_native_mgm.c @@ -0,0 +1,85 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include <linux/gfp.h> +#include <linux/memory_group_manager.h> + +#include <mali_kbase.h> +#include <mali_kbase_native_mgm.h> + +/** + * kbase_native_mgm_alloc - Native physical memory allocation method + * + * Delegates all memory allocation requests to the kernel's alloc_pages + * function. + * + * @mgm_dev: The memory group manager the request is being made through. + * @group_id: A physical memory group ID, which must be valid but is not used. + * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @gfp_mask: Bitmask of Get Free Page flags affecting allocator behavior. + * @order: Page order for physical page size (order=0 means 4 KiB, + * order=9 means 2 MiB). + * + * Return: Pointer to allocated page, or NULL if allocation failed. + */ +static struct page *kbase_native_mgm_alloc( + struct memory_group_manager_device *mgm_dev, int group_id, + gfp_t gfp_mask, unsigned int order) +{ + CSTD_UNUSED(mgm_dev); + WARN_ON(group_id < 0); + WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS); + + return alloc_pages(gfp_mask, order); +} + +/** + * kbase_native_mgm_free - Native physical memory freeing method + * + * Delegates all memory freeing requests to the kernel's __free_pages function. + * + * @mgm_dev: The memory group manager the request is being made through. + * @group_id: A physical memory group ID, which must be valid but is not used. + * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @page: Address of the struct associated with a page of physical + * memory that was allocated by calling the alloc method of + * the same memory pool with the same argument values. + * @order: Page order for physical page size (order=0 means 4 KiB, + * order=9 means 2 MiB). 
+ */ +static void kbase_native_mgm_free(struct memory_group_manager_device *mgm_dev, + int group_id, struct page *page, unsigned int order) +{ + CSTD_UNUSED(mgm_dev); + WARN_ON(group_id < 0); + WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS); + + __free_pages(page, order); +} + +struct memory_group_manager_device kbase_native_mgm_dev = { + .ops = { + .mgm_alloc_page = kbase_native_mgm_alloc, + .mgm_free_page = kbase_native_mgm_free + }, + .data = NULL +}; diff --git a/mali_kbase/mali_kbase_native_mgm.h b/mali_kbase/mali_kbase_native_mgm.h new file mode 100644 index 0000000..431b1f4 --- /dev/null +++ b/mali_kbase/mali_kbase_native_mgm.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_NATIVE_MGM_H_ +#define _KBASE_NATIVE_MGM_H_ + +#include <linux/memory_group_manager.h> + +/** + * kbase_native_mgm_dev - Native memory group manager device + * + * An implementation of the memory group manager interface that is intended for + * internal use when no platform-specific memory group manager is available. + * + * It ignores the specified group ID and delegates to the kernel's physical + * memory allocation and freeing functions. 
+ */ +extern struct memory_group_manager_device kbase_native_mgm_dev; + +#endif /* _KBASE_NATIVE_MGM_H_ */ diff --git a/mali_kbase/mali_kbase_smc.c b/mali_kbase/mali_kbase_smc.c index 2176479..3470f58 100644 --- a/mali_kbase/mali_kbase_smc.c +++ b/mali_kbase/mali_kbase_smc.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,6 +27,18 @@ #include <linux/compiler.h> +/* __asmeq is not available on Kernel versions >= 4.20 */ +#ifndef __asmeq +/* + * This is used to ensure the compiler did actually allocate the register we + * asked it for some inline assembly sequences. Apparently we can't trust the + * compiler from one version to another so a bit of paranoia won't hurt. This + * string is meant to be concatenated with the inline asm string and will + * cause compilation to stop on mismatch. (for details, see gcc PR 15089) + */ +#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" +#endif + static noinline u64 invoke_smc_fid(u64 function_id, u64 arg0, u64 arg1, u64 arg2) { diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c index e762af4..f7969be 100644 --- a/mali_kbase/mali_kbase_softjobs.c +++ b/mali_kbase/mali_kbase_softjobs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -621,10 +621,20 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) 1, /* Write */ buffers[i].pages); if (pinned_pages < 0) { + /* get_user_pages_fast has failed - page array is not + * valid. Don't try to release any pages. 
+ */ + buffers[i].nr_pages = 0; + ret = pinned_pages; goto out_cleanup; } if (pinned_pages != nr_pages) { + /* Adjust number of pages, so that we only attempt to + * release pages in the array that we know are valid. + */ + buffers[i].nr_pages = pinned_pages; + ret = -EINVAL; goto out_cleanup; } @@ -640,8 +650,8 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) katom->kctx, user_extres.ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); - if (NULL == reg || NULL == reg->gpu_alloc || - (reg->flags & KBASE_REG_FREE)) { + if (kbase_is_region_invalid_or_free(reg) || + reg->gpu_alloc == NULL) { ret = -EINVAL; goto out_unlock; } @@ -674,8 +684,18 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) alloc->imported.user_buf.address, nr_pages, 0, buffers[i].extres_pages); - if (ret != nr_pages) + if (ret != nr_pages) { + /* Adjust number of pages, so that we only + * attempt to release pages in the array that we + * know are valid. + */ + if (ret < 0) + buffers[i].nr_extres_pages = 0; + else + buffers[i].nr_extres_pages = ret; + goto out_unlock; + } ret = 0; break; } @@ -905,6 +925,7 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) __user void *data = (__user void *)(uintptr_t) katom->jc; struct base_jit_alloc_info *info; struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; u32 count; int ret; u32 i; @@ -937,7 +958,7 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) ret = kbasep_jit_alloc_validate(kctx, info); if (ret) goto free_info; - KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(katom, + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(kbdev, katom, info->va_pages, info->commit_pages, info->extent, info->id, info->bin_id, info->max_allocations, info->flags, info->usage_id); @@ -999,6 +1020,7 @@ static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom) static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) { struct kbase_context *kctx = 
katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; struct base_jit_alloc_info *info; struct kbase_va_region *reg; struct kbase_vmap_struct mapping; @@ -1085,6 +1107,8 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) } katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + dev_warn_ratelimited(kbdev->dev, "JIT alloc softjob failed: atom id %d\n", + kbase_jd_atom_id(kctx, katom)); return 0; } @@ -1111,6 +1135,7 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) } for (i = 0, info = katom->softjob_data; i < count; i++, info++) { + u64 entry_mmu_flags = 0; /* * Write the address of the JIT allocation to the user provided * GPU allocation. @@ -1129,9 +1154,22 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) reg = kctx->jit_alloc[info->id]; new_addr = reg->start_pfn << PAGE_SHIFT; *ptr = new_addr; - KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(katom, + +#if defined(CONFIG_MALI_VECTOR_DUMP) + /* + * Retrieve the mmu flags for JIT allocation + * only if dumping is enabled + */ + kctx->kbdev->mmu_mode->entry_set_ate(&entry_mmu_flags, + (struct tagged_addr){ 0 }, + reg->flags, + MIDGARD_MMU_BOTTOMLEVEL); +#endif + + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(kbdev, katom, info->gpu_alloc_addr, - new_addr, info->va_pages); + new_addr, info->va_pages, + entry_mmu_flags); kbase_vunmap(kctx, &mapping); } @@ -1165,6 +1203,7 @@ static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; __user void *data = (__user void *)(uintptr_t) katom->jc; u8 *ids; u32 count = MAX(katom->nr_extres, 1); @@ -1204,7 +1243,7 @@ static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) *ids = (u8)katom->jc; } for (i = 0; i < count; i++) - KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(katom, ids[i]); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(kbdev, katom, ids[i]); list_add_tail(&katom->jit_node, 
&kctx->jit_atoms_head); @@ -1432,8 +1471,10 @@ static void kbase_ext_res_finish(struct kbase_jd_atom *katom) int kbase_process_soft_job(struct kbase_jd_atom *katom) { int ret = 0; + struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; - KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(katom); + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom); switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: @@ -1496,7 +1537,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) } /* Atom is complete */ - KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(katom); + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, katom); return ret; } diff --git a/mali_kbase/mali_kbase_tlstream.c b/mali_kbase/mali_kbase_tlstream.c index 10e3889..c663896 100644 --- a/mali_kbase/mali_kbase_tlstream.c +++ b/mali_kbase/mali_kbase_tlstream.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,7 +53,11 @@ #define PACKET_SIZE 4096 /* bytes */ /* The number of packets used by one timeline stream. */ -#define PACKET_COUNT 16 +#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP) + #define PACKET_COUNT 64 +#else + #define PACKET_COUNT 32 +#endif /* The number of bytes reserved for packet header. * These value must be defined according to MIPE documentation. */ @@ -158,7 +162,7 @@ enum tl_msg_id_obj { KBASE_TL_EVENT_ATOM_SOFTJOB_END, /* Job dump specific events. */ - KBASE_JD_GPU_SOFT_RESET + KBASE_JD_GPU_SOFT_RESET, }; /* Message ids of trace events that are recorded in the auxiliary stream. 
*/ @@ -172,32 +176,38 @@ enum tl_msg_id_aux { KBASE_AUX_PROTECTED_LEAVE_START, KBASE_AUX_PROTECTED_LEAVE_END, KBASE_AUX_JIT_STATS, + KBASE_AUX_EVENT_JOB_SLOT, }; /*****************************************************************************/ /** * struct tl_stream - timeline stream structure - * @lock: message order lock - * @buffer: array of buffers - * @wbi: write buffer index - * @rbi: read buffer index - * @numbered: if non-zero stream's packets are sequentially numbered - * @autoflush_counter: counter tracking stream's autoflush state + * @lock: Message order lock + * @buffer: Array of buffers + * @wbi: Write buffer index + * @rbi: Read buffer index + * @numbered: If non-zero stream's packets are sequentially numbered + * @autoflush_counter: Counter tracking stream's autoflush state * * This structure holds information needed to construct proper packets in the - * timeline stream. Each message in sequence must bear timestamp that is greater - * to one in previous message in the same stream. For this reason lock is held - * throughout the process of message creation. Each stream contains set of - * buffers. Each buffer will hold one MIPE packet. In case there is no free - * space required to store incoming message the oldest buffer is discarded. - * Each packet in timeline body stream has sequence number embedded (this value - * must increment monotonically and is used by packets receiver to discover + * timeline stream. + * + * Each message in the sequence must bear a timestamp that is + * greater than the previous message in the same stream. For this reason + * a lock is held throughout the process of message creation. + * + * Each stream contains a set of buffers. Each buffer will hold one MIPE + * packet. In case there is no free space required to store the incoming + * message the oldest buffer is discarded. 
Each packet in timeline body + * stream has a sequence number embedded, this value must increment + * monotonically and is used by the packets receiver to discover these * buffer overflows. - * Autoflush counter is set to negative number when there is no data pending - * for flush and it is set to zero on every update of the buffer. Autoflush - * timer will increment the counter by one on every expiry. In case there will - * be no activity on the buffer during two consecutive timer expiries, stream + * + * The autoflush counter is set to a negative number when there is no data + * pending for flush and it is set to zero on every update of the buffer. The + * autoflush timer will increment the counter by one on every expiry. If there + * is no activity on the buffer for two consecutive timer expiries, the stream * buffer will be flushed. */ struct tl_stream { @@ -217,11 +227,11 @@ struct tl_stream { /** * struct tp_desc - tracepoint message descriptor structure - * @id: tracepoint ID identifying message in stream - * @id_str: human readable version of tracepoint ID - * @name: tracepoint description - * @arg_types: tracepoint's arguments types declaration - * @arg_names: comma separated list of tracepoint's arguments names + * @id: Tracepoint ID identifying message in stream + * @id_str: Human readable version of tracepoint ID + * @name: Tracepoint description + * @arg_types: Tracepoint's arguments types declaration + * @arg_names: Comma separated list of tracepoint's arguments names */ struct tp_desc { u32 id; @@ -250,21 +260,30 @@ static const struct { {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY, 1} }; -/* The timeline streams generated by kernel. */ -static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT]; - -/* Autoflush timer. */ -static struct timer_list autoflush_timer; - -/* If non-zero autoflush timer is active. */ -static atomic_t autoflush_timer_active; - -/* Reader lock. 
Only one reader is allowed to have access to the timeline - * streams at any given time. */ -static DEFINE_MUTEX(tl_reader_lock); +/** + * struct kbase_timeline - timeline state structure + * @streams: The timeline streams generated by kernel + * @autoflush_timer: Autoflush timer + * @autoflush_timer_active: If non-zero autoflush timer is active + * @reader_lock: Reader lock. Only one reader is allowed to + * have access to the timeline streams at any given time. + * @event_queue: Timeline stream event queue + * @bytes_collected: Number of bytes read by user + * @bytes_generated: Number of bytes generated by tracepoint messages + */ +struct kbase_timeline { + struct tl_stream streams[TL_STREAM_TYPE_COUNT]; + struct timer_list autoflush_timer; + atomic_t autoflush_timer_active; + struct mutex reader_lock; + wait_queue_head_t event_queue; +#if MALI_UNIT_TEST + atomic_t bytes_collected; + atomic_t bytes_generated; +#endif /* MALI_UNIT_TEST */ + atomic_t *is_enabled; +}; -/* Timeline stream event queue. */ -static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue); /* The timeline stream file operations functions. 
*/ static ssize_t kbasep_tlstream_read( @@ -449,8 +468,8 @@ static const struct tp_desc tp_desc_obj[] = { KBASE_TL_ATTRIB_ATOM_JIT, __stringify(KBASE_TL_ATTRIB_ATOM_JIT), "jit done for atom", - "@pLLL", - "atom,edit_addr,new_addr,va_pages" + "@pLLLL", + "atom,edit_addr,new_addr,va_pages,jit_flags" }, { KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, @@ -530,8 +549,8 @@ static const struct tp_desc tp_desc_aux[] = { KBASE_AUX_PAGEFAULT, __stringify(KBASE_AUX_PAGEFAULT), "Page fault", - "@IL", - "ctx_nr,page_cnt_change" + "@IIL", + "ctx_nr,as_nr,page_cnt_change" }, { KBASE_AUX_PAGESALLOC, @@ -581,22 +600,16 @@ static const struct tp_desc tp_desc_aux[] = { "per-bin JIT statistics", "@IIIIII", "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages" + }, + { + KBASE_AUX_EVENT_JOB_SLOT, + __stringify(KBASE_AUX_EVENT_JOB_SLOT), + "event on a given job slot", + "@pIII", + "ctx,slot_nr,atom_nr,event" } }; -#if MALI_UNIT_TEST -/* Number of bytes read by user. */ -static atomic_t tlstream_bytes_collected = {0}; - -/* Number of bytes generated by tracepoint messages. */ -static atomic_t tlstream_bytes_generated = {0}; -#endif /* MALI_UNIT_TEST */ - -/*****************************************************************************/ - -/* Indicator of whether the timeline stream file descriptor is used. 
*/ -atomic_t kbase_tlstream_enabled = {0}; - /*****************************************************************************/ /** @@ -868,7 +881,7 @@ static void kbasep_timeline_stream_init( tl_stream_cfg[stream_type].stream_id, stream->numbered); - kbasep_timeline_stream_reset(tl_stream[stream_type]); + kbasep_timeline_stream_reset(stream); } /** @@ -882,9 +895,10 @@ static void kbasep_timeline_stream_term(struct tl_stream *stream) /** * kbasep_tlstream_msgbuf_submit - submit packet to the user space - * @stream: pointer to the stream structure - * @wb_idx_raw: write buffer index - * @wb_size: length of data stored in current buffer + * @timeline: Timeline instance + * @stream: Pointer to the stream structure + * @wb_idx_raw: Write buffer index + * @wb_size: Length of data stored in current buffer * * Function updates currently written buffer with packet header. Then write * index is incremented and buffer is handled to user space. Parameters @@ -895,6 +909,7 @@ static void kbasep_timeline_stream_term(struct tl_stream *stream) * Warning: User must update the stream structure with returned value. */ static size_t kbasep_tlstream_msgbuf_submit( + struct kbase_timeline *timeline, struct tl_stream *stream, unsigned int wb_idx_raw, unsigned int wb_size) @@ -920,7 +935,7 @@ static size_t kbasep_tlstream_msgbuf_submit( atomic_inc(&stream->wbi); /* Inform user that packets are ready for reading. 
*/ - wake_up_interruptible(&tl_event_queue); + wake_up_interruptible(&timeline->event_queue); wb_size = PACKET_HEADER_SIZE; if (stream->numbered) @@ -931,9 +946,10 @@ static size_t kbasep_tlstream_msgbuf_submit( /** * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer - * @stream_type: type of the stream that shall be locked - * @msg_size: message size - * @flags: pointer to store flags passed back on stream release + * @timeline: Timeline instance + * @stream_type: Type of the stream that shall be locked + * @msg_size: Message size + * @flags: Pointer to store flags passed back on stream release * * Function will lock the stream and reserve the number of bytes requested * in msg_size for the user. @@ -945,6 +961,7 @@ static size_t kbasep_tlstream_msgbuf_submit( * (i.e. do not use any operation that may sleep). */ static char *kbasep_tlstream_msgbuf_acquire( + struct kbase_timeline *timeline, enum tl_stream_type stream_type, size_t msg_size, unsigned long *flags) __acquires(&stream->lock) @@ -959,7 +976,7 @@ static char *kbasep_tlstream_msgbuf_acquire( PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >= msg_size); - stream = tl_stream[stream_type]; + stream = &timeline->streams[stream_type]; spin_lock_irqsave(&stream->lock, *flags); @@ -969,7 +986,7 @@ static char *kbasep_tlstream_msgbuf_acquire( /* Select next buffer if data will not fit into current one. 
*/ if (PACKET_SIZE < wb_size + msg_size) { - wb_size = kbasep_tlstream_msgbuf_submit( + wb_size = kbasep_tlstream_msgbuf_submit(timeline, stream, wb_idx_raw, wb_size); wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; } @@ -978,7 +995,7 @@ static char *kbasep_tlstream_msgbuf_acquire( atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size); #if MALI_UNIT_TEST - atomic_add(msg_size, &tlstream_bytes_generated); + atomic_add(msg_size, &timeline->bytes_generated); #endif /* MALI_UNIT_TEST */ return &stream->buffer[wb_idx].data[wb_size]; @@ -986,13 +1003,15 @@ static char *kbasep_tlstream_msgbuf_acquire( /** * kbasep_tlstream_msgbuf_release - unlock selected stream - * @stream_type: type of the stream that shall be locked - * @flags: value obtained during stream acquire + * @timeline: Timeline instance + * @stream_type: Type of the stream that shall be locked + * @flags: Value obtained during stream acquire * * Function releases stream that has been previously locked with a call to * kbasep_tlstream_msgbuf_acquire(). */ static void kbasep_tlstream_msgbuf_release( + struct kbase_timeline *timeline, enum tl_stream_type stream_type, unsigned long flags) __releases(&stream->lock) { @@ -1000,7 +1019,7 @@ static void kbasep_tlstream_msgbuf_release( KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); - stream = tl_stream[stream_type]; + stream = &timeline->streams[stream_type]; /* Mark stream as containing unflushed data. */ atomic_set(&stream->autoflush_counter, 0); @@ -1012,13 +1031,16 @@ static void kbasep_tlstream_msgbuf_release( /** * kbasep_tlstream_flush_stream - flush stream - * @stype: type of stream to be flushed + * @timeline: Timeline instance + * @stype: Type of stream to be flushed * * Flush pending data in timeline stream. 
*/ -static void kbasep_tlstream_flush_stream(enum tl_stream_type stype) +static void kbasep_tlstream_flush_stream( + struct kbase_timeline *timeline, + enum tl_stream_type stype) { - struct tl_stream *stream = tl_stream[stype]; + struct tl_stream *stream = &timeline->streams[stype]; unsigned long flags; unsigned int wb_idx_raw; unsigned int wb_idx; @@ -1035,7 +1057,7 @@ static void kbasep_tlstream_flush_stream(enum tl_stream_type stype) wb_size = atomic_read(&stream->buffer[wb_idx].size); if (wb_size > min_size) { - wb_size = kbasep_tlstream_msgbuf_submit( + wb_size = kbasep_tlstream_msgbuf_submit(timeline, stream, wb_idx_raw, wb_size); wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; atomic_set(&stream->buffer[wb_idx].size, wb_size); @@ -1045,7 +1067,7 @@ static void kbasep_tlstream_flush_stream(enum tl_stream_type stype) /** * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback - * @timer: unused + * @timer: Timer list * * Timer is executed periodically to check if any of the stream contains * buffer ready to be submitted to user space. 
@@ -1054,11 +1076,13 @@ static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *timer) { enum tl_stream_type stype; int rcode; + struct kbase_timeline *timeline = + container_of(timer, struct kbase_timeline, autoflush_timer); CSTD_UNUSED(timer); for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) { - struct tl_stream *stream = tl_stream[stype]; + struct tl_stream *stream = &timeline->streams[stype]; unsigned long flags; unsigned int wb_idx_raw; unsigned int wb_idx; @@ -1091,7 +1115,7 @@ static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *timer) wb_size = atomic_read(&stream->buffer[wb_idx].size); if (wb_size > min_size) { - wb_size = kbasep_tlstream_msgbuf_submit( + wb_size = kbasep_tlstream_msgbuf_submit(timeline, stream, wb_idx_raw, wb_size); wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; atomic_set(&stream->buffer[wb_idx].size, @@ -1100,17 +1124,18 @@ static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *timer) spin_unlock_irqrestore(&stream->lock, flags); } - if (atomic_read(&autoflush_timer_active)) + if (atomic_read(&timeline->autoflush_timer_active)) rcode = mod_timer( - &autoflush_timer, + &timeline->autoflush_timer, jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); CSTD_UNUSED(rcode); } /** * kbasep_tlstream_packet_pending - check timeline streams for pending packets - * @stype: pointer to variable where stream type will be placed - * @rb_idx_raw: pointer to variable where read buffer index will be placed + * @timeline: Timeline instance + * @stype: Pointer to variable where stream type will be placed + * @rb_idx_raw: Pointer to variable where read buffer index will be placed * * Function checks all streams for pending packets. It will stop as soon as * packet ready to be submitted to user space is detected. 
Variables under @@ -1120,8 +1145,9 @@ static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *timer) * Return: non-zero if any of timeline streams has at last one packet ready */ static int kbasep_tlstream_packet_pending( - enum tl_stream_type *stype, - unsigned int *rb_idx_raw) + struct kbase_timeline *timeline, + enum tl_stream_type *stype, + unsigned int *rb_idx_raw) { int pending = 0; @@ -1132,15 +1158,16 @@ static int kbasep_tlstream_packet_pending( *stype = 0; (*stype < TL_STREAM_TYPE_COUNT) && !pending; (*stype)++) { - if (NULL != tl_stream[*stype]) { - *rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi); - /* Read buffer index may be updated by writer in case of - * overflow. Read and write buffer indexes must be - * loaded in correct order. */ - smp_rmb(); - if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw) - pending = 1; - } + struct tl_stream *stream = &timeline->streams[*stype]; + *rb_idx_raw = atomic_read(&stream->rbi); + /* Read buffer index may be updated by writer in case of + * overflow. Read and write buffer indexes must be + * loaded in correct order. 
+ */ + smp_rmb(); + if (atomic_read(&stream->wbi) != *rb_idx_raw) + pending = 1; + } (*stype)--; @@ -1149,10 +1176,10 @@ static int kbasep_tlstream_packet_pending( /** * kbasep_tlstream_read - copy data from streams to buffer provided by user - * @filp: pointer to file structure (unused) - * @buffer: pointer to the buffer provided by user - * @size: maximum amount of data that can be stored in the buffer - * @f_pos: pointer to file offset (unused) + * @filp: Pointer to file structure + * @buffer: Pointer to the buffer provided by user + * @size: Maximum amount of data that can be stored in the buffer + * @f_pos: Pointer to file offset (unused) * * Return: number of bytes stored in the buffer */ @@ -1163,37 +1190,48 @@ static ssize_t kbasep_tlstream_read( loff_t *f_pos) { ssize_t copy_len = 0; + struct kbase_timeline *timeline; KBASE_DEBUG_ASSERT(filp); KBASE_DEBUG_ASSERT(f_pos); + if (WARN_ON(!filp->private_data)) + return -EFAULT; + + timeline = (struct kbase_timeline *) filp->private_data; + if (!buffer) return -EINVAL; if ((0 > *f_pos) || (PACKET_SIZE > size)) return -EINVAL; - mutex_lock(&tl_reader_lock); + mutex_lock(&timeline->reader_lock); while (copy_len < size) { enum tl_stream_type stype; + struct tl_stream *stream; unsigned int rb_idx_raw = 0; unsigned int wb_idx_raw; unsigned int rb_idx; size_t rb_size; - /* If we don't have any data yet, wait for packet to be - * submitted. If we already read some packets and there is no - * packet pending return back to user. */ + /* If we already read some packets and there is no + * packet pending then return back to user. + * If we don't have any data yet, wait for packet to be + * submitted. 
+ */ if (0 < copy_len) { if (!kbasep_tlstream_packet_pending( + timeline, &stype, &rb_idx_raw)) break; } else { if (wait_event_interruptible( - tl_event_queue, + timeline->event_queue, kbasep_tlstream_packet_pending( + timeline, &stype, &rb_idx_raw))) { copy_len = -ERESTARTSYS; @@ -1204,12 +1242,13 @@ static ssize_t kbasep_tlstream_read( /* Check if this packet fits into the user buffer. * If so copy its content. */ rb_idx = rb_idx_raw % PACKET_COUNT; - rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size); + stream = &timeline->streams[stype]; + rb_size = atomic_read(&stream->buffer[rb_idx].size); if (rb_size > size - copy_len) break; if (copy_to_user( &buffer[copy_len], - tl_stream[stype]->buffer[rb_idx].data, + stream->buffer[rb_idx].data, rb_size)) { copy_len = -EFAULT; break; @@ -1221,80 +1260,93 @@ static ssize_t kbasep_tlstream_read( * that we have just sent to user. */ smp_rmb(); - wb_idx_raw = atomic_read(&tl_stream[stype]->wbi); + wb_idx_raw = atomic_read(&stream->wbi); if (wb_idx_raw - rb_idx_raw < PACKET_COUNT) { copy_len += rb_size; - atomic_inc(&tl_stream[stype]->rbi); + atomic_inc(&stream->rbi); #if MALI_UNIT_TEST - atomic_add(rb_size, &tlstream_bytes_collected); + atomic_add(rb_size, &timeline->bytes_collected); #endif /* MALI_UNIT_TEST */ } else { const unsigned int new_rb_idx_raw = wb_idx_raw - PACKET_COUNT + 1; /* Adjust read buffer index to the next valid buffer */ - atomic_set(&tl_stream[stype]->rbi, new_rb_idx_raw); + atomic_set(&stream->rbi, new_rb_idx_raw); } } - mutex_unlock(&tl_reader_lock); + mutex_unlock(&timeline->reader_lock); return copy_len; } /** * kbasep_tlstream_poll - poll timeline stream for packets - * @filp: pointer to file structure - * @wait: pointer to poll table + * @filp: Pointer to file structure + * @wait: Pointer to poll table * Return: POLLIN if data can be read without blocking, otherwise zero */ static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait) { enum tl_stream_type stream_type; 
unsigned int rb_idx; + struct kbase_timeline *timeline; KBASE_DEBUG_ASSERT(filp); KBASE_DEBUG_ASSERT(wait); - poll_wait(filp, &tl_event_queue, wait); - if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx)) + if (WARN_ON(!filp->private_data)) + return -EFAULT; + + timeline = (struct kbase_timeline *) filp->private_data; + + poll_wait(filp, &timeline->event_queue, wait); + if (kbasep_tlstream_packet_pending(timeline, &stream_type, &rb_idx)) return POLLIN; return 0; } /** * kbasep_tlstream_release - release timeline stream descriptor - * @inode: pointer to inode structure - * @filp: pointer to file structure + * @inode: Pointer to inode structure + * @filp: Pointer to file structure * * Return always return zero */ static int kbasep_tlstream_release(struct inode *inode, struct file *filp) { + struct kbase_timeline *timeline; + KBASE_DEBUG_ASSERT(inode); KBASE_DEBUG_ASSERT(filp); + KBASE_DEBUG_ASSERT(filp->private_data); + CSTD_UNUSED(inode); - CSTD_UNUSED(filp); + + timeline = (struct kbase_timeline *) filp->private_data; /* Stop autoflush timer before releasing access to streams. */ - atomic_set(&autoflush_timer_active, 0); - del_timer_sync(&autoflush_timer); + atomic_set(&timeline->autoflush_timer_active, 0); + del_timer_sync(&timeline->autoflush_timer); - atomic_set(&kbase_tlstream_enabled, 0); + atomic_set(timeline->is_enabled, 0); return 0; } /** * kbasep_tlstream_timeline_header - prepare timeline header stream packet - * @stream_type: type of the stream that will carry header data - * @tp_desc: pointer to array with tracepoint descriptors - * @tp_count: number of descriptors in the given array + * @timeline: Timeline instance + * @stream_type: Type of the stream that will carry header data + * @tp_desc: Pointer to array with tracepoint descriptors + * @tp_count: Number of descriptors in the given array * * Functions fills in information about tracepoints stored in body stream * associated with this header stream. 
*/ static void kbasep_tlstream_timeline_header( + struct kbase_timeline *timeline, enum tl_stream_type stream_type, const struct tp_desc *tp_desc, u32 tp_count) @@ -1329,7 +1381,7 @@ static void kbasep_tlstream_timeline_header( KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size); - buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags); + buffer = kbasep_tlstream_msgbuf_acquire(timeline, stream_type, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv)); @@ -1357,84 +1409,92 @@ static void kbasep_tlstream_timeline_header( KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(stream_type, flags); + kbasep_tlstream_msgbuf_release(timeline, stream_type, flags); /* We don't expect any more data to be read in this stream. * As header stream must be read before its associated body stream, * make this packet visible to the user straightaway. */ - kbasep_tlstream_flush_stream(stream_type); + kbasep_tlstream_flush_stream(timeline, stream_type); } /*****************************************************************************/ -int kbase_tlstream_init(void) +int kbase_tlstream_init(struct kbase_timeline **timeline, + atomic_t *timeline_is_enabled) { enum tl_stream_type i; + struct kbase_timeline *result; - /* Prepare stream structures. */ - for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { - tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL); - if (!tl_stream[i]) - break; - kbasep_timeline_stream_init(tl_stream[i], i); - } - if (TL_STREAM_TYPE_COUNT > i) { - for (; i > 0; i--) { - kbasep_timeline_stream_term(tl_stream[i - 1]); - kfree(tl_stream[i - 1]); - } + if (!timeline || !timeline_is_enabled) + return -EINVAL; + + result = kzalloc(sizeof(*result), GFP_KERNEL); + if (!result) return -ENOMEM; - } + + mutex_init(&result->reader_lock); + init_waitqueue_head(&result->event_queue); + + /* Prepare stream structures. 
*/ + for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) + kbasep_timeline_stream_init(&result->streams[i], i); /* Initialize autoflush timer. */ - atomic_set(&autoflush_timer_active, 0); - kbase_timer_setup(&autoflush_timer, + atomic_set(&result->autoflush_timer_active, 0); + kbase_timer_setup(&result->autoflush_timer, kbasep_tlstream_autoflush_timer_callback); + result->is_enabled = timeline_is_enabled; + *timeline = result; return 0; } -void kbase_tlstream_term(void) +void kbase_tlstream_term(struct kbase_timeline *timeline) { enum tl_stream_type i; - for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { - kbasep_timeline_stream_term(tl_stream[i]); - kfree(tl_stream[i]); - } + if (!timeline) + return; + + for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) + kbasep_timeline_stream_term(&timeline->streams[i]); + + kfree(timeline); } -static void kbase_create_timeline_objects(struct kbase_context *kctx) +static void kbase_create_timeline_objects(struct kbase_device *kbdev) { - struct kbase_device *kbdev = kctx->kbdev; - unsigned int lpu_id; - unsigned int as_nr; + unsigned int lpu_id; + unsigned int as_nr; struct kbasep_kctx_list_element *element; + struct kbase_timeline *timeline = kbdev->timeline; /* Create LPU objects. */ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { u32 *lpu = &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; - KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, lpu_id, *lpu); + KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(kbdev, lpu, lpu_id, *lpu); } /* Create Address Space objects. */ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) - KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(&kbdev->as[as_nr], as_nr); + KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(kbdev, &kbdev->as[as_nr], as_nr); /* Create GPU object and make it retain all LPUs and address spaces. 
*/ KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU( kbdev, + kbdev, kbdev->gpu_props.props.raw_props.gpu_id, kbdev->gpu_props.num_cores); for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { void *lpu = &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; - KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, kbdev); + KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(kbdev, lpu, kbdev); } for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU( + kbdev, &kbdev->as[as_nr], kbdev); @@ -1442,6 +1502,7 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx) mutex_lock(&kbdev->kctx_list_lock); list_for_each_entry(element, &kbdev->kctx_list, link) { KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX( + kbdev, element->kctx, element->kctx->id, (u32)(element->kctx->tgid)); @@ -1450,53 +1511,71 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx) * This will prevent context creation message to be directed to both * summary and body stream. */ - kbase_tlstream_reset_body_streams(); + kbase_tlstream_reset_body_streams(timeline); mutex_unlock(&kbdev->kctx_list_lock); /* Static object are placed into summary packet that needs to be * transmitted first. Flush all streams to make it available to * user space. */ - kbase_tlstream_flush_streams(); + kbase_tlstream_flush_streams(timeline); +} + +#ifdef CONFIG_MALI_DEVFREQ +static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) +{ + struct devfreq *devfreq = kbdev->devfreq; + + /* Devfreq initialization failure isn't a fatal error, so devfreq might + * be null. 
+ */ + if (devfreq) { + mutex_lock(&devfreq->lock); + KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, + (u64)devfreq->last_status.current_frequency); + mutex_unlock(&devfreq->lock); + } } +#endif /* CONFIG_MALI_DEVFREQ */ -int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags) +int kbase_tlstream_acquire(struct kbase_device *kbdev, u32 flags) { int ret; u32 tlstream_enabled = TLSTREAM_ENABLED | flags; + struct kbase_timeline *timeline = kbdev->timeline; - if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) { + if (0 == atomic_cmpxchg(timeline->is_enabled, 0, tlstream_enabled)) { int rcode; ret = anon_inode_getfd( "[mali_tlstream]", &kbasep_tlstream_fops, - kctx, + timeline, O_RDONLY | O_CLOEXEC); if (ret < 0) { - atomic_set(&kbase_tlstream_enabled, 0); + atomic_set(timeline->is_enabled, 0); return ret; } /* Reset and initialize header streams. */ kbasep_timeline_stream_reset( - tl_stream[TL_STREAM_TYPE_OBJ_HEADER]); + &timeline->streams[TL_STREAM_TYPE_OBJ_HEADER]); kbasep_timeline_stream_reset( - tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]); + &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]); kbasep_timeline_stream_reset( - tl_stream[TL_STREAM_TYPE_AUX_HEADER]); - kbasep_tlstream_timeline_header( + &timeline->streams[TL_STREAM_TYPE_AUX_HEADER]); + kbasep_tlstream_timeline_header(timeline, TL_STREAM_TYPE_OBJ_HEADER, tp_desc_obj, ARRAY_SIZE(tp_desc_obj)); - kbasep_tlstream_timeline_header( + kbasep_tlstream_timeline_header(timeline, TL_STREAM_TYPE_AUX_HEADER, tp_desc_aux, ARRAY_SIZE(tp_desc_aux)); /* Start autoflush timer. */ - atomic_set(&autoflush_timer_active, 1); + atomic_set(&timeline->autoflush_timer_active, 1); rcode = mod_timer( - &autoflush_timer, + &timeline->autoflush_timer, jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); CSTD_UNUSED(rcode); @@ -1504,9 +1583,9 @@ int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags) * timeout as the default value of 3 seconds is often * insufficient. 
*/ if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) { - dev_info(kctx->kbdev->dev, + dev_info(kbdev->dev, "Job dumping is enabled, readjusting the software event's timeout\n"); - atomic_set(&kctx->kbdev->js_data.soft_job_timeout_ms, + atomic_set(&kbdev->js_data.soft_job_timeout_ms, 1800000); } @@ -1514,7 +1593,15 @@ int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags) * Create static timeline objects that will be * read by client. */ - kbase_create_timeline_objects(kctx); + kbase_create_timeline_objects(kbdev); + +#ifdef CONFIG_MALI_DEVFREQ + /* Devfreq target tracepoints are only fired when the target + * changes, so we won't know the current target unless we + * send it now. + */ + kbase_tlstream_current_devfreq_target(kbdev); +#endif /* CONFIG_MALI_DEVFREQ */ } else { ret = -EBUSY; @@ -1523,35 +1610,36 @@ int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags) return ret; } -void kbase_tlstream_flush_streams(void) +void kbase_tlstream_flush_streams(struct kbase_timeline *timeline) { enum tl_stream_type stype; for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) - kbasep_tlstream_flush_stream(stype); + kbasep_tlstream_flush_stream(timeline, stype); } -void kbase_tlstream_reset_body_streams(void) +void kbase_tlstream_reset_body_streams(struct kbase_timeline *timeline) { kbasep_timeline_stream_reset( - tl_stream[TL_STREAM_TYPE_OBJ]); + &timeline->streams[TL_STREAM_TYPE_OBJ]); kbasep_timeline_stream_reset( - tl_stream[TL_STREAM_TYPE_AUX]); + &timeline->streams[TL_STREAM_TYPE_AUX]); } #if MALI_UNIT_TEST -void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated) +void kbase_tlstream_stats(struct kbase_timeline *timeline, + u32 *bytes_collected, u32 *bytes_generated) { KBASE_DEBUG_ASSERT(bytes_collected); KBASE_DEBUG_ASSERT(bytes_generated); - *bytes_collected = atomic_read(&tlstream_bytes_collected); - *bytes_generated = atomic_read(&tlstream_bytes_generated); + *bytes_collected = atomic_read(&timeline->bytes_collected); + 
*bytes_generated = atomic_read(&timeline->bytes_generated); } #endif /* MALI_UNIT_TEST */ /*****************************************************************************/ -void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid) +void __kbase_tlstream_tl_summary_new_ctx(struct kbase_timeline *tl, void *context, u32 nr, u32 tgid) { const u32 msg_id = KBASE_TL_NEW_CTX; const size_t msg_size = @@ -1561,7 +1649,7 @@ void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ_SUMMARY, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1577,10 +1665,10 @@ void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid) KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) +void __kbase_tlstream_tl_summary_new_gpu(struct kbase_timeline *tl, void *gpu, u32 id, u32 core_count) { const u32 msg_id = KBASE_TL_NEW_GPU; const size_t msg_size = @@ -1590,7 +1678,7 @@ void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ_SUMMARY, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1605,10 +1693,10 @@ void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) buffer, pos, &core_count, sizeof(core_count)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) +void __kbase_tlstream_tl_summary_new_lpu(struct kbase_timeline *tl, void *lpu, u32 nr, 
u32 fn) { const u32 msg_id = KBASE_TL_NEW_LPU; const size_t msg_size = @@ -1618,7 +1706,7 @@ void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ_SUMMARY, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1633,10 +1721,10 @@ void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) buffer, pos, &fn, sizeof(fn)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) +void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(struct kbase_timeline *tl, void *lpu, void *gpu) { const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; const size_t msg_size = @@ -1645,7 +1733,7 @@ void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ_SUMMARY, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1658,10 +1746,10 @@ void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) buffer, pos, &gpu, sizeof(gpu)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr) +void __kbase_tlstream_tl_summary_new_as(struct kbase_timeline *tl, void *as, u32 nr) { const u32 msg_id = KBASE_TL_NEW_AS; const size_t msg_size = @@ -1670,7 +1758,7 @@ void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ_SUMMARY, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1683,10 
+1771,10 @@ void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr) buffer, pos, &nr, sizeof(nr)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) +void __kbase_tlstream_tl_summary_lifelink_as_gpu(struct kbase_timeline *tl, void *as, void *gpu) { const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; const size_t msg_size = @@ -1695,7 +1783,7 @@ void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ_SUMMARY, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1708,12 +1796,12 @@ void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) buffer, pos, &gpu, sizeof(gpu)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ_SUMMARY, flags); } /*****************************************************************************/ -void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) +void __kbase_tlstream_tl_new_ctx(struct kbase_timeline *tl, void *context, u32 nr, u32 tgid) { const u32 msg_id = KBASE_TL_NEW_CTX; const size_t msg_size = @@ -1723,7 +1811,7 @@ void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1738,10 +1826,10 @@ void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) buffer, pos, &tgid, sizeof(tgid)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void 
__kbase_tlstream_tl_new_atom(void *atom, u32 nr) +void __kbase_tlstream_tl_new_atom(struct kbase_timeline *tl, void *atom, u32 nr) { const u32 msg_id = KBASE_TL_NEW_ATOM; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) + @@ -1750,7 +1838,7 @@ void __kbase_tlstream_tl_new_atom(void *atom, u32 nr) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1763,10 +1851,10 @@ void __kbase_tlstream_tl_new_atom(void *atom, u32 nr) buffer, pos, &nr, sizeof(nr)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_del_ctx(void *context) +void __kbase_tlstream_tl_del_ctx(struct kbase_timeline *tl, void *context) { const u32 msg_id = KBASE_TL_DEL_CTX; const size_t msg_size = @@ -1775,7 +1863,7 @@ void __kbase_tlstream_tl_del_ctx(void *context) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1786,10 +1874,10 @@ void __kbase_tlstream_tl_del_ctx(void *context) buffer, pos, &context, sizeof(context)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_del_atom(void *atom) +void __kbase_tlstream_tl_del_atom(struct kbase_timeline *tl, void *atom) { const u32 msg_id = KBASE_TL_DEL_ATOM; const size_t msg_size = @@ -1798,7 +1886,7 @@ void __kbase_tlstream_tl_del_atom(void *atom) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1809,10 +1897,10 @@ void 
__kbase_tlstream_tl_del_atom(void *atom) buffer, pos, &atom, sizeof(atom)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) +void __kbase_tlstream_tl_ret_ctx_lpu(struct kbase_timeline *tl, void *context, void *lpu) { const u32 msg_id = KBASE_TL_RET_CTX_LPU; const size_t msg_size = @@ -1821,7 +1909,7 @@ void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1834,10 +1922,10 @@ void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) buffer, pos, &lpu, sizeof(lpu)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) +void __kbase_tlstream_tl_ret_atom_ctx(struct kbase_timeline *tl, void *atom, void *context) { const u32 msg_id = KBASE_TL_RET_ATOM_CTX; const size_t msg_size = @@ -1846,7 +1934,7 @@ void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1859,10 +1947,10 @@ void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) buffer, pos, &context, sizeof(context)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_ret_atom_lpu( +void __kbase_tlstream_tl_ret_atom_lpu(struct kbase_timeline *tl, void *atom, void *lpu, const char *attrib_match_list) { const u32 msg_id = 
KBASE_TL_RET_ATOM_LPU; @@ -1875,7 +1963,7 @@ void __kbase_tlstream_tl_ret_atom_lpu( char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1890,10 +1978,10 @@ void __kbase_tlstream_tl_ret_atom_lpu( buffer, pos, attrib_match_list, msg_s0); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) +void __kbase_tlstream_tl_nret_ctx_lpu(struct kbase_timeline *tl, void *context, void *lpu) { const u32 msg_id = KBASE_TL_NRET_CTX_LPU; const size_t msg_size = @@ -1902,7 +1990,7 @@ void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1915,10 +2003,10 @@ void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) buffer, pos, &lpu, sizeof(lpu)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) +void __kbase_tlstream_tl_nret_atom_ctx(struct kbase_timeline *tl, void *atom, void *context) { const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; const size_t msg_size = @@ -1927,7 +2015,7 @@ void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1940,10 +2028,10 @@ void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) buffer, pos, &context, sizeof(context)); KBASE_DEBUG_ASSERT(msg_size == pos); - 
kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) +void __kbase_tlstream_tl_nret_atom_lpu(struct kbase_timeline *tl, void *atom, void *lpu) { const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; const size_t msg_size = @@ -1952,7 +2040,7 @@ void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1965,10 +2053,10 @@ void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) buffer, pos, &lpu, sizeof(lpu)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) +void __kbase_tlstream_tl_ret_as_ctx(struct kbase_timeline *tl, void *as, void *ctx) { const u32 msg_id = KBASE_TL_RET_AS_CTX; const size_t msg_size = @@ -1977,7 +2065,7 @@ void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -1990,10 +2078,10 @@ void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) buffer, pos, &ctx, sizeof(ctx)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) +void __kbase_tlstream_tl_nret_as_ctx(struct kbase_timeline *tl, void *as, void *ctx) { const u32 msg_id = KBASE_TL_NRET_AS_CTX; const size_t msg_size = @@ -2002,7 +2090,7 @@ void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) char *buffer; size_t pos = 0; - buffer = 
kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2015,10 +2103,10 @@ void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) buffer, pos, &ctx, sizeof(ctx)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as) +void __kbase_tlstream_tl_ret_atom_as(struct kbase_timeline *tl, void *atom, void *as) { const u32 msg_id = KBASE_TL_RET_ATOM_AS; const size_t msg_size = @@ -2027,7 +2115,7 @@ void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2040,10 +2128,10 @@ void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as) buffer, pos, &as, sizeof(as)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as) +void __kbase_tlstream_tl_nret_atom_as(struct kbase_timeline *tl, void *atom, void *as) { const u32 msg_id = KBASE_TL_NRET_ATOM_AS; const size_t msg_size = @@ -2052,7 +2140,7 @@ void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2065,10 +2153,10 @@ void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as) buffer, pos, &as, sizeof(as)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_attrib_atom_config( 
+void __kbase_tlstream_tl_attrib_atom_config(struct kbase_timeline *tl, void *atom, u64 jd, u64 affinity, u32 config) { const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; @@ -2079,7 +2167,7 @@ void __kbase_tlstream_tl_attrib_atom_config( char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2096,10 +2184,10 @@ void __kbase_tlstream_tl_attrib_atom_config( buffer, pos, &config, sizeof(config)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio) +void __kbase_tlstream_tl_attrib_atom_priority(struct kbase_timeline *tl, void *atom, u32 prio) { const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY; const size_t msg_size = @@ -2108,7 +2196,7 @@ void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2121,10 +2209,10 @@ void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio) buffer, pos, &prio, sizeof(prio)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state) +void __kbase_tlstream_tl_attrib_atom_state(struct kbase_timeline *tl, void *atom, u32 state) { const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE; const size_t msg_size = @@ -2133,7 +2221,7 @@ void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); 
KBASE_DEBUG_ASSERT(buffer); @@ -2146,10 +2234,10 @@ void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state) buffer, pos, &state, sizeof(state)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom) +void __kbase_tlstream_tl_attrib_atom_prioritized(struct kbase_timeline *tl, void *atom) { const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED; const size_t msg_size = @@ -2158,7 +2246,7 @@ void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2169,21 +2257,23 @@ void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom) buffer, pos, &atom, sizeof(atom)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_attrib_atom_jit( - void *atom, u64 edit_addr, u64 new_addr, u64 va_pages) +void __kbase_tlstream_tl_attrib_atom_jit(struct kbase_timeline *tl, + void *atom, u64 edit_addr, u64 new_addr, + u64 va_pages, u64 jit_flags) { const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) - + sizeof(edit_addr) + sizeof(new_addr) + sizeof(va_pages); + + sizeof(edit_addr) + sizeof(new_addr) + sizeof(va_pages) + + sizeof(jit_flags); unsigned long flags; char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2198,13 +2288,15 @@ void __kbase_tlstream_tl_attrib_atom_jit( buffer, pos, &new_addr, sizeof(new_addr)); pos = kbasep_tlstream_write_bytes( buffer, pos, &va_pages, sizeof(va_pages)); 
+ pos = kbasep_tlstream_write_bytes( + buffer, pos, &jit_flags, sizeof(jit_flags)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_attrib_atom_jitallocinfo( +void __kbase_tlstream_tl_attrib_atom_jitallocinfo(struct kbase_timeline *tl, void *atom, u64 va_pages, u64 commit_pages, u64 extent, u32 jit_id, u32 bin_id, u32 max_allocations, u32 jit_flags, u32 usage_id) @@ -2220,7 +2312,7 @@ void __kbase_tlstream_tl_attrib_atom_jitallocinfo( char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2249,10 +2341,10 @@ void __kbase_tlstream_tl_attrib_atom_jitallocinfo( buffer, pos, &usage_id, sizeof(usage_id)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(void *atom, u32 jit_id) +void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(struct kbase_timeline *tl, void *atom, u32 jit_id) { const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITFREEINFO; const size_t msg_size = @@ -2261,7 +2353,7 @@ void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(void *atom, u32 jit_id) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2275,11 +2367,11 @@ void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(void *atom, u32 jit_id) buffer, pos, &jit_id, sizeof(jit_id)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_attrib_as_config( +void __kbase_tlstream_tl_attrib_as_config(struct kbase_timeline 
*tl, void *as, u64 transtab, u64 memattr, u64 transcfg) { const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG; @@ -2290,7 +2382,7 @@ void __kbase_tlstream_tl_attrib_as_config( char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2307,10 +2399,10 @@ void __kbase_tlstream_tl_attrib_as_config( buffer, pos, &transcfg, sizeof(transcfg)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_event_lpu_softstop(void *lpu) +void __kbase_tlstream_tl_event_lpu_softstop(struct kbase_timeline *tl, void *lpu) { const u32 msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP; const size_t msg_size = @@ -2319,7 +2411,7 @@ void __kbase_tlstream_tl_event_lpu_softstop(void *lpu) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2330,10 +2422,10 @@ void __kbase_tlstream_tl_event_lpu_softstop(void *lpu) buffer, pos, &lpu, sizeof(lpu)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom) +void __kbase_tlstream_tl_event_atom_softstop_ex(struct kbase_timeline *tl, void *atom) { const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX; const size_t msg_size = @@ -2342,7 +2434,7 @@ void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2353,10 +2445,10 @@ void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom) buffer, pos, &atom, sizeof(atom)); 
KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom) +void __kbase_tlstream_tl_event_atom_softstop_issue(struct kbase_timeline *tl, void *atom) { const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE; const size_t msg_size = @@ -2365,7 +2457,7 @@ void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2376,10 +2468,10 @@ void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom) buffer, pos, &atom, sizeof(atom)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_event_atom_softjob_start(void *atom) +void __kbase_tlstream_tl_event_atom_softjob_start(struct kbase_timeline *tl, void *atom) { const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START; const size_t msg_size = @@ -2388,7 +2480,7 @@ void __kbase_tlstream_tl_event_atom_softjob_start(void *atom) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2399,10 +2491,10 @@ void __kbase_tlstream_tl_event_atom_softjob_start(void *atom) buffer, pos, &atom, sizeof(atom)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_event_atom_softjob_end(void *atom) +void __kbase_tlstream_tl_event_atom_softjob_end(struct kbase_timeline *tl, void *atom) { const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END; const size_t msg_size = @@ -2411,7 +2503,7 @@ 
void __kbase_tlstream_tl_event_atom_softjob_end(void *atom) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2422,10 +2514,10 @@ void __kbase_tlstream_tl_event_atom_softjob_end(void *atom) buffer, pos, &atom, sizeof(atom)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) +void __kbase_tlstream_jd_gpu_soft_reset(struct kbase_timeline *tl, void *gpu) { const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; const size_t msg_size = @@ -2434,7 +2526,7 @@ void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2445,12 +2537,12 @@ void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) buffer, pos, &gpu, sizeof(gpu)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_OBJ, flags); } /*****************************************************************************/ -void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) +void __kbase_tlstream_aux_pm_state(struct kbase_timeline *tl, u32 core_type, u64 state) { const u32 msg_id = KBASE_AUX_PM_STATE; const size_t msg_size = @@ -2460,7 +2552,7 @@ void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2472,35 +2564,36 @@ void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state)); 
KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) +void __kbase_tlstream_aux_pagefault(struct kbase_timeline *tl, u32 ctx_nr, u32 as_nr, u64 page_count_change) { const u32 msg_id = KBASE_AUX_PAGEFAULT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) + - sizeof(page_count_change); + sizeof(as_nr) + sizeof(page_count_change); unsigned long flags; char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_tlstream_write_bytes(buffer, pos, &as_nr, sizeof(as_nr)); pos = kbasep_tlstream_write_bytes( buffer, pos, &page_count_change, sizeof(page_count_change)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) +void __kbase_tlstream_aux_pagesalloc(struct kbase_timeline *tl, u32 ctx_nr, u64 page_count) { const u32 msg_id = KBASE_AUX_PAGESALLOC; const size_t msg_size = @@ -2510,7 +2603,7 @@ void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2521,10 +2614,10 @@ void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) buffer, pos, &page_count, sizeof(page_count)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, 
flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_aux_devfreq_target(u64 target_freq) +void __kbase_tlstream_aux_devfreq_target(struct kbase_timeline *tl, u64 target_freq) { const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET; const size_t msg_size = @@ -2533,7 +2626,7 @@ void __kbase_tlstream_aux_devfreq_target(u64 target_freq) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2543,10 +2636,10 @@ void __kbase_tlstream_aux_devfreq_target(u64 target_freq) buffer, pos, &target_freq, sizeof(target_freq)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_aux_protected_enter_start(void *gpu) +void __kbase_tlstream_aux_protected_enter_start(struct kbase_timeline *tl, void *gpu) { const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_START; const size_t msg_size = @@ -2555,7 +2648,7 @@ void __kbase_tlstream_aux_protected_enter_start(void *gpu) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2566,9 +2659,9 @@ void __kbase_tlstream_aux_protected_enter_start(void *gpu) buffer, pos, &gpu, sizeof(gpu)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_aux_protected_enter_end(void *gpu) +void __kbase_tlstream_aux_protected_enter_end(struct kbase_timeline *tl, void *gpu) { const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_END; const size_t msg_size = @@ -2577,7 +2670,7 @@ void __kbase_tlstream_aux_protected_enter_end(void *gpu) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + 
buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2588,10 +2681,10 @@ void __kbase_tlstream_aux_protected_enter_end(void *gpu) buffer, pos, &gpu, sizeof(gpu)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_aux_protected_leave_start(void *gpu) +void __kbase_tlstream_aux_protected_leave_start(struct kbase_timeline *tl, void *gpu) { const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_START; const size_t msg_size = @@ -2600,7 +2693,7 @@ void __kbase_tlstream_aux_protected_leave_start(void *gpu) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2611,9 +2704,10 @@ void __kbase_tlstream_aux_protected_leave_start(void *gpu) buffer, pos, &gpu, sizeof(gpu)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_aux_protected_leave_end(void *gpu) + +void __kbase_tlstream_aux_protected_leave_end(struct kbase_timeline *tl, void *gpu) { const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_END; const size_t msg_size = @@ -2622,7 +2716,7 @@ void __kbase_tlstream_aux_protected_leave_end(void *gpu) char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2633,10 +2727,10 @@ void __kbase_tlstream_aux_protected_leave_end(void *gpu) buffer, pos, &gpu, sizeof(gpu)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_aux_jit_stats(u32 ctx_nr, u32 bid, +void 
__kbase_tlstream_aux_jit_stats(struct kbase_timeline *tl, u32 ctx_nr, u32 bid, u32 max_allocs, u32 allocs, u32 va_pages, u32 ph_pages) { @@ -2649,7 +2743,7 @@ void __kbase_tlstream_aux_jit_stats(u32 ctx_nr, u32 bid, char *buffer; size_t pos = 0; - buffer = kbasep_tlstream_msgbuf_acquire( + buffer = kbasep_tlstream_msgbuf_acquire(tl, TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); @@ -2670,5 +2764,38 @@ void __kbase_tlstream_aux_jit_stats(u32 ctx_nr, u32 bid, buffer, pos, &ph_pages, sizeof(ph_pages)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_AUX, flags); } + +void __kbase_tlstream_aux_event_job_slot(struct kbase_timeline *tl, + struct kbase_context *context, u32 slot_nr, u32 atom_nr, u32 event) +{ + const u32 msg_id = KBASE_AUX_EVENT_JOB_SLOT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(context) + sizeof(slot_nr) + + sizeof(atom_nr) + sizeof(event); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire(tl, + TL_STREAM_TYPE_AUX, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &context, sizeof(context)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &slot_nr, sizeof(slot_nr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom_nr, sizeof(atom_nr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &event, sizeof(event)); + + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(tl, TL_STREAM_TYPE_AUX, flags); +} + diff --git a/mali_kbase/mali_kbase_tlstream.h b/mali_kbase/mali_kbase_tlstream.h index e2a3ea4..7b2407f 100644 --- a/mali_kbase/mali_kbase_tlstream.h +++ b/mali_kbase/mali_kbase_tlstream.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2018 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,26 +24,38 @@ #define _KBASE_TLSTREAM_H #include <mali_kbase.h> +#include <mali_kbase_gator.h> + +#include <linux/typecheck.h> /*****************************************************************************/ +struct kbase_timeline; + /** * kbase_tlstream_init - initialize timeline infrastructure in kernel + * @timeline: Newly created instance of kbase_timeline will + * be stored in this pointer. + * @timeline_is_enabled: Timeline status will be written to this variable + * when a client is attached/detached. The variable + * must be valid while timeline instance is valid. * Return: zero on success, negative number on error */ -int kbase_tlstream_init(void); +int kbase_tlstream_init(struct kbase_timeline **timeline, + atomic_t *timeline_is_enabled); /** * kbase_tlstream_term - terminate timeline infrastructure in kernel * - * Timeline need have to been previously enabled with kbase_tlstream_init(). + * @timeline: Timeline instance to be terminated. It must be previously created + * with kbase_tlstream_init(). */ -void kbase_tlstream_term(void); +void kbase_tlstream_term(struct kbase_timeline *timeline); /** * kbase_tlstream_acquire - acquire timeline stream file descriptor - * @kctx: kernel common context - * @flags: timeline stream flags + * @kbdev: Kbase device + * @flags: Timeline stream flags * * This descriptor is meant to be used by userspace timeline to gain access to * kernel timeline stream. 
This stream is later broadcasted by user space to the @@ -54,30 +66,33 @@ void kbase_tlstream_term(void); * * Return: file descriptor on success, negative number on error */ -int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags); +int kbase_tlstream_acquire(struct kbase_device *kbdev, u32 flags); /** * kbase_tlstream_flush_streams - flush timeline streams. + * @timeline: Timeline instance * * Function will flush pending data in all timeline streams. */ -void kbase_tlstream_flush_streams(void); +void kbase_tlstream_flush_streams(struct kbase_timeline *timeline); /** * kbase_tlstream_reset_body_streams - reset timeline body streams. * * Function will discard pending data in all timeline body streams. + * @timeline: Timeline instance */ -void kbase_tlstream_reset_body_streams(void); +void kbase_tlstream_reset_body_streams(struct kbase_timeline *timeline); #if MALI_UNIT_TEST /** * kbase_tlstream_test - start timeline stream data generator - * @tpw_count: number of trace point writers in each context - * @msg_delay: time delay in milliseconds between trace points written by one + * @kbdev: Kernel common context + * @tpw_count: Number of trace point writers in each context + * @msg_delay: Time delay in milliseconds between trace points written by one * writer - * @msg_count: number of trace points written by one writer - * @aux_msg: if non-zero aux messages will be included + * @msg_count: Number of trace points written by one writer + * @aux_msg: If non-zero aux messages will be included * * This test starts a requested number of asynchronous writers in both IRQ and * thread context. Each writer will generate required number of test @@ -91,17 +106,19 @@ void kbase_tlstream_reset_body_streams(void); * writers finish. 
*/ void kbase_tlstream_test( - unsigned int tpw_count, - unsigned int msg_delay, - unsigned int msg_count, - int aux_msg); + struct kbase_device *kbdev, + unsigned int tpw_count, + unsigned int msg_delay, + unsigned int msg_count, + int aux_msg); /** * kbase_tlstream_stats - read timeline stream statistics - * @bytes_collected: will hold number of bytes read by the user - * @bytes_generated: will hold number of bytes generated by trace points + * @timeline: Timeline instance + * @bytes_collected: Will hold number of bytes read by the user + * @bytes_generated: Will hold number of bytes generated by trace points */ -void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated); +void kbase_tlstream_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated); #endif /* MALI_UNIT_TEST */ /*****************************************************************************/ @@ -111,379 +128,467 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated); #define TL_ATOM_STATE_DONE 2 #define TL_ATOM_STATE_POSTED 3 -void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid); -void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); -void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); -void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); -void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr); -void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu); -void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid); -void __kbase_tlstream_tl_new_atom(void *atom, u32 nr); -void __kbase_tlstream_tl_del_ctx(void *context); -void __kbase_tlstream_tl_del_atom(void *atom); -void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); -void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); -void __kbase_tlstream_tl_ret_atom_lpu( - void *atom, void *lpu, const char *attrib_match_list); -void __kbase_tlstream_tl_nret_ctx_lpu(void *context, 
void *lpu); -void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); -void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); -void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); -void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); -void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as); -void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as); -void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); -void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2); -void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2); -void __kbase_tlstream_tl_attrib_atom_config( - void *atom, u64 jd, u64 affinity, u32 config); -void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio); -void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state); -void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom); -void __kbase_tlstream_tl_attrib_atom_jit( - void *atom, u64 edit_addr, u64 new_addr, u64 va_pages); -void __kbase_tlstream_tl_attrib_atom_jitallocinfo( - void *atom, u64 va_pages, u64 commit_pages, u64 extent, - u32 jit_id, u32 bin_id, u32 max_allocations, u32 flags, - u32 usage_id); -void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(void *atom, u32 jit_id); -void __kbase_tlstream_tl_attrib_as_config( - void *as, u64 transtab, u64 memattr, u64 transcfg); -void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom); -void __kbase_tlstream_tl_event_lpu_softstop(void *lpu); -void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom); -void __kbase_tlstream_tl_event_atom_softjob_start(void *atom); -void __kbase_tlstream_tl_event_atom_softjob_end(void *atom); -void __kbase_tlstream_jd_gpu_soft_reset(void *gpu); -void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state); -void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); -void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); -void __kbase_tlstream_aux_devfreq_target(u64 target_freq); -void 
__kbase_tlstream_aux_protected_enter_start(void *gpu); -void __kbase_tlstream_aux_protected_enter_end(void *gpu); -void __kbase_tlstream_aux_protected_leave_start(void *gpu); -void __kbase_tlstream_aux_protected_leave_end(void *gpu); -void __kbase_tlstream_aux_jit_stats(u32 ctx_nr, u32 bin_id, - u32 max_allocations, u32 allocations, - u32 va_pages_nr, u32 ph_pages_nr); +/* We want these values to match */ +#define TL_JS_EVENT_START GATOR_JOB_SLOT_START +#define TL_JS_EVENT_STOP GATOR_JOB_SLOT_STOP +#define TL_JS_EVENT_SOFT_STOP GATOR_JOB_SLOT_SOFT_STOPPED + +void __kbase_tlstream_tl_summary_new_ctx(struct kbase_timeline *tl, void *context, u32 nr, u32 tgid); +void __kbase_tlstream_tl_summary_new_gpu(struct kbase_timeline *tl, void *gpu, u32 id, u32 core_count); +void __kbase_tlstream_tl_summary_new_lpu(struct kbase_timeline *tl, void *lpu, u32 nr, u32 fn); +void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(struct kbase_timeline *tl, void *lpu, void *gpu); +void __kbase_tlstream_tl_summary_new_as(struct kbase_timeline *tl, void *as, u32 nr); +void __kbase_tlstream_tl_summary_lifelink_as_gpu(struct kbase_timeline *tl, void *as, void *gpu); +void __kbase_tlstream_tl_new_ctx(struct kbase_timeline *tl, void *context, u32 nr, u32 tgid); +void __kbase_tlstream_tl_new_atom(struct kbase_timeline *tl, void *atom, u32 nr); +void __kbase_tlstream_tl_del_ctx(struct kbase_timeline *tl, void *context); +void __kbase_tlstream_tl_del_atom(struct kbase_timeline *tl, void *atom); +void __kbase_tlstream_tl_ret_ctx_lpu(struct kbase_timeline *tl, void *context, void *lpu); +void __kbase_tlstream_tl_ret_atom_ctx(struct kbase_timeline *tl, void *atom, void *context); +void __kbase_tlstream_tl_ret_atom_lpu(struct kbase_timeline *tl, + void *atom, void *lpu, const char *attrib_match_list); +void __kbase_tlstream_tl_nret_ctx_lpu(struct kbase_timeline *tl, void *context, void *lpu); +void __kbase_tlstream_tl_nret_atom_ctx(struct kbase_timeline *tl, void *atom, void *context); +void 
__kbase_tlstream_tl_nret_atom_lpu(struct kbase_timeline *tl, void *atom, void *lpu); +void __kbase_tlstream_tl_ret_as_ctx(struct kbase_timeline *tl, void *as, void *ctx); +void __kbase_tlstream_tl_nret_as_ctx(struct kbase_timeline *tl, void *as, void *ctx); +void __kbase_tlstream_tl_ret_atom_as(struct kbase_timeline *tl, void *atom, void *as); +void __kbase_tlstream_tl_nret_atom_as(struct kbase_timeline *tl, void *atom, void *as); +void __kbase_tlstream_tl_dep_atom_atom(struct kbase_timeline *tl, void *atom1, void *atom2); +void __kbase_tlstream_tl_ndep_atom_atom(struct kbase_timeline *tl, void *atom1, void *atom2); +void __kbase_tlstream_tl_rdep_atom_atom(struct kbase_timeline *tl, void *atom1, void *atom2); +void __kbase_tlstream_tl_attrib_atom_config(struct kbase_timeline *tl, + void *atom, u64 jd, u64 affinity, u32 config); +void __kbase_tlstream_tl_attrib_atom_priority(struct kbase_timeline *tl, void *atom, u32 prio); +void __kbase_tlstream_tl_attrib_atom_state(struct kbase_timeline *tl, void *atom, u32 state); +void __kbase_tlstream_tl_attrib_atom_prioritized(struct kbase_timeline *tl, void *atom); +void __kbase_tlstream_tl_attrib_atom_jit(struct kbase_timeline *tl, + void *atom, u64 edit_addr, u64 new_addr, + u64 va_pages, u64 jit_flags); +void __kbase_tlstream_tl_attrib_atom_jitallocinfo(struct kbase_timeline *tl, + void *atom, u64 va_pages, u64 commit_pages, u64 extent, + u32 jit_id, u32 bin_id, u32 max_allocations, u32 flags, + u32 usage_id); +void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(struct kbase_timeline *tl, void *atom, u32 jit_id); +void __kbase_tlstream_tl_attrib_as_config(struct kbase_timeline *tl, + void *as, u64 transtab, u64 memattr, u64 transcfg); +void __kbase_tlstream_tl_event_atom_softstop_ex(struct kbase_timeline *tl, void *atom); +void __kbase_tlstream_tl_event_lpu_softstop(struct kbase_timeline *tl, void *lpu); +void __kbase_tlstream_tl_event_atom_softstop_issue(struct kbase_timeline *tl, void *atom); +void 
__kbase_tlstream_tl_event_atom_softjob_start(struct kbase_timeline *tl, void *atom); +void __kbase_tlstream_tl_event_atom_softjob_end(struct kbase_timeline *tl, void *atom); +void __kbase_tlstream_jd_gpu_soft_reset(struct kbase_timeline *tl, void *gpu); +void __kbase_tlstream_aux_pm_state(struct kbase_timeline *tl, u32 core_type, u64 state); +void __kbase_tlstream_aux_pagefault(struct kbase_timeline *tl, u32 ctx_nr, u32 as_nr, u64 page_count_change); +void __kbase_tlstream_aux_pagesalloc(struct kbase_timeline *tl, u32 ctx_nr, u64 page_count); +void __kbase_tlstream_aux_devfreq_target(struct kbase_timeline *tl, u64 target_freq); +void __kbase_tlstream_aux_protected_enter_start(struct kbase_timeline *tl, void *gpu); +void __kbase_tlstream_aux_protected_enter_end(struct kbase_timeline *tl, void *gpu); +void __kbase_tlstream_aux_protected_leave_start(struct kbase_timeline *tl, void *gpu); +void __kbase_tlstream_aux_protected_leave_end(struct kbase_timeline *tl, void *gpu); +void __kbase_tlstream_aux_jit_stats(struct kbase_timeline *tl, u32 ctx_nr, u32 bin_id, + u32 max_allocations, u32 allocations, + u32 va_pages_nr, u32 ph_pages_nr); +void __kbase_tlstream_aux_event_job_slot(struct kbase_timeline *tl, + struct kbase_context *context, u32 slot_nr, u32 atom_nr, u32 event); #define TLSTREAM_ENABLED (1 << 31) -extern atomic_t kbase_tlstream_enabled; - -#define __TRACE_IF_ENABLED(trace_name, ...) \ +#define __TRACE_IF_ENABLED(trace_name, kbdev, ...) \ do { \ - int enabled = atomic_read(&kbase_tlstream_enabled); \ + int enabled = atomic_read(&kbdev->timeline_is_enabled); \ + typecheck(struct kbase_device *, kbdev); \ if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_##trace_name(__VA_ARGS__); \ + __kbase_tlstream_##trace_name(kbdev->timeline, __VA_ARGS__); \ } while (0) -#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...) \ +#define __TRACE_IF_ENABLED_LATENCY(trace_name, kbdev, ...) 
\ do { \ - int enabled = atomic_read(&kbase_tlstream_enabled); \ + int enabled = atomic_read(&kbdev->timeline_is_enabled); \ + typecheck(struct kbase_device *, kbdev); \ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ - __kbase_tlstream_##trace_name(__VA_ARGS__); \ + __kbase_tlstream_##trace_name(kbdev->timeline, __VA_ARGS__); \ } while (0) -#define __TRACE_IF_ENABLED_JD(trace_name, ...) \ +#define __TRACE_IF_ENABLED_JD(trace_name, kbdev, ...) \ do { \ - int enabled = atomic_read(&kbase_tlstream_enabled); \ + int enabled = atomic_read(&kbdev->timeline_is_enabled); \ + typecheck(struct kbase_device *, kbdev); \ if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED) \ - __kbase_tlstream_##trace_name(__VA_ARGS__); \ + __kbase_tlstream_##trace_name(kbdev->timeline, __VA_ARGS__); \ } while (0) + +/*****************************************************************************/ + +/* Gator tracepoints are hooked into TLSTREAM macro interface. + * When the following tracepoints are called, corresponding + * Gator tracepoint will be called as well. + */ +#if defined(CONFIG_MALI_GATOR_SUPPORT) + +/* `event` is one of TL_JS_EVENT values here. + * The values of TL_JS_EVENT are guaranteed to match + * with corresponding GATOR_JOB_SLOT values. 
+ */ +#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, context, slot_nr, atom_nr, event) \ + do { \ + kbase_trace_mali_job_slots_event(kbdev->id, \ + GATOR_MAKE_EVENT(event, slot_nr), \ + context, (u8) atom_nr); \ + __TRACE_IF_ENABLED(aux_event_job_slot, kbdev, context, slot_nr, atom_nr, event); \ + } while (0) + +#define KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state) \ + do { \ + kbase_trace_mali_pm_status(kbdev->id, \ + core_type, state); \ + __TRACE_IF_ENABLED(aux_pm_state, kbdev, core_type, state); \ + } while (0) + + +#define KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, ctx_nr, as_nr, page_count_change) \ + do { \ + kbase_trace_mali_page_fault_insert_pages(kbdev->id, \ + as_nr, \ + page_count_change); \ + __TRACE_IF_ENABLED(aux_pagefault, kbdev, ctx_nr, as_nr, page_count_change); \ + } while (0) + +/* kbase_trace_mali_total_alloc_pages_change is handled differently here. + * We stream the total amount of pages allocated for `kbdev` rather + * than `page_count`, which is per-context. + */ +#define KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, ctx_nr, page_count) do { \ + u32 global_pages_count = atomic_read(&kbdev->memdev.used_pages); \ + kbase_trace_mali_total_alloc_pages_change(kbdev->id, \ + global_pages_count); \ + __TRACE_IF_ENABLED(aux_pagesalloc, kbdev, ctx_nr, page_count); \ + } while (0) + + +#endif + /*****************************************************************************/ /** * KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX - create context object in timeline * summary - * @context: name of the context object - * @nr: context number - * @tgid: thread Group Id + * @kbdev: Kbase device + * @context: Name of the context object + * @nr: Context number + * @tgid: Thread Group Id * * Function emits a timeline message informing about context creation. Context * is created with context number (its attribute), that can be used to link * kbase context with userspace context. * This message is directed to timeline summary stream. 
*/ -#define KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(context, nr, tgid) \ - __TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid) +#define KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(kbdev, context, nr, tgid) \ + __TRACE_IF_ENABLED(tl_summary_new_ctx, kbdev, context, nr, tgid) /** * KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU - create GPU object in timeline summary - * @gpu: name of the GPU object - * @id: id value of this GPU - * @core_count: number of cores this GPU hosts + * @kbdev: Kbase device + * @gpu: Name of the GPU object + * @id: ID value of this GPU + * @core_count: Number of cores this GPU hosts * * Function emits a timeline message informing about GPU creation. GPU is * created with two attributes: id and core count. * This message is directed to timeline summary stream. */ -#define KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(gpu, id, core_count) \ - __TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count) +#define KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(kbdev, gpu, id, core_count) \ + __TRACE_IF_ENABLED(tl_summary_new_gpu, kbdev, gpu, id, core_count) /** * KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU - create LPU object in timeline summary - * @lpu: name of the Logical Processing Unit object - * @nr: sequential number assigned to this LPU - * @fn: property describing this LPU's functional abilities + * @kbdev: Kbase device + * @lpu: Name of the Logical Processing Unit object + * @nr: Sequential number assigned to this LPU + * @fn: Property describing this LPU's functional abilities * * Function emits a timeline message informing about LPU creation. LPU is * created with two attributes: number linking this LPU with GPU's job slot * and function bearing information about this LPU abilities. * This message is directed to timeline summary stream. 
*/ -#define KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, nr, fn) \ - __TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn) +#define KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(kbdev, lpu, nr, fn) \ + __TRACE_IF_ENABLED(tl_summary_new_lpu, kbdev, lpu, nr, fn) /** * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU - lifelink LPU object to GPU - * @lpu: name of the Logical Processing Unit object - * @gpu: name of the GPU object + * @kbdev: Kbase device + * @lpu: Name of the Logical Processing Unit object + * @gpu: Name of the GPU object * * Function emits a timeline message informing that LPU object shall be deleted * along with GPU object. * This message is directed to timeline summary stream. */ -#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, gpu) \ - __TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu) +#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(kbdev, lpu, gpu) \ + __TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, kbdev, lpu, gpu) /** * KBASE_TLSTREAM_TL_SUMMARY_NEW_AS - create address space object in timeline summary - * @as: name of the address space object - * @nr: sequential number assigned to this address space + * @kbdev: Kbase device + * @as: Name of the address space object + * @nr: Sequential number assigned to this address space * * Function emits a timeline message informing about address space creation. * Address space is created with one attribute: number identifying this * address space. * This message is directed to timeline summary stream. 
*/ -#define KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(as, nr) \ - __TRACE_IF_ENABLED(tl_summary_new_as, as, nr) +#define KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(kbdev, as, nr) \ + __TRACE_IF_ENABLED(tl_summary_new_as, kbdev, as, nr) /** * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU - lifelink address space object to GPU - * @as: name of the address space object - * @gpu: name of the GPU object + * @kbdev: Kbase device + * @as: Name of the address space object + * @gpu: Name of the GPU object * * Function emits a timeline message informing that address space object * shall be deleted along with GPU object. * This message is directed to timeline summary stream. */ -#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(as, gpu) \ - __TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu) +#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(kbdev, as, gpu) \ + __TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, kbdev, as, gpu) /** * KBASE_TLSTREAM_TL_NEW_CTX - create context object in timeline - * @context: name of the context object - * @nr: context number - * @tgid: thread Group Id + * @kbdev: Kbase device + * @context: Name of the context object + * @nr: Context number + * @tgid: Thread Group Id * * Function emits a timeline message informing about context creation. Context * is created with context number (its attribute), that can be used to link * kbase context with userspace context. */ -#define KBASE_TLSTREAM_TL_NEW_CTX(context, nr, tgid) \ - __TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid) +#define KBASE_TLSTREAM_TL_NEW_CTX(kbdev, context, nr, tgid) \ + __TRACE_IF_ENABLED(tl_new_ctx, kbdev, context, nr, tgid) /** * KBASE_TLSTREAM_TL_NEW_ATOM - create atom object in timeline - * @atom: name of the atom object - * @nr: sequential number assigned to this atom + * @kbdev: Kbase device + * @atom: Name of the atom object + * @nr: Sequential number assigned to this atom * * Function emits a timeline message informing about atom creation. 
Atom is * created with atom number (its attribute) that links it with actual work * bucket id understood by hardware. */ -#define KBASE_TLSTREAM_TL_NEW_ATOM(atom, nr) \ - __TRACE_IF_ENABLED(tl_new_atom, atom, nr) +#define KBASE_TLSTREAM_TL_NEW_ATOM(kbdev, atom, nr) \ + __TRACE_IF_ENABLED(tl_new_atom, kbdev, atom, nr) /** * KBASE_TLSTREAM_TL_DEL_CTX - destroy context object in timeline - * @context: name of the context object + * @kbdev: Kbase device + * @context: Name of the context object * * Function emits a timeline message informing that context object ceased to * exist. */ -#define KBASE_TLSTREAM_TL_DEL_CTX(context) \ - __TRACE_IF_ENABLED(tl_del_ctx, context) +#define KBASE_TLSTREAM_TL_DEL_CTX(kbdev, context) \ + __TRACE_IF_ENABLED(tl_del_ctx, kbdev, context) /** * KBASE_TLSTREAM_TL_DEL_ATOM - destroy atom object in timeline - * @atom: name of the atom object + * @kbdev: Kbase device + * @atom: Name of the atom object * * Function emits a timeline message informing that atom object ceased to * exist. */ -#define KBASE_TLSTREAM_TL_DEL_ATOM(atom) \ - __TRACE_IF_ENABLED(tl_del_atom, atom) +#define KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, atom) \ + __TRACE_IF_ENABLED(tl_del_atom, kbdev, atom) /** * KBASE_TLSTREAM_TL_RET_CTX_LPU - retain context by LPU - * @context: name of the context object - * @lpu: name of the Logical Processing Unit object + * @kbdev: Kbase device + * @context: Name of the context object + * @lpu: Name of the Logical Processing Unit object * * Function emits a timeline message informing that context is being held * by LPU and must not be deleted unless it is released. 
*/ -#define KBASE_TLSTREAM_TL_RET_CTX_LPU(context, lpu) \ - __TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu) +#define KBASE_TLSTREAM_TL_RET_CTX_LPU(kbdev, context, lpu) \ + __TRACE_IF_ENABLED(tl_ret_ctx_lpu, kbdev, context, lpu) /** * KBASE_TLSTREAM_TL_RET_ATOM_CTX - retain atom by context - * @atom: name of the atom object - * @context: name of the context object + * @kbdev: Kbase device + * @atom: Name of the atom object + * @context: Name of the context object * * Function emits a timeline message informing that atom object is being held * by context and must not be deleted unless it is released. */ -#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(atom, context) \ - __TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context) +#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, atom, context) \ + __TRACE_IF_ENABLED(tl_ret_atom_ctx, kbdev, atom, context) /** * KBASE_TLSTREAM_TL_RET_ATOM_LPU - retain atom by LPU - * @atom: name of the atom object - * @lpu: name of the Logical Processing Unit object - * @attrib_match_list: list containing match operator attributes + * @kbdev: Kbase device + * @atom: Name of the atom object + * @lpu: Name of the Logical Processing Unit object + * @attrib_match_list: List containing match operator attributes * * Function emits a timeline message informing that atom object is being held * by LPU and must not be deleted unless it is released. 
*/ -#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(atom, lpu, attrib_match_list) \ - __TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list) +#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(kbdev, atom, lpu, attrib_match_list) \ + __TRACE_IF_ENABLED(tl_ret_atom_lpu, kbdev, atom, lpu, attrib_match_list) /** * KBASE_TLSTREAM_TL_NRET_CTX_LPU - release context by LPU - * @context: name of the context object - * @lpu: name of the Logical Processing Unit object + * @kbdev: Kbase device + * @context: Name of the context object + * @lpu: Name of the Logical Processing Unit object * * Function emits a timeline message informing that context is being released * by LPU object. */ -#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(context, lpu) \ - __TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu) +#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, context, lpu) \ + __TRACE_IF_ENABLED(tl_nret_ctx_lpu, kbdev, context, lpu) /** * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - release atom by context - * @atom: name of the atom object - * @context: name of the context object + * @kbdev: Kbase device + * @atom: Name of the atom object + * @context: Name of the context object * * Function emits a timeline message informing that atom object is being * released by context. */ -#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(atom, context) \ - __TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context) +#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(kbdev, atom, context) \ + __TRACE_IF_ENABLED(tl_nret_atom_ctx, kbdev, atom, context) /** * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - release atom by LPU - * @atom: name of the atom object - * @lpu: name of the Logical Processing Unit object + * @kbdev: Kbase device + * @atom: Name of the atom object + * @lpu: Name of the Logical Processing Unit object * * Function emits a timeline message informing that atom object is being * released by LPU. 
*/ -#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(atom, lpu) \ - __TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu) +#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, atom, lpu) \ + __TRACE_IF_ENABLED(tl_nret_atom_lpu, kbdev, atom, lpu) /** * KBASE_TLSTREAM_TL_RET_AS_CTX - lifelink address space object to context - * @as: name of the address space object - * @ctx: name of the context object + * @kbdev: Kbase device + * @as: Name of the address space object + * @ctx: Name of the context object * * Function emits a timeline message informing that address space object * is being held by the context object. */ -#define KBASE_TLSTREAM_TL_RET_AS_CTX(as, ctx) \ - __TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx) +#define KBASE_TLSTREAM_TL_RET_AS_CTX(kbdev, as, ctx) \ + __TRACE_IF_ENABLED(tl_ret_as_ctx, kbdev, as, ctx) /** * KBASE_TLSTREAM_TL_NRET_AS_CTX - release address space by context - * @as: name of the address space object - * @ctx: name of the context object + * @kbdev: Kbase device + * @as: Name of the address space object + * @ctx: Name of the context object * * Function emits a timeline message informing that address space object * is being released by the context. */ -#define KBASE_TLSTREAM_TL_NRET_AS_CTX(as, ctx) \ - __TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx) +#define KBASE_TLSTREAM_TL_NRET_AS_CTX(kbdev, as, ctx) \ + __TRACE_IF_ENABLED(tl_nret_as_ctx, kbdev, as, ctx) /** * KBASE_TLSTREAM_TL_RET_ATOM_AS - retain atom by address space - * @atom: name of the atom object - * @as: name of the address space object + * @kbdev: Kbase device + * @atom: Name of the atom object + * @as: Name of the address space object * * Function emits a timeline message informing that atom object is being held * by address space and must not be deleted unless it is released.
*/ -#define KBASE_TLSTREAM_TL_RET_ATOM_AS(atom, as) \ - __TRACE_IF_ENABLED(tl_ret_atom_as, atom, as) +#define KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, atom, as) \ + __TRACE_IF_ENABLED(tl_ret_atom_as, kbdev, atom, as) /** * KBASE_TLSTREAM_TL_NRET_ATOM_AS - release atom by address space - * @atom: name of the atom object - * @as: name of the address space object + * @kbdev: Kbase device + * @atom: Name of the atom object + * @as: Name of the address space object * * Function emits a timeline message informing that atom object is being * released by address space. */ -#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(atom, as) \ - __TRACE_IF_ENABLED(tl_nret_atom_as, atom, as) +#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, atom, as) \ + __TRACE_IF_ENABLED(tl_nret_atom_as, kbdev, atom, as) /** * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes - * @atom: name of the atom object - * @jd: job descriptor address - * @affinity: job affinity - * @config: job config + * @kbdev: Kbase device + * @atom: Name of the atom object + * @jd: Job descriptor address + * @affinity: Job affinity + * @config: Job config * * Function emits a timeline message containing atom attributes. */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(atom, jd, affinity, config) \ - __TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config) +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, atom, jd, affinity, config) \ + __TRACE_IF_ENABLED(tl_attrib_atom_config, kbdev, atom, jd, affinity, config) /** * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority - * @atom: name of the atom object - * @prio: atom priority + * @kbdev: Kbase device + * @atom: Name of the atom object + * @prio: Atom priority * * Function emits a timeline message containing atom priority. 
*/ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(atom, prio) \ - __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio) +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, atom, prio) \ + __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, kbdev, atom, prio) /** * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state - * @atom: name of the atom object - * @state: atom state + * @kbdev: Kbase device + * @atom: Name of the atom object + * @state: Atom state * * Function emits a timeline message containing atom state. */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, state) \ - __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state) +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, state) \ + __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, kbdev, atom, state) /** * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED - atom was prioritized - * @atom: name of the atom object + * @kbdev: Kbase device + * @atom: Name of the atom object * * Function emits a timeline message signalling priority change */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(atom) \ - __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_prioritized, atom) +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(kbdev, atom) \ + __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_prioritized, kbdev, atom) /** * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - jit happened on atom - * @atom: atom identifier - * @edit_addr: address edited by jit - * @new_addr: address placed into the edited location - * @va_pages: maximum number of pages this jit can allocate + * @kbdev: Kbase device + * @atom: Atom identifier + * @edit_addr: Address edited by jit + * @new_addr: Address placed into the edited location + * @va_pages: Maximum number of pages this jit can allocate + * @jit_flags: Flags defining the properties of the memory region */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr, va_pages) \ - __TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, \ - new_addr, va_pages) +#define 
KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(kbdev, atom, edit_addr, new_addr, \ + va_pages, jit_flags) \ + __TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, kbdev, atom, edit_addr, \ + new_addr, va_pages, jit_flags) /** * Information about the JIT allocation atom. * + * @kbdev: Kbase device * @atom: Atom identifier. * @va_pages: The minimum number of virtual pages required. * @commit_pages: The minimum number of physical pages which @@ -504,177 +609,215 @@ extern atomic_t kbase_tlstream_enabled; * The kernel should attempt to use a previous * allocation with the same usage_id */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(atom, va_pages,\ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(kbdev, atom, va_pages, \ commit_pages, extent, jit_id, bin_id,\ max_allocations, jit_flags, usage_id) \ - __TRACE_IF_ENABLED(tl_attrib_atom_jitallocinfo, atom, va_pages,\ + __TRACE_IF_ENABLED(tl_attrib_atom_jitallocinfo, kbdev, atom, va_pages, \ commit_pages, extent, jit_id, bin_id,\ max_allocations, jit_flags, usage_id) /** * Information about the JIT free atom. * + * @kbdev: Kbase device * @atom: Atom identifier. * @jit_id: Unique ID provided by the caller, this is used * to pair allocation and free requests. 
*/ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(atom, jit_id) \ - __TRACE_IF_ENABLED(tl_attrib_atom_jitfreeinfo, atom, jit_id) +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(kbdev, atom, jit_id) \ + __TRACE_IF_ENABLED(tl_attrib_atom_jitfreeinfo, kbdev, atom, jit_id) /** * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes - * @as: assigned address space - * @transtab: configuration of the TRANSTAB register - * @memattr: configuration of the MEMATTR register - * @transcfg: configuration of the TRANSCFG register (or zero if not present) + * @kbdev: Kbase device + * @as: Assigned address space + * @transtab: Configuration of the TRANSTAB register + * @memattr: Configuration of the MEMATTR register + * @transcfg: Configuration of the TRANSCFG register (or zero if not present) * * Function emits a timeline message containing address space attributes. */ -#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, transtab, memattr, transcfg) \ - __TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg) +#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as, transtab, memattr, transcfg) \ + __TRACE_IF_ENABLED(tl_attrib_as_config, kbdev, as, transtab, memattr, transcfg) /** * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX - * @atom: atom identifier + * @kbdev: Kbase device + * @atom: Atom identifier */ -#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \ - __TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom) +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, atom) \ + __TRACE_IF_ENABLED(tl_event_atom_softstop_ex, kbdev, atom) /** * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP - * @lpu: name of the LPU object + * @kbdev: Kbase device + * @lpu: Name of the LPU object */ -#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \ - __TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu) +#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(kbdev, lpu) \ + __TRACE_IF_ENABLED(tl_event_lpu_softstop, kbdev, lpu) /** * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE - * @atom: atom 
identifier + * @kbdev: Kbase device + * @atom: Atom identifier */ -#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \ - __TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom) +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, atom) \ + __TRACE_IF_ENABLED(tl_event_atom_softstop_issue, kbdev, atom) /** * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START - * @atom: atom identifier + * @kbdev: Kbase device + * @atom: Atom identifier */ -#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(atom) \ - __TRACE_IF_ENABLED(tl_event_atom_softjob_start, atom) +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, atom) \ + __TRACE_IF_ENABLED(tl_event_atom_softjob_start, kbdev, atom) /** * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END - * @atom: atom identifier + * @kbdev: Kbase device + * @atom: Atom identifier */ -#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(atom) \ - __TRACE_IF_ENABLED(tl_event_atom_softjob_end, atom) +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, atom) \ + __TRACE_IF_ENABLED(tl_event_atom_softjob_end, kbdev, atom) /** * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset - * @gpu: name of the GPU object + * @kbdev: Kbase device + * @gpu: Name of the GPU object * * This imperative tracepoint is specific to job dumping. * Function emits a timeline message indicating GPU soft reset. 
*/ -#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(gpu) \ - __TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu) +#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, gpu) \ + __TRACE_IF_ENABLED(jd_gpu_soft_reset, kbdev, gpu) /** * KBASE_TLSTREAM_AUX_PM_STATE - timeline message: power management state - * @core_type: core type (shader, tiler, l2 cache, l3 cache) + * @kbdev: Kbase device + * @core_type: Core type (shader, tiler, l2 cache, l3 cache) * @state: 64bits bitmask reporting power state of the cores (1-ON, 0-OFF) */ -#define KBASE_TLSTREAM_AUX_PM_STATE(core_type, state) \ - __TRACE_IF_ENABLED(aux_pm_state, core_type, state) +#if !defined(KBASE_TLSTREAM_AUX_PM_STATE) +#define KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state) \ + __TRACE_IF_ENABLED(aux_pm_state, kbdev, core_type, state) +#endif /** * KBASE_TLSTREAM_AUX_PAGEFAULT - timeline message: MMU page fault event * resulting in new pages being mapped - * @ctx_nr: kernel context number - * @page_count_change: number of pages to be added + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @as_nr: Address space number + * @page_count_change: Number of pages to be added */ -#define KBASE_TLSTREAM_AUX_PAGEFAULT(ctx_nr, page_count_change) \ - __TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change) +#if !defined(KBASE_TLSTREAM_AUX_PAGEFAULT) +#define KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, ctx_nr, as_nr, page_count_change) \ + __TRACE_IF_ENABLED(aux_pagefault, kbdev, ctx_nr, as_nr, page_count_change) +#endif /** * KBASE_TLSTREAM_AUX_PAGESALLOC - timeline message: total number of allocated * pages is changed - * @ctx_nr: kernel context number - * @page_count: number of pages used by the context + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @page_count: Number of pages used by the context */ -#define KBASE_TLSTREAM_AUX_PAGESALLOC(ctx_nr, page_count) \ - __TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count) +#if !defined(KBASE_TLSTREAM_AUX_PAGESALLOC) +#define KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, 
ctx_nr, page_count) \ + __TRACE_IF_ENABLED(aux_pagesalloc, kbdev, ctx_nr, page_count) +#endif /** * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - timeline message: new target DVFS * frequency - * @target_freq: new target frequency + * @kbdev: Kbase device + * @target_freq: New target frequency */ -#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(target_freq) \ - __TRACE_IF_ENABLED(aux_devfreq_target, target_freq) +#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, target_freq) \ + __TRACE_IF_ENABLED(aux_devfreq_target, kbdev, target_freq) /** * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - The GPU has started transitioning * to protected mode - * @gpu: name of the GPU object + * @kbdev: Kbase device + * @gpu: Name of the GPU object * * Function emits a timeline message indicating the GPU is starting to * transition to protected mode. */ -#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(gpu) \ - __TRACE_IF_ENABLED_LATENCY(aux_protected_enter_start, gpu) +#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, gpu) \ + __TRACE_IF_ENABLED_LATENCY(aux_protected_enter_start, kbdev, gpu) /** * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - The GPU has finished transitioning * to protected mode - * @gpu: name of the GPU object + * @kbdev: Kbase device + * @gpu: Name of the GPU object * * Function emits a timeline message indicating the GPU has finished * transitioning to protected mode. */ -#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(gpu) \ - __TRACE_IF_ENABLED_LATENCY(aux_protected_enter_end, gpu) +#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, gpu) \ + __TRACE_IF_ENABLED_LATENCY(aux_protected_enter_end, kbdev, gpu) /** * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - The GPU has started transitioning * to non-protected mode - * @gpu: name of the GPU object + * @kbdev: Kbase device + * @gpu: Name of the GPU object * * Function emits a timeline message indicating the GPU is starting to * transition to non-protected mode. 
*/ -#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(gpu) \ - __TRACE_IF_ENABLED_LATENCY(aux_protected_leave_start, gpu) +#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev, gpu) \ + __TRACE_IF_ENABLED_LATENCY(aux_protected_leave_start, kbdev, gpu) /** * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - The GPU has finished transitioning * to non-protected mode - * @gpu: name of the GPU object + * @kbdev: Kbase device + * @gpu: Name of the GPU object * * Function emits a timeline message indicating the GPU has finished * transitioning to non-protected mode. */ -#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \ - __TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu) +#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev, gpu) \ + __TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, kbdev, gpu) /** * KBASE_TLSTREAM_AUX_JIT_STATS - JIT allocations per bin statistics * - * @ctx_nr: kernel context number - * @bid: JIT bin id - * @max_allocs: maximum allocations allowed in this bin. - * UINT_MAX is a special value. It denotes that - * the parameter was not changed since the last time. - * @allocs: number of active allocations in this bin - * @va_pages: number of virtual pages allocated in this bin - * @ph_pages: number of physical pages allocated in this bin + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @bid: JIT bin id + * @max_allocs: Maximum allocations allowed in this bin. + * UINT_MAX is a special value. It denotes that + * the parameter was not changed since the last time. + * @allocs: Number of active allocations in this bin + * @va_pages: Number of virtual pages allocated in this bin + * @ph_pages: Number of physical pages allocated in this bin * * Function emits a timeline message indicating the JIT statistics * for a given bin have changed.
*/ -#define KBASE_TLSTREAM_AUX_JIT_STATS(ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages) \ - __TRACE_IF_ENABLED(aux_jit_stats, ctx_nr, bid, \ +#define KBASE_TLSTREAM_AUX_JIT_STATS(kbdev, ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages) \ + __TRACE_IF_ENABLED(aux_jit_stats, kbdev, ctx_nr, bid, \ max_allocs, allocs, \ va_pages, ph_pages) -#endif /* _KBASE_TLSTREAM_H */ +/** + * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT - An event has happened on a job slot + * + * @kbdev: Kbase device + * @context: Kernel context pointer, NULL if event is not TL_JS_EVENT_START + * @slot_nr: Job slot number + * @atom_nr: Sequential number of an atom which has started + * execution on the job slot. Zero, if event is not TL_JS_EVENT_START. + * @event: Event type. One of TL_JS_EVENT values. + */ +#if !defined(KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT) +#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, context, slot_nr, atom_nr, event) \ + __TRACE_IF_ENABLED(aux_event_job_slot, kbdev, context, slot_nr, atom_nr, event) +#endif +#endif /* _KBASE_TLSTREAM_H */ diff --git a/mali_kbase/mali_linux_trace.h b/mali_kbase/mali_linux_trace.h index 0741dfc..96296ac 100644 --- a/mali_kbase/mali_linux_trace.h +++ b/mali_kbase/mali_linux_trace.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,159 +32,103 @@ #define MALI_JOB_SLOTS_EVENT_CHANGED /** - * mali_job_slots_event - called from mali_kbase_core_linux.c - * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro. + * mali_job_slots_event - Reports change of job slot status. + * @gpu_id: Kbase device id + * @event_id: ORed together bitfields representing a type of event, + * made with the GATOR_MAKE_EVENT() macro. 
*/ TRACE_EVENT(mali_job_slots_event, - TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, - unsigned char job_id), - TP_ARGS(event_id, tgid, pid, job_id), + TP_PROTO(u32 gpu_id, u32 event_id, u32 tgid, u32 pid, + u8 job_id), + TP_ARGS(gpu_id, event_id, tgid, pid, job_id), TP_STRUCT__entry( - __field(unsigned int, event_id) - __field(unsigned int, tgid) - __field(unsigned int, pid) - __field(unsigned char, job_id) + __field(u32, gpu_id) + __field(u32, event_id) + __field(u32, tgid) + __field(u32, pid) + __field(u8, job_id) ), TP_fast_assign( + __entry->gpu_id = gpu_id; __entry->event_id = event_id; - __entry->tgid = tgid; - __entry->pid = pid; - __entry->job_id = job_id; + __entry->tgid = tgid; + __entry->pid = pid; + __entry->job_id = job_id; ), - TP_printk("event=%u tgid=%u pid=%u job_id=%u", - __entry->event_id, __entry->tgid, __entry->pid, __entry->job_id) + TP_printk("gpu=%u event=%u tgid=%u pid=%u job_id=%u", + __entry->gpu_id, __entry->event_id, + __entry->tgid, __entry->pid, __entry->job_id) ); /** - * mali_pm_status - Called by mali_kbase_pm_driver.c - * @event_id: core type (shader, tiler, l2 cache) - * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF) + * mali_pm_status - Reports change of power management status. 
+ * @gpu_id: Kbase device id + * @event_id: Core type (shader, tiler, L2 cache) + * @value: 64bits bitmask reporting either power status of + * the cores (1-ON, 0-OFF) */ TRACE_EVENT(mali_pm_status, - TP_PROTO(unsigned int event_id, unsigned long long value), - TP_ARGS(event_id, value), + TP_PROTO(u32 gpu_id, u32 event_id, u64 value), + TP_ARGS(gpu_id, event_id, value), TP_STRUCT__entry( - __field(unsigned int, event_id) - __field(unsigned long long, value) + __field(u32, gpu_id) + __field(u32, event_id) + __field(u64, value) ), TP_fast_assign( + __entry->gpu_id = gpu_id; __entry->event_id = event_id; - __entry->value = value; + __entry->value = value; ), - TP_printk("event %u = %llu", __entry->event_id, __entry->value) + TP_printk("gpu=%u event %u = %llu", + __entry->gpu_id, __entry->event_id, __entry->value) ); /** - * mali_pm_power_on - Called by mali_kbase_pm_driver.c - * @event_id: core type (shader, tiler, l2 cache) - * @value: 64bits bitmask reporting the cores to power up - */ -TRACE_EVENT(mali_pm_power_on, - TP_PROTO(unsigned int event_id, unsigned long long value), - TP_ARGS(event_id, value), - TP_STRUCT__entry( - __field(unsigned int, event_id) - __field(unsigned long long, value) - ), - TP_fast_assign( - __entry->event_id = event_id; - __entry->value = value; - ), - TP_printk("event %u = %llu", __entry->event_id, __entry->value) -); - -/** - * mali_pm_power_off - Called by mali_kbase_pm_driver.c - * @event_id: core type (shader, tiler, l2 cache) - * @value: 64bits bitmask reporting the cores to power down - */ -TRACE_EVENT(mali_pm_power_off, - TP_PROTO(unsigned int event_id, unsigned long long value), - TP_ARGS(event_id, value), - TP_STRUCT__entry( - __field(unsigned int, event_id) - __field(unsigned long long, value) - ), - TP_fast_assign( - __entry->event_id = event_id; - __entry->value = value; - ), - TP_printk("event %u = %llu", __entry->event_id, __entry->value) -); - -/** - * mali_page_fault_insert_pages - Called by page_fault_worker() - * it 
reports an MMU page fault resulting in new pages being mapped. - * @event_id: MMU address space number. - * @value: number of newly allocated pages + * mali_page_fault_insert_pages - Reports an MMU page fault + * resulting in new pages being mapped. + * @gpu_id: Kbase device id + * @event_id: MMU address space number + * @value: Number of newly allocated pages */ TRACE_EVENT(mali_page_fault_insert_pages, - TP_PROTO(int event_id, unsigned long value), - TP_ARGS(event_id, value), - TP_STRUCT__entry( - __field(int, event_id) - __field(unsigned long, value) - ), - TP_fast_assign( - __entry->event_id = event_id; - __entry->value = value; - ), - TP_printk("event %d = %lu", __entry->event_id, __entry->value) -); - -/** - * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space() - * it reports that a certain MMU address space is in use now. - * @event_id: MMU address space number. - */ -TRACE_EVENT(mali_mmu_as_in_use, - TP_PROTO(int event_id), - TP_ARGS(event_id), - TP_STRUCT__entry( - __field(int, event_id) - ), - TP_fast_assign( - __entry->event_id = event_id; - ), - TP_printk("event=%d", __entry->event_id) -); - -/** - * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal() - * it reports that a certain MMU address space has been released now. - * @event_id: MMU address space number. 
- */ -TRACE_EVENT(mali_mmu_as_released, - TP_PROTO(int event_id), - TP_ARGS(event_id), + TP_PROTO(u32 gpu_id, s32 event_id, u64 value), + TP_ARGS(gpu_id, event_id, value), TP_STRUCT__entry( - __field(int, event_id) + __field(u32, gpu_id) + __field(s32, event_id) + __field(u64, value) ), TP_fast_assign( + __entry->gpu_id = gpu_id; __entry->event_id = event_id; + __entry->value = value; ), - TP_printk("event=%d", __entry->event_id) + TP_printk("gpu=%u event %d = %llu", + __entry->gpu_id, __entry->event_id, __entry->value) ); /** - * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages() - * and by kbase_atomic_sub_pages() - * it reports that the total number of allocated pages is changed. - * @event_id: number of pages to be added or subtracted (according to the sign). + * mali_total_alloc_pages_change - Reports that the total number of + * allocated pages has changed. + * @gpu_id: Kbase device id + * @event_id: Total number of pages allocated */ TRACE_EVENT(mali_total_alloc_pages_change, - TP_PROTO(long long int event_id), - TP_ARGS(event_id), + TP_PROTO(u32 gpu_id, s64 event_id), + TP_ARGS(gpu_id, event_id), TP_STRUCT__entry( - __field(long long int, event_id) + __field(u32, gpu_id) + __field(s64, event_id) ), TP_fast_assign( + __entry->gpu_id = gpu_id; __entry->event_id = event_id; ), - TP_printk("event=%lld", __entry->event_id) + TP_printk("gpu=%u event=%lld", __entry->gpu_id, __entry->event_id) ); -#endif /* _TRACE_MALI_H */ +#endif /* _TRACE_MALI_H */ #undef TRACE_INCLUDE_PATH #undef linux diff --git a/mali_kbase/mali_midg_regmap.h b/mali_kbase/mali_midg_regmap.h index 0f03e8d..223ce6a 100644 --- a/mali_kbase/mali_midg_regmap.h +++ b/mali_kbase/mali_midg_regmap.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,269 +25,178 @@ #include "mali_midg_coherency.h" #include "mali_kbase_gpu_id.h" +#include "mali_midg_regmap_jm.h" -/* - * Begin Register Offsets - */ +/* Begin Register Offsets */ +/* GPU control registers */ #define GPU_CONTROL_BASE 0x0000 #define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) -#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ -#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ -#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ -#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ -#define MEM_FEATURES 0x010 /* (RO) Memory system features */ -#define MMU_FEATURES 0x014 /* (RO) MMU features */ -#define AS_PRESENT 0x018 /* (RO) Address space slots present */ -#define JS_PRESENT 0x01C /* (RO) Job slots present */ -#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ -#define GPU_IRQ_CLEAR 0x024 /* (WO) */ -#define GPU_IRQ_MASK 0x028 /* (RW) */ -#define GPU_IRQ_STATUS 0x02C /* (RO) */ - - -/* IRQ flags */ -#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ -#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ -#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. Intended to use with SOFT_RESET - commands which may take time. */ -#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ -#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down - and the power manager is idle. 
*/ +#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ +#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ +#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ +#define MEM_FEATURES 0x010 /* (RO) Memory system features */ +#define MMU_FEATURES 0x014 /* (RO) MMU features */ +#define AS_PRESENT 0x018 /* (RO) Address space slots present */ +#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ +#define GPU_IRQ_CLEAR 0x024 /* (WO) */ +#define GPU_IRQ_MASK 0x028 /* (RW) */ +#define GPU_IRQ_STATUS 0x02C /* (RO) */ + +#define GPU_COMMAND 0x030 /* (WO) */ +#define GPU_STATUS 0x034 /* (RO) */ + +#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ + +#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ +#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ +#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ + +#define PWR_KEY 0x050 /* (WO) Power manager key register */ +#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ +#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ + +#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory region base address, low word */ +#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory region base address, high word */ +#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */ +#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable flags for Job Manager */ +#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable flags for shader cores */ +#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable flags for tiler */ +#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable flags for MMU/L2 cache */ + +#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ +#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ +#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ +#define TIMESTAMP_HI 0x09C /* (RO) Global time 
stamp counter, high word */ + +#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ +#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ +#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ +#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ +#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ + +#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ +#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ +#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ +#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ -#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ -#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ +#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) -#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ - | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) - -#define GPU_COMMAND 0x030 /* (WO) */ -#define GPU_STATUS 0x034 /* (RO) */ -#define LATEST_FLUSH 0x038 /* (RO) */ - -#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ -#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ - -#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ -#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ -#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ - -#define PWR_KEY 0x050 /* (WO) Power manager key register */ -#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ -#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ - -#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory region base address, low word */ 
-#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory region base address, high word */ -#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */ -#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable flags for Job Manager */ -#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable flags for shader cores */ -#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable flags for tiler */ -#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable flags for MMU/L2 cache */ - -#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ -#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ -#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ -#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ - -#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ -#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ -#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ -#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ -#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that - * TLS must be allocated for - */ - -#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ -#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ -#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ -#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ +#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ +#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ -#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) +#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ -#define 
JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ -#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ -#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ -#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ -#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ -#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ -#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ -#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ -#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ -#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ -#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ -#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ -#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ -#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ -#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ -#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ - -#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) - -#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ -#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ - -#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ -#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ - -#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ -#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ +#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ #define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ #define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core 
ready bitmap, high word */ -#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ -#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ -#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ -#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ - -#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ -#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ #define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ #define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ -#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ -#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ - -#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ -#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ -#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ -#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ #define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ #define STACK_PWRON_HI 0xE24 /* 
(RO) Core stack power on bitmap, high word */ +#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ +#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ -#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ -#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ +#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ +#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ -#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ -#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ - -#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ -#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ +#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ +#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ #define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ #define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ +#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ +#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ -#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ -#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ - -#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ -#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ +#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ +#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ -#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ 
-#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ +#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ +#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ #define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ #define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ +#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ +#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ -#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ -#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ +#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ +#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ -#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ -#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ +#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ +#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ -#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ -#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ +#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ -#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ -#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ +#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ +#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ +#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ -#define JM_CONFIG 0xF00 /* (RW) Job 
Manager configuration register (Implementation specific register) */ -#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */ -#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration settings (Implementation specific register) */ -#define L2_MMU_CONFIG 0xF0C /* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */ +/* Job control registers */ #define JOB_CONTROL_BASE 0x1000 #define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) -#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ -#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ -#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ -#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ -#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ -#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. 
*/ - -/* JOB IRQ flags */ -#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */ - -#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ -#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ -#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ -#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ -#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ -#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ -#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ -#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ -#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ -#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ -#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ -#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ -#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ -#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ -#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ -#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ - -#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) - -#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ -#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ -#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ -#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ -#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ -#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ -#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ -#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job - slot n */ - -#define JS_COMMAND 
0x20 /* (WO) Command register for job slot n */ -#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ - -#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ -#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ - -#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ -#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ -#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ -#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for - job slot n */ - -#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ - -#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ +#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ +#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ +#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ + +/* MMU control registers */ #define MEMORY_MANAGEMENT_BASE 0x2000 #define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) -#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ -#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ -#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ -#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ - -#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ -#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ -#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ -#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ -#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ -#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ -#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ -#define MMU_AS7 0x5C0 /* 
Configuration registers for address space 7 */ -#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ -#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ -#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ -#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ -#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ -#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ -#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ -#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ +#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ + +#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ +#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ +#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ +#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ +#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ +#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ +#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ +#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ +#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ +#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ +#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ +#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ +#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ +#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ +#define MMU_AS14 0x780 /* Configuration registers for 
address space 14 */ +#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ + +/* MMU address space control registers */ #define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) @@ -303,7 +212,6 @@ #define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ #define AS_STATUS 0x28 /* (RO) Status flags for address space n */ - /* (RW) Translation table configuration for address space n, low word */ #define AS_TRANSCFG_LO 0x30 /* (RW) Translation table configuration for address space n, high word */ @@ -315,17 +223,30 @@ /* End Register Offsets */ +/* IRQ flags */ +#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ +#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ +#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ +#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ +#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ + +#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ +#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ + +#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ + | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) + /* * MMU_IRQ_RAWSTAT register values. Values are valid also for - MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. + * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. 
*/ -#define MMU_PAGE_FAULT_FLAGS 16 +#define MMU_PAGE_FAULT_FLAGS 16 /* Macros returning a bitmask to retrieve page fault or bus error flags from * MMU registers */ -#define MMU_PAGE_FAULT(n) (1UL << (n)) -#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) +#define MMU_PAGE_FAULT(n) (1UL << (n)) +#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) /* * Begin LPAE MMU TRANSTAB register values @@ -350,25 +271,23 @@ */ #define AS_STATUS_AS_ACTIVE 0x01 -#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) - -#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) -#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8) -#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0<<8) -#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8) -#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8) -#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8) +#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8) +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0<<8) +#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8) +#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8) +#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8) /* * Begin MMU TRANSCFG register values */ - #define AS_TRANSCFG_ADRMODE_LEGACY 0 #define AS_TRANSCFG_ADRMODE_UNMAPPED 
1 #define AS_TRANSCFG_ADRMODE_IDENTITY 2 @@ -377,7 +296,6 @@ #define AS_TRANSCFG_ADRMODE_MASK 0xF - /* * Begin TRANSCFG register values */ @@ -389,22 +307,11 @@ #define AS_TRANSCFG_PTW_SH_OS (2ull << 28) #define AS_TRANSCFG_PTW_SH_IS (3ull << 28) #define AS_TRANSCFG_R_ALLOCATE (1ull << 30) + /* * Begin Command Values */ -/* JS_COMMAND register commands */ -#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ -#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */ -#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ -#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ -#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ -#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ -#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ -#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ - -#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ - /* AS_COMMAND register commands */ #define AS_COMMAND_NOP 0x00 /* NOP Operation */ #define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ @@ -416,90 +323,39 @@ #define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then flush all L2 caches then issue a flush region command to all MMUs */ -/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ -#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) -#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) -#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) -#define JS_CONFIG_START_MMU (1u << 10) -#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) -#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION -#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) -#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) -#define 
JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) -#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) -#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) - -/* JS_XAFFINITY register values */ -#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) -#define JS_XAFFINITY_TILER_ENABLE (1u << 8) -#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) - -/* JS_STATUS register values */ - -/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h. - * The values are separated to avoid dependency of userspace and kernel code. - */ - -/* Group of values representing the job status insead a particular fault */ -#define JS_STATUS_NO_EXCEPTION_BASE 0x00 -#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ -#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ -#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ - -/* General fault values */ -#define JS_STATUS_FAULT_BASE 0x40 -#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ -#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ -#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ -#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ -#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ -#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ - -/* Instruction or data faults */ -#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 -#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ -#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ -#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ -#define JS_STATUS_INSTR_OPERAND_FAULT 
(JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ -#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ -#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ -#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ -/* NOTE: No fault with 0x57 code defined in spec. */ -#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ -#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ -#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ - -/* Other faults */ -#define JS_STATUS_MEMORY_FAULT_BASE 0x60 -#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ -#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ - /* GPU_COMMAND values */ -#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ -#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ -#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */ -#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. 
*/ -#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ -#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ -#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ -#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ -#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ -#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ +#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ +#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ +#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */ +#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */ +#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ +#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ +#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ +#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ /* End Command Values */ /* GPU_STATUS values */ -#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ -#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ +#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ +#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ /* PRFCNT_CONFIG register values */ -#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. 
*/ -#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ -#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ - -#define PRFCNT_CONFIG_MODE_OFF 0 /* The performance counters are disabled. */ -#define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */ -#define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */ +#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ +#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ +#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ + +/* The performance counters are disabled. */ +#define PRFCNT_CONFIG_MODE_OFF 0 +/* The performance counters are enabled, but are only written out when a + * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. + */ +#define PRFCNT_CONFIG_MODE_MANUAL 1 +/* The performance counters are enabled, and are written out each time a tile + * finishes rendering. + */ +#define PRFCNT_CONFIG_MODE_TILE 2 /* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */ /* Use GPU implementation-defined caching policy. 
*/ @@ -554,20 +410,6 @@ /* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ #define AS_MEMATTR_INDEX_NON_CACHEABLE 5 -/* JS<n>_FEATURES register */ - -#define JS_FEATURE_NULL_JOB (1u << 1) -#define JS_FEATURE_SET_VALUE_JOB (1u << 2) -#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) -#define JS_FEATURE_COMPUTE_JOB (1u << 4) -#define JS_FEATURE_VERTEX_JOB (1u << 5) -#define JS_FEATURE_GEOMETRY_JOB (1u << 6) -#define JS_FEATURE_TILER_JOB (1u << 7) -#define JS_FEATURE_FUSED_JOB (1u << 8) -#define JS_FEATURE_FRAGMENT_JOB (1u << 9) - -/* End JS<n>_FEATURES register */ - /* L2_MMU_CONFIG register */ #define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) #define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) @@ -610,7 +452,6 @@ /* End THREAD_* registers */ /* SHADER_CONFIG register */ - #define SC_ALT_COUNTERS (1ul << 3) #define SC_OVERRIDE_FWD_PIXEL_KILL (1ul << 4) #define SC_SDC_DISABLE_OQ_DISCARD (1ul << 6) @@ -622,22 +463,7 @@ /* End SHADER_CONFIG register */ /* TILER_CONFIG register */ - #define TC_CLOCK_GATE_OVERRIDE (1ul << 0) - /* End TILER_CONFIG register */ -/* JM_CONFIG register */ - -#define JM_TIMESTAMP_OVERRIDE (1ul << 0) -#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) -#define JM_JOB_THROTTLE_ENABLE (1ul << 2) -#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) -#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) -#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) -#define JM_IDVS_GROUP_SIZE_SHIFT (16) -#define JM_MAX_IDVS_GROUP_SIZE (0x3F) -/* End JM_CONFIG register */ - - #endif /* _MIDGARD_REGMAP_H_ */ diff --git a/mali_kbase/mali_midg_regmap_jm.h b/mali_kbase/mali_midg_regmap_jm.h new file mode 100644 index 0000000..f03c8a6 --- /dev/null +++ b/mali_kbase/mali_midg_regmap_jm.h @@ -0,0 +1,198 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _MIDGARD_REGMAP_JM_H_ +#define _MIDGARD_REGMAP_JM_H_ + +/* GPU control registers */ + +#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ +#define JS_PRESENT 0x01C /* (RO) Job slots present */ +#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest clean-and-invalidate operation */ +#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ + +#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ +#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ +#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ +#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ +#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ +#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ +#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ +#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ +#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ +#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ +#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ +#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ +#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ +#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ 
+#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ +#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ + +#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) + +#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */ + +/* Job control registers */ + +#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ +#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */ + +#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ +#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ +#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ +#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ +#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ +#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ +#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ +#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ +#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ +#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ +#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ +#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ +#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ +#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ +#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ +#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ + +#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) + +#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ +#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ +#define 
JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ +#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ +#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ +#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ +#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ +#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job + slot n */ + +#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ +#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ + +#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ +#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ + +#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ +#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ +#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ +#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for + job slot n */ + +#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ + +#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ + +/* No JM-specific MMU control registers */ +/* No JM-specific MMU address space control registers */ + +/* JS_COMMAND register commands */ +#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ +#define JS_COMMAND_START 0x01 /* Start processing a job chain. 
Writing this value is ignored */ +#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ +#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ +#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ +#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ + +#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ + +/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ +#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) +#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) +#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) +#define JS_CONFIG_START_MMU (1u << 10) +#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) +#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION +#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) +#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) +#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) +#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) +#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) + +/* JS_XAFFINITY register values */ +#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) +#define JS_XAFFINITY_TILER_ENABLE (1u << 8) +#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) + +/* JS_STATUS register values */ + +/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h. + * The values are separated to avoid dependency of userspace and kernel code. 
+ */ + +/* Group of values representing the job status instead of a particular fault */ +#define JS_STATUS_NO_EXCEPTION_BASE 0x00 +#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ +#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ +#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ + +/* General fault values */ +#define JS_STATUS_FAULT_BASE 0x40 +#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ +#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ +#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ +#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ +#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ +#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ + +/* Instruction or data faults */ +#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 +#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ +#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ +#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ +#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ +#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ +#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ +#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ +/* NOTE: No fault with 0x57 code defined in spec. 
*/ +#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ +#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ +#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ + +/* Other faults */ +#define JS_STATUS_MEMORY_FAULT_BASE 0x60 +#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ +#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ + +/* JS<n>_FEATURES register */ +#define JS_FEATURE_NULL_JOB (1u << 1) +#define JS_FEATURE_SET_VALUE_JOB (1u << 2) +#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) +#define JS_FEATURE_COMPUTE_JOB (1u << 4) +#define JS_FEATURE_VERTEX_JOB (1u << 5) +#define JS_FEATURE_GEOMETRY_JOB (1u << 6) +#define JS_FEATURE_TILER_JOB (1u << 7) +#define JS_FEATURE_FUSED_JOB (1u << 8) +#define JS_FEATURE_FRAGMENT_JOB (1u << 9) + +/* JM_CONFIG register */ +#define JM_TIMESTAMP_OVERRIDE (1ul << 0) +#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) +#define JM_JOB_THROTTLE_ENABLE (1ul << 2) +#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) +#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) +#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) +#define JM_IDVS_GROUP_SIZE_SHIFT (16) +#define JM_MAX_IDVS_GROUP_SIZE (0x3F) + +#endif /* _MIDGARD_REGMAP_JM_H_ */ diff --git a/mali_kbase/sconscript b/mali_kbase/sconscript deleted file mode 100644 index f9d9c1b..0000000 --- a/mali_kbase/sconscript +++ /dev/null @@ -1,66 +0,0 @@ -# -# (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -import sys -Import('env') - -SConscript( 'tests/sconscript' ) - -mock_test = 0 - -# Source files required for kbase. -kbase_src = [ - Glob('*.c'), - Glob('backend/*/*.c'), - Glob('internal/*/*.c'), - Glob('ipa/*.c'), - Glob('platform/%s/*.c' % env['platform_config']), - Glob('thirdparty/*.c'), -] - -if env['platform_config']=='juno_soc': - kbase_src += [Glob('platform/devicetree/*.c')] -else: - kbase_src += [Glob('platform/%s/*.c' % env['platform_config'])] - -if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1': - kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')] - mock_test = 1 - -make_args = env.kernel_get_config_defines(ret_list = True) + [ - 'PLATFORM=%s' % env['platform'], - 'MALI_KERNEL_TEST_API=%s' % env['debug'], - 'MALI_UNIT_TEST=%s' % env['unit'], - 'MALI_RELEASE_NAME=%s' % env['mali_release_name'], - 'MALI_CUSTOMER_RELEASE=%s' % env['release'], - 'MALI_USE_CSF=%s' % env['csf'], - 'MALI_COVERAGE=%s' % env['coverage'], -] - -kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, - make_args = make_args) - -if 'smc_protected_mode_switcher' in env: - env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/smc_protected_mode_switcher.ko') - -env.KernelObjTarget('kbase', kbase) - -env.AppendUnique(BASE=['cutils_linked_list']) diff --git a/mali_kbase/tests/kutf/sconscript b/mali_kbase/tests/kutf/sconscript deleted file mode 100644 index 4590d1a..0000000 --- a/mali_kbase/tests/kutf/sconscript +++ 
/dev/null @@ -1,27 +0,0 @@ -# -# (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -Import('kutf_env') - -make_args = kutf_env.kernel_get_config_defines(ret_list = True, extra_cflags = ['-DCONFIG_MALI_KUTF'], extra_configs = ['CONFIG_MALI_KUTF=m']) - -mod = kutf_env.BuildKernelModule('$STATIC_LIB_PATH/kutf.ko', Glob('*.c'), make_args = make_args) -kutf_env.KernelObjTarget('kutf', mod) diff --git a/mali_kbase/tests/mali_kutf_irq_test/sconscript b/mali_kbase/tests/mali_kutf_irq_test/sconscript deleted file mode 100644 index cefac0b..0000000 --- a/mali_kbase/tests/mali_kutf_irq_test/sconscript +++ /dev/null @@ -1,36 +0,0 @@ -# -# (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -import os -Import('env') - -src = [Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/*.c'), Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile')] - -if env.GetOption('clean') : - env.Execute(Action("make clean", '[CLEAN] mali_kutf_irq_test')) - cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, []) - env.KernelObjTarget('mali_kutf_irq_test', cmd) -else: - makeAction=Action("cd ${SOURCE.dir} && make MALI_UNIT_TEST=${unit} MALI_CUSTOMER_RELEASE=${release} MALI_USE_CSF=${csf} %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % env.kernel_get_config_defines(extra_cflags = ['-DCONFIG_MALI_IRQ_LATENCY'], extra_configs = ['CONFIG_MALI_IRQ_LATENCY=m']), '$MAKECOMSTR') - cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [makeAction]) - env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/kutf.ko') - env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/mali_kbase.ko') - env.KernelObjTarget('mali_kutf_irq_test', cmd) diff --git a/mali_kbase/tests/sconscript b/mali_kbase/tests/sconscript deleted file mode 100644 index ca64e83..0000000 --- a/mali_kbase/tests/sconscript +++ /dev/null @@ -1,43 +0,0 @@ -# -# (C) COPYRIGHT 2010-2011, 2013, 2017-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -Import ('env') - -kutf_env = env.Clone() -kutf_env.Append(CPPPATH = '#kernel/drivers/gpu/arm/midgard/tests/include') -Export('kutf_env') - -if Glob('internal/sconscript'): - SConscript('internal/sconscript') - -if kutf_env['debug'] == '1': - SConscript('kutf/sconscript') - SConscript('mali_kutf_irq_test/sconscript') - - if Glob('kutf_test/sconscript'): - SConscript('kutf_test/sconscript') - - if Glob('kutf_test_runner/sconscript'): - SConscript('kutf_test_runner/sconscript') - -if env['unit'] == '1': - SConscript('mali_kutf_ipa_unit_test/sconscript') - SConscript('mali_kutf_vinstr_test/sconscript') diff --git a/mali_kbase/thirdparty/mali_kbase_mmap.c b/mali_kbase/thirdparty/mali_kbase_mmap.c index 3aab51a..9cb0465 100644 --- a/mali_kbase/thirdparty/mali_kbase_mmap.c +++ b/mali_kbase/thirdparty/mali_kbase_mmap.c @@ -303,12 +303,15 @@ unsigned long kbase_get_unmapped_area(struct file *filp, if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) && (PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) { int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); - struct kbase_va_region *reg = - kctx->pending_regions[cookie]; + struct kbase_va_region *reg; - if (!reg) + /* Need to hold gpu vm lock when using reg */ + kbase_gpu_vm_lock(kctx); + reg = kctx->pending_regions[cookie]; + if (!reg) { + kbase_gpu_vm_unlock(kctx); return -EINVAL; - + } if (!(reg->flags & KBASE_REG_GPU_NX)) { if (cpu_va_bits > gpu_pc_bits) { align_offset = 1ULL << gpu_pc_bits; @@ -331,6 +334,7 @@ unsigned long kbase_get_unmapped_area(struct file 
*filp, } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) { is_same_4gb_page = true; } + kbase_gpu_vm_unlock(kctx); #ifndef CONFIG_64BIT } else { return current->mm->get_unmapped_area(filp, addr, len, pgoff, |