author | Sidath Senanayake <sidaths@google.com> | 2020-04-14 14:55:25 +0200
committer | Sidath Senanayake <sidaths@google.com> | 2020-04-14 14:55:25 +0200
commit | b64f568f943e567534694cc993270adca96dcd06 (patch)
tree | ff175812d02016e7e630217ecf8de53d6ac2d6a2 /mali_kbase
parent | b2b1764ee0fe59773c1c8f621ad2955c35cd9d92 (diff)
download | gpu-b64f568f943e567534694cc993270adca96dcd06.tar.gz
Mali Valhall DDK r24p0 KMD
Provenance:
c5a37f014 (collaborate/EAC/v_r24p0)
VX504X08X-BU-00000-r24p0-01rel0 - Android DDK
VX504X08X-BU-60000-r24p0-01rel0 - Android Document Bundle
Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: I1536474b6a18731cd377251c6dc947811ba0c787
Diffstat (limited to 'mali_kbase')
81 files changed, 6856 insertions, 3884 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index 400ebe0..7abe8d3 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -21,7 +21,7 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r23p0-01rel0" +MALI_RELEASE_NAME ?= "r24p0-01rel0" # Paths required for build KBASE_PATH = $(src) @@ -34,8 +34,15 @@ MALI_USE_CSF ?= 0 MALI_UNIT_TEST ?= 0 MALI_KERNEL_TEST_API ?= 0 MALI_COVERAGE ?= 0 -MALI_CS_EXPERIMENTAL ?= 0 CONFIG_MALI_PLATFORM_NAME ?= "devicetree" +# Experimental features (corresponding -D definition should be appended to +# DEFINES below, e.g. for MALI_EXPERIMENTAL_FEATURE, +# -DMALI_EXPERIMENTAL_FEATURE=$(MALI_EXPERIMENTAL_FEATURE) should be appended) +# +# Experimental features must default to disabled, e.g.: +# MALI_EXPERIMENTAL_FEATURE ?= 0 +MALI_JIT_PRESSURE_LIMIT ?= 0 +MALI_INCREMENTAL_RENDERING ?= 0 # Set up our defines, which will be passed to gcc DEFINES = \ @@ -45,7 +52,8 @@ DEFINES = \ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ -DMALI_COVERAGE=$(MALI_COVERAGE) \ -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ - -DMALI_CS_EXPERIMENTAL=$(MALI_CS_EXPERIMENTAL) + -DMALI_JIT_PRESSURE_LIMIT=$(MALI_JIT_PRESSURE_LIMIT) \ + -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) ifeq ($(KBUILD_EXTMOD),) # in-tree @@ -74,7 +82,6 @@ SRC := \ mali_kbase_jm.c \ mali_kbase_gpuprops.c \ mali_kbase_js.c \ - mali_kbase_event.c \ mali_kbase_pm.c \ mali_kbase_config.c \ mali_kbase_vinstr.c \ @@ -85,8 +92,6 @@ SRC := \ mali_kbase_hwcnt_types.c \ mali_kbase_hwcnt_virtualizer.c \ mali_kbase_softjobs.c \ - mali_kbase_10969_workaround.c \ - mali_kbase_dummy_job_wa.c \ mali_kbase_hw.c \ mali_kbase_debug.c \ mali_kbase_gpu_memory_debugfs.c \ @@ -122,7 +127,9 @@ ifeq ($(MALI_USE_CSF),1) context/backend/mali_kbase_context_csf.c else SRC += \ + mali_kbase_dummy_job_wa.c \ mali_kbase_debug_job_fault.c \ + mali_kbase_event.c \ mali_kbase_jd.c \ mali_kbase_jd_debugfs.c \ mali_kbase_js_ctx_attr.c \ diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig index a739363..a46305d 100644 --- a/mali_kbase/Kconfig +++ b/mali_kbase/Kconfig @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -265,5 +265,20 @@ config MALI_PRFCNT_SET_SECONDARY If unsure, say N. +config MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + bool "Use secondary set of performance counters" + depends on MALI_MIDGARD && MALI_EXPERT && !MALI_PRFCNT_SET_SECONDARY && DEBUG_FS + default n + help + Select this option to make the secondary set of performance counters + available at runtime via debugfs. Kernel features that depend on an + access to the primary set of counters may become unavailable. + + This feature is unsupported and unstable, and may break at any time. + Enabling this option will prevent power management from working + optimally and may cause instrumentation tools to return bogus results. + + If unsure, say N. 
+ source "drivers/gpu/arm/midgard/platform/Kconfig" source "drivers/gpu/arm/midgard/tests/Kconfig" diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild index 8fe7aba..2449e80 100644 --- a/mali_kbase/backend/gpu/Kbuild +++ b/mali_kbase/backend/gpu/Kbuild @@ -25,7 +25,6 @@ BACKEND += \ backend/gpu/mali_kbase_gpuprops_backend.c \ backend/gpu/mali_kbase_irq_linux.c \ backend/gpu/mali_kbase_instr_backend.c \ - backend/gpu/mali_kbase_jm_as.c \ backend/gpu/mali_kbase_js_backend.c \ backend/gpu/mali_kbase_pm_backend.c \ backend/gpu/mali_kbase_pm_driver.c \ @@ -41,6 +40,7 @@ ifeq ($(MALI_USE_CSF),1) # empty else BACKEND += \ + backend/gpu/mali_kbase_jm_as.c \ backend/gpu/mali_kbase_debug_job_fault_backend.c \ backend/gpu/mali_kbase_jm_hw.c \ backend/gpu/mali_kbase_jm_rb.c diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index e0c108c..2806f05 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/backend/gpu/mali_kbase_device_internal.h b/mali_kbase/backend/gpu/mali_kbase_device_internal.h index c3e5c03..5ddc4a5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_device_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_device_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014,2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014,2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -90,7 +90,7 @@ void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); * Return: 0 if successful or a negative error code on failure. */ int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, - unsigned int wait_timeout_ms); + unsigned int wait_timeout_ms); /** * kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c index 724c664..cb3e1d3 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,7 +71,11 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, /* Configure */ prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + if (kbdev->hwcnt.backend.use_secondary_override) +#else if (enable->use_secondary) +#endif prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), @@ -380,6 +384,10 @@ int kbase_instr_backend_init(struct kbase_device *kbdev) kbdev->hwcnt.backend.triggered = 0; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + kbdev->hwcnt.backend.use_secondary_override = false; +#endif + kbdev->hwcnt.backend.cache_clean_wq = alloc_workqueue("Mali cache cleaning workqueue", 0, 1); if (NULL == kbdev->hwcnt.backend.cache_clean_wq) @@ -392,3 +400,12 @@ void kbase_instr_backend_term(struct kbase_device *kbdev) { destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); } + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_bool("hwcnt_use_secondary", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + &kbdev->hwcnt.backend.use_secondary_override); +} +#endif diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h index b7d9d31..9930968 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014, 2016, 2018, 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2018, 2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,6 +47,9 @@ enum kbase_instr_state { struct kbase_instr_backend { wait_queue_head_t wait; int triggered; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + bool use_secondary_override; +#endif enum kbase_instr_state state; struct workqueue_struct *cache_clean_wq; diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 2692f05..819edaf 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -84,6 +84,17 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, kbdev->pm.debug_core_mask[js]; } + if (unlikely(!affinity)) { +#ifdef CONFIG_MALI_DEBUG + u64 shaders_ready = + kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + + WARN_ON(!(shaders_ready & kbdev->pm.backend.shaders_avail)); +#endif + + affinity = kbdev->pm.backend.shaders_avail; + } + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), affinity & 0xFFFFFFFF); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), @@ -92,13 +103,86 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, return affinity; } +/** + * select_job_chain() - Select which job chain to submit to the GPU + * @katom: Pointer to the atom about to be submitted to the GPU + * + * Selects one of the fragment job chains attached to the special atom at the + * end of a renderpass, or returns the address of the single job chain attached + * to any other type of atom. + * + * Which job chain is selected depends upon whether the tiling phase of the + * renderpass completed normally or was soft-stopped because it used too + * much memory. It also depends upon whether one of the fragment job chains + * has already been run as part of the same renderpass. + * + * Return: GPU virtual address of the selected job chain + */ +static u64 select_job_chain(struct kbase_jd_atom *katom) +{ + struct kbase_context *const kctx = katom->kctx; + u64 jc = katom->jc; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) + return jc; + + rp = &kctx->jctx.renderpasses[katom->renderpass_id]; + /* We can read a subset of renderpass state without holding + * higher-level locks (but not end_katom, for example). + * If the end-of-renderpass atom is running with as-yet indeterminate + * OOM state then assume that the start atom was not soft-stopped. + */ + switch (rp->state) { + case KBASE_JD_RP_OOM: + /* Tiling ran out of memory. + * Start of incremental rendering, used once. + */ + jc = katom->jc_fragment.norm_read_forced_write; + break; + case KBASE_JD_RP_START: + case KBASE_JD_RP_PEND_OOM: + /* Tiling completed successfully first time. + * Single-iteration rendering, used once. + */ + jc = katom->jc_fragment.norm_read_norm_write; + break; + case KBASE_JD_RP_RETRY_OOM: + /* Tiling ran out of memory again. + * Continuation of incremental rendering, used as + * many times as required. + */ + jc = katom->jc_fragment.forced_read_forced_write; + break; + case KBASE_JD_RP_RETRY: + case KBASE_JD_RP_RETRY_PEND_OOM: + /* Tiling completed successfully this time. + * End of incremental rendering, used once. 
+ */ + jc = katom->jc_fragment.forced_read_norm_write; + break; + default: + WARN_ON(1); + break; + } + + dev_dbg(kctx->kbdev->dev, + "Selected job chain 0x%llx for end atom %p in state %d\n", + jc, (void *)katom, (int)rp->state); + + katom->jc = jc; + return jc; +} + void kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js) { struct kbase_context *kctx; u32 cfg; - u64 jc_head = katom->jc; + u64 const jc_head = select_job_chain(katom); u64 affinity; KBASE_DEBUG_ASSERT(kbdev); @@ -109,6 +193,9 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, /* Command register must be available */ KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); + dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %p\n", + jc_head, (void *)katom); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), jc_head & 0xFFFFFFFF); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), @@ -139,7 +226,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, cfg |= JS_CONFIG_THREAD_PRI(8); - if (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) + if ((katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) || + (katom->core_req & BASE_JD_REQ_END_RENDERPASS)) cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; if (kbase_hw_has_feature(kbdev, @@ -492,7 +580,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, /* We are about to issue a soft stop, so mark the atom as having * been soft stopped */ - target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED; /* Mark the point where we issue the soft-stop command */ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom); @@ -656,6 +744,70 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, } } +static int softstop_start_rp_nolock( + struct kbase_context *kctx, struct kbase_va_region *reg) +{ + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_atom *katom; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom = kbase_gpu_inspect(kbdev, 1, 0); + + if (!katom) { + dev_dbg(kctx->kbdev->dev, "No atom on job slot\n"); + return -ESRCH; + } + + if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { + dev_dbg(kctx->kbdev->dev, + "Atom %p on job slot is not start RP\n", (void *)katom); + return -EPERM; + } + + if (WARN_ON(katom->renderpass_id >= + ARRAY_SIZE(kctx->jctx.renderpasses))) + return -EINVAL; + + rp = &kctx->jctx.renderpasses[katom->renderpass_id]; + if (WARN_ON(rp->state != KBASE_JD_RP_START && + rp->state != KBASE_JD_RP_RETRY)) + return -EINVAL; + + dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %p\n", + (int)rp->state, (void *)reg); + + if (WARN_ON(katom != rp->start_katom)) + return -EINVAL; + + dev_dbg(kctx->kbdev->dev, "Adding region %p to list %p\n", + (void *)reg, (void *)&rp->oom_reg_list); + list_move_tail(®->link, &rp->oom_reg_list); + dev_dbg(kctx->kbdev->dev, "Added region to list\n"); + + rp->state = (rp->state == KBASE_JD_RP_START ? 
+ KBASE_JD_RP_PEND_OOM : KBASE_JD_RP_RETRY_PEND_OOM); + + kbase_job_slot_softstop(kbdev, 1, katom); + + return 0; +} + +int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx, + struct kbase_va_region *const reg) +{ + struct kbase_device *const kbdev = kctx->kbdev; + int err; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + err = softstop_start_rp_nolock(kctx, reg); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return err; +} + void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; @@ -745,6 +897,9 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term); void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom, u32 sw_flags) { + dev_dbg(kbdev->dev, "Soft-stop atom %p with flags 0x%x (s:%d)\n", + target_katom, sw_flags, js); + KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, JS_COMMAND_SOFT_STOP | sw_flags); diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index 880a89b..d1ed42d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016, 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index c860bde..6daea01 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,7 +32,6 @@ #include <mali_kbase_js.h> #include <tl/mali_kbase_tracepoints.h> #include <mali_kbase_hwcnt_context.h> -#include <mali_kbase_10969_workaround.h> #include <mali_kbase_reset_gpu.h> #include <backend/gpu/mali_kbase_cache_policy_backend.h> #include <backend/gpu/mali_kbase_device_internal.h> @@ -832,8 +831,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) break; case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: - if (katom[idx]->atom_flags & - KBASE_KATOM_FLAG_X_DEP_BLOCKED) + if (kbase_js_atom_blocked_on_x_dep(katom[idx])) break; katom[idx]->gpu_rb_state = @@ -1007,6 +1005,8 @@ void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Backend running atom %p\n", (void *)katom); + kbase_gpu_enqueue_atom(kbdev, katom); kbase_backend_slot_update(kbdev); } @@ -1065,6 +1065,10 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); struct kbase_context *kctx = katom->kctx; + dev_dbg(kbdev->dev, + "Atom %p completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", + (void *)katom, completion_code, job_tail, js); + lockdep_assert_held(&kbdev->hwaccess_lock); /* @@ -1179,19 +1183,19 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, js, completion_code); if (job_tail != 0 && job_tail != katom->jc) { - bool was_updated = (job_tail != katom->jc); + /* Some of the job has been executed */ + dev_dbg(kbdev->dev, + "Update job chain address of atom %p to resume from 0x%llx\n", + (void *)katom, job_tail); - /* Some of the job has been executed, so we update the job chain - * address to where we should resume from */ katom->jc = job_tail; - if (was_updated) - KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, - katom, job_tail, js); + KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, + katom, job_tail, js); } /* Only update the event code for jobs that weren't cancelled */ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) - katom->event_code = (base_jd_event_code)completion_code; + katom->event_code = (enum base_jd_event_code)completion_code; /* Complete the job, and start new ones * @@ -1241,8 +1245,9 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, katom = kbase_jm_complete(kbdev, katom, end_timestamp); if (katom) { - /* Cross-slot dependency has now become runnable. Try to submit - * it. */ + dev_dbg(kbdev->dev, + "Cross-slot dependency %p has become runnable.\n", + (void *)katom); /* Check if there are lower priority jobs to soft stop */ kbase_job_slot_ctx_priority_check_locked(kctx, katom); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h index 15b1f86..f4bcf3e 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -301,9 +301,17 @@ union kbase_pm_policy_data { * @l2_always_on: If true, disable powering down of l2 cache. * @shaders_state: The current state of the shader state machine. 
* @shaders_avail: This is updated by the state machine when it is in a state - * where it can handle changes to the core availability. This - * is internal to the shader state machine and should *not* be - * modified elsewhere. + * where it can write to the SHADER_PWRON or PWROFF registers + * to have the same set of available cores as specified by + * @shaders_desired_mask. So it would eventually have the same + * value as @shaders_desired_mask and would precisely indicate + * the cores that are currently available. This is internal to + * shader state machine and should *not* be modified elsewhere. + * @shaders_desired_mask: This is updated by the state machine when it is in + * a state where it can handle changes to the core + * availability (either by DVFS or sysfs). This is + * internal to the shader state machine and should + * *not* be modified elsewhere. * @shaders_desired: True if the PM active count or power policy requires the * shader cores to be on. This is used as an input to the * shader power state machine. The current state of the @@ -401,6 +409,7 @@ struct kbase_pm_backend_data { enum kbase_l2_core_state l2_state; enum kbase_shader_core_state shaders_state; u64 shaders_avail; + u64 shaders_desired_mask; bool l2_desired; bool l2_always_on; bool shaders_desired; diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index d53acb2..b04d705 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -319,7 +319,8 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, if (kbase_dummy_job_wa_enabled(kbdev) && action == ACTION_PWRON && core_type == KBASE_PM_CORE_SHADER && - !(kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) { + !(kbdev->dummy_job_wa.flags & + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) { kbase_dummy_job_wa_execute(kbdev, cores); } else { if (lo != 0) @@ -938,7 +939,8 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) * except at certain points where we can handle it, * i.e. off and SHADERS_ON_CORESTACK_ON. 
*/ - backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev); + backend->shaders_desired_mask = + kbase_pm_ca_get_core_mask(kbdev); backend->pm_shaders_core_mask = 0; if (backend->shaders_desired && @@ -965,6 +967,8 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) case KBASE_SHADERS_OFF_CORESTACK_PEND_ON: if (!stacks_trans && stacks_ready == stacks_avail) { + backend->shaders_avail = + backend->shaders_desired_mask; kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, backend->shaders_avail, ACTION_PWRON); @@ -990,11 +994,12 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) break; case KBASE_SHADERS_ON_CORESTACK_ON: - backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev); + backend->shaders_desired_mask = + kbase_pm_ca_get_core_mask(kbdev); /* If shaders to change state, trigger a counter dump */ if (!backend->shaders_desired || - (backend->shaders_avail != shaders_ready)) { + (backend->shaders_desired_mask != shaders_ready)) { backend->hwcnt_desired = false; if (!backend->hwcnt_disabled) kbase_pm_trigger_hwcnt_disable(kbdev); @@ -1004,7 +1009,7 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) break; case KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: - backend->shaders_avail = + backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); if (!backend->hwcnt_disabled) { @@ -1038,19 +1043,20 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; } - } else if (backend->shaders_avail & ~shaders_ready) { + } else if (backend->shaders_desired_mask & ~shaders_ready) { /* set cores ready but not available to * meet KBASE_SHADERS_PEND_ON_CORESTACK_ON * check pass */ - backend->shaders_avail |= shaders_ready; + backend->shaders_avail = + (backend->shaders_desired_mask | shaders_ready); kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, backend->shaders_avail & ~shaders_ready, ACTION_PWRON); backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; - } else if (shaders_ready & ~backend->shaders_avail) { + } else if (shaders_ready & ~backend->shaders_desired_mask) { backend->shaders_state = KBASE_SHADERS_WAIT_GPU_IDLE; } else { @@ -1111,7 +1117,15 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) * meet KBASE_SHADERS_PEND_ON_CORESTACK_ON * check pass */ - backend->shaders_avail &= shaders_ready; + + /* shaders_desired_mask shall be a subset of + * shaders_ready + */ + WARN_ON(backend->shaders_desired_mask & ~shaders_ready); + WARN_ON(!(backend->shaders_desired_mask & shaders_ready)); + + backend->shaders_avail = + backend->shaders_desired_mask; kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, shaders_ready & ~backend->shaders_avail, ACTION_PWROFF); backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp index d331dd2..94189b1 100644 --- a/mali_kbase/build.bp +++ b/mali_kbase/build.bp @@ -1,13 +1,16 @@ /* - * Copyright: - * ---------------------------------------------------------------------------- - * This confidential and proprietary software may be used only as authorized - * by a licensing agreement from ARM Limited. - * (C) COPYRIGHT 2017-2019 ARM Limited, ALL RIGHTS RESERVED - * The entire notice above must be reproduced on all authorized copies and - * copies may only be made to the extent permitted by a licensing agreement - * from ARM Limited. - * ---------------------------------------------------------------------------- + * + * (C) COPYRIGHT 2017-2020 ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * */ /* Kernel-side tests may include mali_kbase's headers. Therefore any config @@ -121,6 +124,9 @@ bob_kernel_module { cinstr_secondary_hwc: { kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"], }, + cinstr_secondary_hwc_via_debug_fs: { + kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS=y"], + }, mali_2mb_alloc: { kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], }, diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c index 0fe61c4..2cd2551 100644 --- a/mali_kbase/context/backend/mali_kbase_context_jm.c +++ b/mali_kbase/context/backend/mali_kbase_context_jm.c @@ -56,6 +56,18 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx) kbase_debug_job_fault_context_term(kctx); } KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); +#else +void kbase_context_debugfs_init(struct kbase_context *const kctx) +{ + CSTD_UNUSED(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); + +void kbase_context_debugfs_term(struct kbase_context *const kctx) +{ + CSTD_UNUSED(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); #endif /* CONFIG_DEBUG_FS */ static int kbase_context_kbase_timer_setup(struct kbase_context *kctx) diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c index 1ae149d..a539edb 100644 --- a/mali_kbase/context/mali_kbase_context.c +++ b/mali_kbase/context/mali_kbase_context.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,8 +51,6 @@ int kbase_context_common_init(struct kbase_context *kctx) kctx->process_mm = NULL; atomic_set(&kctx->nonmapped_pages, 0); atomic_set(&kctx->permanent_mapped_pages, 0); - kctx->slots_pullable = 0; - kctx->tgid = current->tgid; kctx->pid = current->pid; @@ -67,6 +65,8 @@ int kbase_context_common_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->waiting_soft_jobs); init_waitqueue_head(&kctx->event_queue); + atomic_set(&kctx->event_count, 0); + atomic_set(&kctx->event_closed, false); bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); diff --git a/mali_kbase/context/mali_kbase_context.h b/mali_kbase/context/mali_kbase_context.h index 12b8e4f..e4ed894 100644 --- a/mali_kbase/context/mali_kbase_context.h +++ b/mali_kbase/context/mali_kbase_context.h @@ -35,7 +35,6 @@ #include <linux/atomic.h> -#ifdef CONFIG_DEBUG_FS /** * kbase_context_debugfs_init - Initialize the kctx platform * specific debugfs @@ -57,7 +56,6 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx); * is compiled for. */ void kbase_context_debugfs_term(struct kbase_context *const kctx); -#endif /* CONFIG_DEBUG_FS */ /** * kbase_create_context() - Create a kernel base context. 
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c index 8eb3153..4c77929 100644 --- a/mali_kbase/device/mali_kbase_device.c +++ b/mali_kbase/device/mali_kbase_device.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -270,7 +270,12 @@ void kbase_device_id_init(struct kbase_device *kbdev) { scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, kbase_dev_nr); - kbdev->id = kbase_dev_nr++; + kbdev->id = kbase_dev_nr; +} + +void kbase_increment_device_id(void) +{ + kbase_dev_nr++; } int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev) diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h index b1a3e1b..16f1d70 100644 --- a/mali_kbase/device/mali_kbase_device.h +++ b/mali_kbase/device/mali_kbase_device.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -41,6 +41,13 @@ const struct list_head *kbase_device_get_list(void); void kbase_device_put_list(const struct list_head *dev_list); /** + * Kbase_increment_device_id - increment device id. + * + * Used to increment device id on successful initialization of the device. + */ +void kbase_increment_device_id(void); + +/** * kbase_device_init - Device initialisation. * * This is called from device probe to initialise various other diff --git a/mali_kbase/gpu/mali_kbase_gpu_coherency.h b/mali_kbase/gpu/mali_kbase_gpu_coherency.h index 5ab67db..bb2b161 100644 --- a/mali_kbase/gpu/mali_kbase_gpu_coherency.h +++ b/mali_kbase/gpu/mali_kbase_gpu_coherency.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/gpu/mali_kbase_gpu_fault.h b/mali_kbase/gpu/mali_kbase_gpu_fault.h index 88d9d0f..b59b9d1 100644 --- a/mali_kbase/gpu/mali_kbase_gpu_fault.h +++ b/mali_kbase/gpu/mali_kbase_gpu_fault.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,6 +33,17 @@ */ const char *kbase_gpu_exception_name(u32 exception_code); +/** Returns the name associated with a Mali fatal exception code + * + * @fatal_exception_code: fatal exception code + * + * This function is called from the interrupt handler when a GPU fatal + * exception occurs. + * + * Return: name associated with the fatal exception code + */ +const char *kbase_gpu_fatal_exception_name(u32 const fatal_exception_code); + /** * kbase_gpu_access_type_name - Convert MMU_AS_CONTROL.FAULTSTATUS.ACCESS_TYPE * into string. 
diff --git a/mali_kbase/gpu/mali_kbase_gpu_id.h b/mali_kbase/gpu/mali_kbase_gpu_id.h index ec883cb..9f3d6b1 100644 --- a/mali_kbase/gpu/mali_kbase_gpu_id.h +++ b/mali_kbase/gpu/mali_kbase_gpu_id.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -102,7 +102,7 @@ #define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2) #define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3) #define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) -#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 5) +#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7) #define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2) #define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3) #define GPU_ID2_PRODUCT_TE2X GPU_ID2_MODEL_MAKE(11, 1) diff --git a/mali_kbase/jm/mali_base_jm_kernel.h b/mali_kbase/jm/mali_base_jm_kernel.h new file mode 100644 index 0000000..b61e612 --- /dev/null +++ b/mali_kbase/jm/mali_base_jm_kernel.h @@ -0,0 +1,1002 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#ifndef _BASE_JM_KERNEL_H_ +#define _BASE_JM_KERNEL_H_ + +/* Memory allocation, access/hint flags. + * + * See base_mem_alloc_flags. + */ + +/* IN */ +/* Read access CPU side + */ +#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) + +/* Write access CPU side + */ +#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) + +/* Read access GPU side + */ +#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) + +/* Write access GPU side + */ +#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) + +/* Execute allowed on the GPU side + */ +#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) + +/* Will be permanently mapped in kernel space. + * Flag is only allowed on allocations originating from kbase. + */ +#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) + +/* The allocation will completely reside within the same 4GB chunk in the GPU + * virtual space. + * Since this flag is primarily required only for the TLS memory which will + * not be used to contain executable code and also not used for Tiler heap, + * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. + */ +#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) + +/* Userspace is not allowed to free this memory. + * Flag is only allowed on allocations originating from kbase. 
+ */ +#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) + +#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) + +/* Grow backing store on GPU Page Fault + */ +#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) + +/* Page coherence Outer shareable, if available + */ +#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) + +/* Page coherence Inner shareable + */ +#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) + +/* Should be cached on the CPU + */ +#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) + +/* IN/OUT */ +/* Must have same VA on both the GPU and the CPU + */ +#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) + +/* OUT */ +/* Must call mmap to acquire a GPU address for the allocation + */ +#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) + +/* IN */ +/* Page coherence Outer shareable, required. + */ +#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) + +/* Protected memory + */ +#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) + +/* Not needed physical memory + */ +#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) + +/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the + * addresses to be the same + */ +#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) + +/** + * Bit 19 is reserved. + * + * Do not remove, use the next unreserved bit for new flags + */ +#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) + +/** + * Memory starting from the end of the initial commit is aligned to 'extent' + * pages, where 'extent' must be a power of 2 and no more than + * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES + */ +#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) + +/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu + * mode. Some components within the GPU might only be able to access memory + * that is GPU cacheable. Refer to the specific GPU implementation for more + * details. The 3 shareability flags will be ignored for GPU uncached memory. + * If used while importing USER_BUFFER type memory, then the import will fail + * if the memory is not aligned to GPU and CPU cache line width. + */ +#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) + +/* + * Bits [22:25] for group_id (0~15). + * + * base_mem_group_id_set() should be used to pack a memory group ID into a + * base_mem_alloc_flags value instead of accessing the bits directly. + * base_mem_group_id_get() should be used to extract the memory group ID from + * a base_mem_alloc_flags value. + */ +#define BASEP_MEM_GROUP_ID_SHIFT 22 +#define BASE_MEM_GROUP_ID_MASK \ + ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) + +/* Must do CPU cache maintenance when imported memory is mapped/unmapped + * on GPU. Currently applicable to dma-buf type only. + */ +#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) + +/* Use the GPU VA chosen by the kernel client */ +#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27) + +/* Number of bits used as flags for base memory management + * + * Must be kept in sync with the base_mem_alloc_flags flags + */ +#define BASE_MEM_FLAGS_NR_BITS 28 + +/* A mask of all the flags which are only valid for allocations within kbase, + * and may not be passed from user space. + */ +#define BASEP_MEM_FLAGS_KERNEL_ONLY \ + (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \ + BASE_MEM_FLAG_MAP_FIXED) + +/* A mask for all output bits, excluding IN/OUT bits. 
+ */ +#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP + +/* A mask for all input bits, including IN/OUT bits. + */ +#define BASE_MEM_FLAGS_INPUT_MASK \ + (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) + +/* A mask of all currently reserved flags + */ +#define BASE_MEM_FLAGS_RESERVED \ + (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) + +#define BASEP_MEM_INVALID_HANDLE (0ull << 12) +#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) +#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) +#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */ +#define BASE_MEM_COOKIE_BASE (64ul << 12) +#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ + BASE_MEM_COOKIE_BASE) + +/** + * typedef base_context_create_flags - Flags to pass to ::base_context_init. + * + * Flags can be ORed together to enable multiple things. + * + * These share the same space as BASEP_CONTEXT_FLAG_*, and so must + * not collide with them. + */ +typedef u32 base_context_create_flags; + +/* No flags set */ +#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) + +/* Base context is embedded in a cctx object (flag used for CINSTR + * software counter macros) + */ +#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) + +/* Base context is a 'System Monitor' context for Hardware counters. + * + * One important side effect of this is that job submission is disabled. + */ +#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ + ((base_context_create_flags)1 << 1) + +/* Bit-shift used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) + +/* Bitmask used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ + ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) + +/* Bitpattern describing the base_context_create_flags that can be + * passed to the kernel + */ +#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ + (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ + BASEP_CONTEXT_MMU_GROUP_ID_MASK) + +/* Bitpattern describing the ::base_context_create_flags that can be + * passed to base_context_init() + */ +#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ + (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS) + +/* + * Private flags used on the base context + * + * These start at bit 31, and run down to zero. + * + * They share the same space as base_context_create_flags, and so must + * not collide with them. + */ + +/* Private flag tracking whether job descriptor dumping is disabled */ +#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \ + ((base_context_create_flags)(1 << 31)) + +/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, + * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) + */ +#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) + +/* Indicate that job dumping is enabled. This could affect certain timers + * to account for the performance impact. + */ +#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) + +#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ + BASE_TLSTREAM_JOB_DUMPING_ENABLED) + +/* + * Dependency stuff, keep it private for now. May want to expose it if + * we decide to make the number of semaphores a configurable + * option. + */ +#define BASE_JD_ATOM_COUNT 256 + +/* Maximum number of concurrent render passes. 
+ */ +#define BASE_JD_RP_COUNT (256) + +/* Set/reset values for a software event */ +#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) +#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) + +/** + * struct base_jd_udata - Per-job data + * + * This structure is used to store per-job data, and is completely unused + * by the Base driver. It can be used to store things such as callback + * function pointer, data to handle job completion. It is guaranteed to be + * untouched by the Base driver. + * + * @blob: per-job data array + */ +struct base_jd_udata { + u64 blob[2]; +}; + +/** + * typedef base_jd_dep_type - Job dependency type. + * + * A flags field will be inserted into the atom structure to specify whether a + * dependency is a data or ordering dependency (by putting it before/after + * 'core_req' in the structure it should be possible to add without changing + * the structure size). + * When the flag is set for a particular dependency to signal that it is an + * ordering only dependency then errors will not be propagated. + */ +typedef u8 base_jd_dep_type; + +#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ +#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ +#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ + +/** + * typedef base_jd_core_req - Job chain hardware requirements. + * + * A job chain must specify what GPU features it needs to allow the + * driver to schedule the job correctly. By not specifying the + * correct settings can/will cause an early job termination. Multiple + * values can be ORed together to specify multiple requirements. + * Special case is ::BASE_JD_REQ_DEP, which is used to express complex + * dependencies, and that doesn't execute anything on the hardware. + */ +typedef u32 base_jd_core_req; + +/* Requirements that come from the HW */ + +/* No requirement, dependency only + */ +#define BASE_JD_REQ_DEP ((base_jd_core_req)0) + +/* Requires fragment shaders + */ +#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) + +/* Requires compute shaders + * + * This covers any of the following GPU job types: + * - Vertex Shader Job + * - Geometry Shader Job + * - An actual Compute Shader Job + * + * Compare this with BASE_JD_REQ_ONLY_COMPUTE, which specifies that the + * job is specifically just the "Compute Shader" job type, and not the "Vertex + * Shader" nor the "Geometry Shader" job type. + */ +#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) + +/* Requires tiling */ +#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) + +/* Requires cache flushes */ +#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) + +/* Requires value writeback */ +#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) + +/* SW-only requirements - the HW does not expose these as part of the job slot + * capabilities + */ + +/* Requires fragment job with AFBC encoding */ +#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) + +/* SW-only requirement: coalesce completion events. + * If this bit is set then completion of this atom will not cause an event to + * be sent to userspace, whether successful or not; completion events will be + * deferred until an atom completes which does not have this bit set. + * + * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. + */ +#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) + +/* SW Only requirement: the job chain requires a coherent core group. We don't + * mind which coherent core group is used. 
+ */ +#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) + +/* SW Only requirement: The performance counters should be enabled only when + * they are needed, to reduce power consumption. + */ +#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) + +/* SW Only requirement: External resources are referenced by this atom. + * + * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and + * BASE_JD_REQ_SOFT_EVENT_WAIT. + */ +#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) + +/* SW Only requirement: Software defined job. Jobs with this bit set will not be + * submitted to the hardware but will cause some action to happen within the + * driver + */ +#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) + +#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) +#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) +#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) + +/* 0x4 RESERVED for now */ + +/* SW only requirement: event wait/trigger job. + * + * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. + * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the + * other waiting jobs. It completes immediately. + * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it + * possible for other jobs to wait upon. It completes immediately. + */ +#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) +#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) +#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) + +#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) + +/* SW only requirement: Just In Time allocation + * + * This job requests a single or multiple just-in-time allocations through a + * list of base_jit_alloc_info structure which is passed via the jc element of + * the atom. The number of base_jit_alloc_info structures present in the + * list is passed via the nr_extres element of the atom + * + * It should be noted that the id entry in base_jit_alloc_info must not + * be reused until it has been released via BASE_JD_REQ_SOFT_JIT_FREE. + * + * Should this soft job fail it is expected that a BASE_JD_REQ_SOFT_JIT_FREE + * soft job to free the JIT allocation is still made. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) + +/* SW only requirement: Just In Time free + * + * This job requests a single or multiple just-in-time allocations created by + * BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the just-in-time + * allocations is passed via the jc element of the atom. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) + +/* SW only requirement: Map external resource + * + * This job requests external resource(s) are mapped once the dependencies + * of the job have been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) + +/* SW only requirement: Unmap external resource + * + * This job requests external resource(s) are unmapped once the dependencies + * of the job has been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * base_external_resource_list. 
+ */ +#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) + +/* HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) + * + * This indicates that the Job Chain contains GPU jobs of the 'Compute + * Shaders' type. + * + * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job + * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. + */ +#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) + +/* HW Requirement: Use the base_jd_atom::device_nr field to specify a + * particular core group + * + * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag + * takes priority + * + * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms. + * + * If the core availability policy is keeping the required core group turned + * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code. + */ +#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) + +/* SW Flag: If this bit is set then the successful completion of this atom + * will not cause an event to be sent to userspace + */ +#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) + +/* SW Flag: If this bit is set then completion of this atom will not cause an + * event to be sent to userspace, whether successful or not. + */ +#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) + +/* SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job starts which does not have this bit set or a job completes + * which does not have the BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use + * if the CPU may have written to memory addressed by the job since the last job + * without this bit set was submitted. + */ +#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) + +/* SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job completes which does not have this bit set or a job starts + * which does not have the BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use + * if the CPU may read from or partially overwrite memory addressed by the job + * before the next job without this bit set completes. + */ +#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) + +/* Request the atom be executed on a specific job slot. + * + * When this flag is specified, it takes precedence over any existing job slot + * selection logic. + */ +#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17) + +/* SW-only requirement: The atom is the start of a renderpass. + * + * If this bit is set then the job chain will be soft-stopped if it causes the + * GPU to write beyond the end of the physical pages backing the tiler heap, and + * committing more memory to the heap would exceed an internal threshold. It may + * be resumed after running one of the job chains attached to an atom with + * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be + * resumed multiple times until it completes without memory usage exceeding the + * threshold. + * + * Usually used with BASE_JD_REQ_T. + */ +#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18) + +/* SW-only requirement: The atom is the end of a renderpass. + * + * If this bit is set then the atom incorporates the CPU address of a + * base_jd_fragment object instead of the GPU address of a job chain. 
+ * + * Which job chain is run depends upon whether the atom with the same renderpass + * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or + * was soft-stopped when it exceeded an upper threshold for tiler heap memory + * usage. + * + * It also depends upon whether one of the job chains attached to the atom has + * already been run as part of the same renderpass (in which case it would have + * written unresolved multisampled and otherwise-discarded output to temporary + * buffers that need to be read back). The job chain for doing a forced read and + * forced write (from/to temporary buffers) is run as many times as necessary. + * + * Usually used with BASE_JD_REQ_FS. + */ +#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19) + +/* These requirement bits are currently unused in base_jd_core_req + */ +#define BASEP_JD_REQ_RESERVED \ + (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ + BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ + BASE_JD_REQ_EVENT_COALESCE | \ + BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ + BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ + BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \ + BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \ + BASE_JD_REQ_END_RENDERPASS)) + +/* Mask of all bits in base_jd_core_req that control the type of the atom. + * + * This allows dependency only atoms to have flags set + */ +#define BASE_JD_REQ_ATOM_TYPE \ + (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ + BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) + +/** + * Mask of all bits in base_jd_core_req that control the type of a soft job. + */ +#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) + +/* Returns non-zero value if core requirements passed define a soft job or + * a dependency only job. + */ +#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \ + (((core_req) & BASE_JD_REQ_SOFT_JOB) || \ + ((core_req) & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) + +/** + * enum kbase_jd_atom_state + * + * @KBASE_JD_ATOM_STATE_UNUSED: Atom is not used. + * @KBASE_JD_ATOM_STATE_QUEUED: Atom is queued in JD. + * @KBASE_JD_ATOM_STATE_IN_JS: Atom has been given to JS (is runnable/running). + * @KBASE_JD_ATOM_STATE_HW_COMPLETED: Atom has been completed, but not yet + * handed back to job dispatcher for + * dependency resolution. + * @KBASE_JD_ATOM_STATE_COMPLETED: Atom has been completed, but not yet handed + * back to userspace. + */ +enum kbase_jd_atom_state { + KBASE_JD_ATOM_STATE_UNUSED, + KBASE_JD_ATOM_STATE_QUEUED, + KBASE_JD_ATOM_STATE_IN_JS, + KBASE_JD_ATOM_STATE_HW_COMPLETED, + KBASE_JD_ATOM_STATE_COMPLETED +}; + +/** + * typedef base_atom_id - Type big enough to store an atom number in. + */ +typedef u8 base_atom_id; + +/** + * struct base_dependency - + * + * @atom_id: An atom number + * @dependency_type: Dependency type + */ +struct base_dependency { + base_atom_id atom_id; + base_jd_dep_type dependency_type; +}; + +/** + * struct base_jd_fragment - Set of GPU fragment job chains used for rendering. + * + * @norm_read_norm_write: Job chain for full rendering. + * GPU address of a fragment job chain to render in the + * circumstance where the tiler job chain did not exceed + * its memory usage threshold and no fragment job chain + * was previously run for the same renderpass. + * It is used no more than once per renderpass. + * @norm_read_forced_write: Job chain for starting incremental + * rendering. 
+ * GPU address of a fragment job chain to render in + * the circumstance where the tiler job chain exceeded + * its memory usage threshold for the first time and + * no fragment job chain was previously run for the + * same renderpass. + * Writes unresolved multisampled and normally- + * discarded output to temporary buffers that must be + * read back by a subsequent forced_read job chain + * before the renderpass is complete. + * It is used no more than once per renderpass. + * @forced_read_forced_write: Job chain for continuing incremental + * rendering. + * GPU address of a fragment job chain to render in + * the circumstance where the tiler job chain + * exceeded its memory usage threshold again + * and a fragment job chain was previously run for + * the same renderpass. + * Reads unresolved multisampled and + * normally-discarded output from temporary buffers + * written by a previous forced_write job chain and + * writes the same to temporary buffers again. + * It is used as many times as required until + * rendering completes. + * @forced_read_norm_write: Job chain for ending incremental rendering. + * GPU address of a fragment job chain to render in the + * circumstance where the tiler job chain did not + * exceed its memory usage threshold this time and a + * fragment job chain was previously run for the same + * renderpass. + * Reads unresolved multisampled and normally-discarded + * output from temporary buffers written by a previous + * forced_write job chain in order to complete a + * renderpass. + * It is used no more than once per renderpass. + * + * This structure is referenced by the main atom structure if + * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req. + */ +struct base_jd_fragment { + u64 norm_read_norm_write; + u64 norm_read_forced_write; + u64 forced_read_forced_write; + u64 forced_read_norm_write; +}; + +/** + * typedef base_jd_prio - Base Atom priority. + * + * Only certain priority levels are actually implemented, as specified by the + * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority + * level that is not one of those defined below. + * + * Priority levels only affect scheduling after the atoms have had dependencies + * resolved. For example, a low priority atom that has had its dependencies + * resolved might run before a higher priority atom that has not had its + * dependencies resolved. + * + * In general, fragment atoms do not affect non-fragment atoms with + * lower priorities, and vice versa. One exception is that there is only one + * priority value for each context. So a high-priority (e.g.) fragment atom + * could increase its context priority, causing its non-fragment atoms to also + * be scheduled sooner. + * + * The atoms are scheduled as follows with respect to their priorities: + * * Let atoms 'X' and 'Y' be for the same job slot who have dependencies + * resolved, and atom 'X' has a higher priority than atom 'Y' + * * If atom 'Y' is currently running on the HW, then it is interrupted to + * allow atom 'X' to run soon after + * * If instead neither atom 'Y' nor atom 'X' are running, then when choosing + * the next atom to run, atom 'X' will always be chosen instead of atom 'Y' + * * Any two atoms that have the same priority could run in any order with + * respect to each other. That is, there is no ordering constraint between + * atoms of the same priority. + * + * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are + * scheduled between contexts. 
The default value, 0, will cause higher-priority + * atoms to be scheduled first, regardless of their context. The value 1 will + * use a round-robin algorithm when deciding which context's atoms to schedule + * next, so higher-priority atoms can only preempt lower priority atoms within + * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and + * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. + */ +typedef u8 base_jd_prio; + +/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ +#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) +/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and + * BASE_JD_PRIO_LOW + */ +#define BASE_JD_PRIO_HIGH ((base_jd_prio)1) +/* Low atom priority. */ +#define BASE_JD_PRIO_LOW ((base_jd_prio)2) + +/* Count of the number of priority levels. This itself is not a valid + * base_jd_prio setting + */ +#define BASE_JD_NR_PRIO_LEVELS 3 + +/** + * struct base_jd_atom_v2 - Node of a dependency graph used to submit a + * GPU job chain or soft-job to the kernel driver. + * + * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS + * is set in the base_jd_core_req) the CPU address of a + * base_jd_fragment object. + * @udata: User data. + * @extres_list: List of external resources. + * @nr_extres: Number of external resources or JIT allocations. + * @jit_id: Zero-terminated array of IDs of just-in-time memory + * allocations written to by the atom. When the atom + * completes, the value stored at the + * &struct_base_jit_alloc_info.heap_info_gpu_addr of + * each allocation is read in order to enforce an + * overall physical memory usage limit. + * @pre_dep: Pre-dependencies. One need to use SETTER function to assign + * this field; this is done in order to reduce possibility of + * improper assignment of a dependency field. + * @atom_number: Unique number to identify the atom. + * @prio: Atom priority. Refer to base_jd_prio for more details. + * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP + * specified. + * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. + * @core_req: Core requirements. + * @renderpass_id: Renderpass identifier used to associate an atom that has + * BASE_JD_REQ_START_RENDERPASS set in its core requirements + * with an atom that has BASE_JD_REQ_END_RENDERPASS set. + * @padding: Unused. Must be zero. + * + * This structure has changed since UK 10.2 for which base_jd_core_req was a + * u16 value. + * + * In UK 10.3 a core_req field of a u32 type was added to the end of the + * structure, and the place in the structure previously occupied by u16 + * core_req was kept but renamed to compat_core_req. + * + * From UK 11.20 - compat_core_req is now occupied by u8 jit_id[2]. + * Compatibility with UK 10.x from UK 11.y is not handled because + * the major version increase prevents this. + * + * For UK 11.20 jit_id[2] must be initialized to zero. 
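The renderpass mechanism above links two atoms through a shared renderpass_id. The following stand-alone sketch is illustrative only and not part of the patch; the two flag values mirror the definitions earlier in this header, and in real use the tiler and fragment requirement bits (BASE_JD_REQ_T, BASE_JD_REQ_FS) would also be set, as described above.

#include <stdint.h>

typedef uint32_t base_jd_core_req;

#define REQ_START_RENDERPASS ((base_jd_core_req)1 << 18) /* mirrors BASE_JD_REQ_START_RENDERPASS */
#define REQ_END_RENDERPASS   ((base_jd_core_req)1 << 19) /* mirrors BASE_JD_REQ_END_RENDERPASS */

int main(void)
{
	uint8_t renderpass_id = 7; /* the same ID goes into both atoms */

	/* Tiler atom: starts the renderpass, may be soft-stopped on OOM. */
	base_jd_core_req tiler_req = REQ_START_RENDERPASS;

	/* Fragment atom: ends the renderpass; its jc field would hold the CPU
	 * address of a struct base_jd_fragment rather than a job chain.
	 */
	base_jd_core_req fragment_req = REQ_END_RENDERPASS;

	(void)renderpass_id;
	(void)tiler_req;
	(void)fragment_req;
	return 0;
}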
+ */ +struct base_jd_atom_v2 { + u64 jc; + struct base_jd_udata udata; + u64 extres_list; + u16 nr_extres; + u8 jit_id[2]; + struct base_dependency pre_dep[2]; + base_atom_id atom_number; + base_jd_prio prio; + u8 device_nr; + u8 jobslot; + base_jd_core_req core_req; + u8 renderpass_id; + u8 padding[7]; +}; + +/* Job chain event code bits + * Defines the bits used to create ::base_jd_event_code + */ +enum { + BASE_JD_SW_EVENT_KERNEL = (1u << 15), /* Kernel side event */ + BASE_JD_SW_EVENT = (1u << 14), /* SW defined event */ + /* Event indicates success (SW events only) */ + BASE_JD_SW_EVENT_SUCCESS = (1u << 13), + BASE_JD_SW_EVENT_JOB = (0u << 11), /* Job related event */ + BASE_JD_SW_EVENT_BAG = (1u << 11), /* Bag related event */ + BASE_JD_SW_EVENT_INFO = (2u << 11), /* Misc/info event */ + BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */ + /* Mask to extract the type from an event code */ + BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) +}; + +/** + * enum base_jd_event_code - Job chain event codes + * + * @BASE_JD_EVENT_RANGE_HW_NONFAULT_START: Start of hardware non-fault status + * codes. + * Obscurely, BASE_JD_EVENT_TERMINATED + * indicates a real fault, because the + * job was hard-stopped. + * @BASE_JD_EVENT_NOT_STARTED: Can't be seen by userspace, treated as + * 'previous job done'. + * @BASE_JD_EVENT_STOPPED: Can't be seen by userspace, becomes + * TERMINATED, DONE or JOB_CANCELLED. + * @BASE_JD_EVENT_TERMINATED: This is actually a fault status code - the job + * was hard stopped. + * @BASE_JD_EVENT_ACTIVE: Can't be seen by userspace, jobs only returned on + * complete/fail/cancel. + * @BASE_JD_EVENT_RANGE_HW_NONFAULT_END: End of hardware non-fault status codes. + * Obscurely, BASE_JD_EVENT_TERMINATED + * indicates a real fault, + * because the job was hard-stopped. + * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START: Start of hardware fault and + * software error status codes. + * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END: End of hardware fault and + * software error status codes. + * @BASE_JD_EVENT_RANGE_SW_SUCCESS_START: Start of software success status + * codes. + * @BASE_JD_EVENT_RANGE_SW_SUCCESS_END: End of software success status codes. + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_START: Start of kernel-only status codes. + * Such codes are never returned to + * user-space. + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes. + * + * HW and low-level SW events are represented by event codes. + * The status of jobs which succeeded are also represented by + * an event code (see @BASE_JD_EVENT_DONE). + * Events are usually reported as part of a &struct base_jd_event. + * + * The event codes are encoded in the following way: + * * 10:0 - subtype + * * 12:11 - type + * * 13 - SW success (only valid if the SW bit is set) + * * 14 - SW event (HW event if not set) + * * 15 - Kernel event (should never be seen in userspace) + * + * Events are split up into ranges as follows: + * * BASE_JD_EVENT_RANGE_<description>_START + * * BASE_JD_EVENT_RANGE_<description>_END + * + * code is in <description>'s range when: + * BASE_JD_EVENT_RANGE_<description>_START <= code < + * BASE_JD_EVENT_RANGE_<description>_END + * + * Ranges can be asserted for adjacency by testing that the END of the previous + * is equal to the START of the next. This is useful for optimizing some tests + * for range. 
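To make the range rule concrete, here is a stand-alone sketch (illustrative only, not part of the patch) that tests an event code against the software-success range using the "START <= code < END" comparison described above. The constants mirror the bit layout and range markers defined in this header.

#include <stdbool.h>
#include <stdio.h>

#define SW_EVENT          (1u << 14)   /* mirrors BASE_JD_SW_EVENT */
#define SW_EVENT_SUCCESS  (1u << 13)   /* mirrors BASE_JD_SW_EVENT_SUCCESS */
#define SW_EVENT_INFO     (2u << 11)   /* mirrors BASE_JD_SW_EVENT_INFO */
#define SW_EVENT_RESERVED (3u << 11)   /* mirrors BASE_JD_SW_EVENT_RESERVED */

#define RANGE_SW_SUCCESS_START (SW_EVENT | SW_EVENT_SUCCESS | 0x000)
#define RANGE_SW_SUCCESS_END   (SW_EVENT | SW_EVENT_SUCCESS | SW_EVENT_RESERVED | 0x3FF)

static bool is_sw_success(unsigned int code)
{
	/* In range when START <= code < END */
	return code >= RANGE_SW_SUCCESS_START && code < RANGE_SW_SUCCESS_END;
}

int main(void)
{
	/* Same composition as BASE_JD_EVENT_DRV_TERMINATED below */
	unsigned int drv_terminated = SW_EVENT | SW_EVENT_SUCCESS | SW_EVENT_INFO;

	printf("%d\n", is_sw_success(drv_terminated)); /* prints 1 */
	return 0;
}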
+ * + * A limitation is that the last member of this enum must explicitly be handled + * (with an assert-unreachable statement) in switch statements that use + * variables of this type. Otherwise, the compiler warns that we have not + * handled that enum value. + */ +enum base_jd_event_code { + /* HW defined exceptions */ + BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, + + /* non-fatal exceptions */ + BASE_JD_EVENT_NOT_STARTED = 0x00, + BASE_JD_EVENT_DONE = 0x01, + BASE_JD_EVENT_STOPPED = 0x03, + BASE_JD_EVENT_TERMINATED = 0x04, + BASE_JD_EVENT_ACTIVE = 0x08, + + BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, + + /* job exceptions */ + BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, + BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, + BASE_JD_EVENT_JOB_READ_FAULT = 0x42, + BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, + BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, + BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, + BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, + BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, + BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, + BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, + BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, + BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, + BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, + BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, + BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, + BASE_JD_EVENT_STATE_FAULT = 0x5A, + BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, + BASE_JD_EVENT_UNKNOWN = 0x7F, + + /* GPU exceptions */ + BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, + BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, + + /* MMU exceptions */ + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, + BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, + BASE_JD_EVENT_ACCESS_FLAG = 0xD8, + + /* SW defined exceptions */ + BASE_JD_EVENT_MEM_GROWTH_FAILED = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_TIMED_OUT = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, + BASE_JD_EVENT_JOB_CANCELLED = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, + BASE_JD_EVENT_JOB_INVALID = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, + BASE_JD_EVENT_PM_EVENT = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, + + BASE_JD_EVENT_BAG_INVALID = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, + + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | 0x000, + + BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | + BASE_JD_SW_EVENT_BAG | 0x000, + BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, + + BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | 0x000, + BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x001, + + 
BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF +}; + +/** + * struct base_jd_event_v2 - Event reporting structure + * + * @event_code: event code. + * @atom_number: the atom number that has completed. + * @udata: user data. + * + * This structure is used by the kernel driver to report information + * about GPU events. They can either be HW-specific events or low-level + * SW events, such as job-chain completion. + * + * The event code contains an event type field which can be extracted + * by ANDing with BASE_JD_SW_EVENT_TYPE_MASK. + */ +struct base_jd_event_v2 { + enum base_jd_event_code event_code; + base_atom_id atom_number; + struct base_jd_udata udata; +}; + +/** + * struct base_dump_cpu_gpu_counters - Structure for + * BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS + * jobs. + * + * This structure is stored into the memory pointed to by the @jc field + * of &struct base_jd_atom_v2. + * + * It must not occupy the same CPU cache line(s) as any neighboring data. + * This is to avoid cases where access to pages containing the structure + * is shared between cached and un-cached memory regions, which would + * cause memory corruption. + */ + +struct base_dump_cpu_gpu_counters { + u64 system_time; + u64 cycle_counter; + u64 sec; + u32 usec; + u8 padding[36]; +}; + +#endif /* _BASE_JM_KERNEL_H_ */ diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h new file mode 100644 index 0000000..172217f --- /dev/null +++ b/mali_kbase/jm/mali_kbase_jm_defs.h @@ -0,0 +1,807 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Definitions (types, defines, etcs) specific to Job Manager Kbase. + * They are placed here to allow the hierarchy of header files to work. + */ + +#ifndef _KBASE_JM_DEFS_H_ +#define _KBASE_JM_DEFS_H_ + +/* Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */ +#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1 + +/* + * Number of milliseconds before resetting the GPU when a job cannot be "zapped" + * from the hardware. Note that the time is actually + * ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and + * the GPU actually being reset to give other contexts time for their jobs + * to be soft-stopped and removed from the hardware before resetting. + */ +#define ZAP_TIMEOUT 1000 + +/* + * Prevent soft-stops from occurring in scheduling situations + * + * This is not due to HW issues, but when scheduling is desired to be more + * predictable. + * + * Therefore, soft stop may still be disabled due to HW issues. + * + * Soft stop will still be used for non-scheduling purposes e.g. when + * terminating a context. + * + * if not in use, define this value to 0 instead of being undefined. 
+ */ +#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 + +/* + * Prevent hard-stops from occurring in scheduling situations + * + * This is not due to HW issues, but when scheduling is desired to be more + * predictable. + * + * Hard stop will still be used for non-scheduling purposes e.g. when + * terminating a context. + * + * if not in use, define this value to 0 instead of being undefined. + */ +#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 + +/* Atom has been previously soft-stopped */ +#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED (1<<1) +/* Atom has been previously retried to execute */ +#define KBASE_KATOM_FLAGS_RERUN (1<<2) +/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps + * to disambiguate short-running job chains during soft/hard stopping of jobs + */ +#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) +/* Atom has been previously hard-stopped. */ +#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) +/* Atom has caused us to enter disjoint state */ +#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) +/* Atom blocked on cross-slot dependency */ +#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) +/* Atom has fail dependency on cross-slot dependency */ +#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) +/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ +#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) +/* Atom is currently holding a context reference */ +#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) +/* Atom requires GPU to be in protected mode */ +#define KBASE_KATOM_FLAG_PROTECTED (1<<11) +/* Atom has been stored in runnable_tree */ +#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) +/* Atom is waiting for L2 caches to power up in order to enter protected mode */ +#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13) + +/* SW related flags about types of JS_COMMAND action + * NOTE: These must be masked off by JS_COMMAND_MASK + */ + +/* This command causes a disjoint event */ +#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100 + +/* Bitmask of all SW related flags */ +#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) + +#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK) +#error "JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK." \ + "Must update JS_COMMAND_SW_<..> bitmasks" +#endif + +/* Soft-stop command that causes a Disjoint event. This of course isn't + * entirely masked off by JS_COMMAND_MASK + */ +#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \ + (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) + +#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT + +/* Serialize atoms within a slot (ie only one atom per job slot) */ +#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0) +/* Serialize atoms between slots (ie only one job slot running at any time) */ +#define KBASE_SERIALIZE_INTER_SLOT (1 << 1) +/* Reset the GPU after each atom completion */ +#define KBASE_SERIALIZE_RESET (1 << 2) + +#ifdef CONFIG_DEBUG_FS +/** + * struct base_job_fault_event - keeps track of the atom which faulted or which + * completed after the faulty atom but before the + * debug data for faulty atom was dumped. + * + * @event_code: event code for the atom, should != BASE_JD_EVENT_DONE for + * the atom which faulted. + * @katom: pointer to the atom for which job fault occurred or which + * completed after the faulty atom. + * @job_fault_work: work item, queued only for the faulty atom, which waits for + * the dumping to get completed and then does the bottom half + * of job done for the atoms which followed the faulty atom. 
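The #if/#error block above is a compile-time guard: the build fails if any software-defined command bit ever overlaps the hardware command mask. A stand-alone sketch of the same pattern with hypothetical values (not the real JS_COMMAND_MASK or JS_COMMAND_SW_BITS):

#define EXAMPLE_HW_COMMAND_MASK 0x07   /* hypothetical: bits owned by the hardware */
#define EXAMPLE_SW_COMMAND_BITS 0x100  /* hypothetical: bits added by software */

#if (EXAMPLE_SW_COMMAND_BITS & EXAMPLE_HW_COMMAND_MASK)
#error "software command bits must not overlap the hardware command mask"
#endif

int main(void)
{
	return 0;
}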
+ * @head: List head used to store the atom in the global list of + * faulty atoms or context specific list of atoms which got + * completed during the dump. + * @reg_offset: offset of the register to be dumped next, only applicable + * for the faulty atom. + */ +struct base_job_fault_event { + + u32 event_code; + struct kbase_jd_atom *katom; + struct work_struct job_fault_work; + struct list_head head; + int reg_offset; +}; +#endif + +/** + * struct kbase_jd_atom_dependency - Contains the dependency info for an atom. + * @atom: pointer to the dependee atom. + * @dep_type: type of dependency on the dependee @atom, i.e. order or data + * dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency. + */ +struct kbase_jd_atom_dependency { + struct kbase_jd_atom *atom; + u8 dep_type; +}; + +/** + * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the + * dependee atom. + * @dep: pointer to the dependency info structure. + * + * Return: readonly reference to dependee atom. + */ +static inline const struct kbase_jd_atom * +kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) +{ + LOCAL_ASSERT(dep != NULL); + + return (const struct kbase_jd_atom *)(dep->atom); +} + +/** + * kbase_jd_katom_dep_type - Retrieves the dependency type info + * + * @dep: pointer to the dependency info structure. + * + * Return: the type of dependency there is on the dependee atom. + */ +static inline u8 kbase_jd_katom_dep_type( + const struct kbase_jd_atom_dependency *dep) +{ + LOCAL_ASSERT(dep != NULL); + + return dep->dep_type; +} + +/** + * kbase_jd_katom_dep_set - sets up the dependency info structure + * as per the values passed. + * @const_dep: pointer to the dependency info structure to be setup. + * @a: pointer to the dependee atom. + * @type: type of dependency there is on the dependee atom. + */ +static inline void kbase_jd_katom_dep_set( + const struct kbase_jd_atom_dependency *const_dep, + struct kbase_jd_atom *a, u8 type) +{ + struct kbase_jd_atom_dependency *dep; + + LOCAL_ASSERT(const_dep != NULL); + + dep = (struct kbase_jd_atom_dependency *)const_dep; + + dep->atom = a; + dep->dep_type = type; +} + +/** + * kbase_jd_katom_dep_clear - resets the dependency info structure + * + * @const_dep: pointer to the dependency info structure to be setup. + */ +static inline void kbase_jd_katom_dep_clear( + const struct kbase_jd_atom_dependency *const_dep) +{ + struct kbase_jd_atom_dependency *dep; + + LOCAL_ASSERT(const_dep != NULL); + + dep = (struct kbase_jd_atom_dependency *)const_dep; + + dep->atom = NULL; + dep->dep_type = BASE_JD_DEP_TYPE_INVALID; +} + +/** + * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it + * becomes runnable, with respect to job slot + * ringbuffer/fifo. + * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, + * which implies that either atom has not become + * runnable due to dependency or has completed + * the execution on GPU. + * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is + * blocked due to cross slot dependency, + * can't be submitted to GPU. + * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot + * fifo but is waiting for the completion of + * previously added atoms in current & other + * slots, as their protected mode requirements + * do not match with the current atom. 
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo + * and is waiting for completion of protected + * mode transition, needed before the atom is + * submitted to GPU. + * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is + * waiting for the cores, which are needed to + * execute the job chain represented by the atom, + * to become available + * @KBASE_ATOM_GPU_RB_READY: Atom is in slot fifo and can be submitted to + * GPU. + * @KBASE_ATOM_GPU_RB_SUBMITTED: Atom is in slot fifo and has been submitted + * to GPU. + * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some + * failure, but only after the previously added + * atoms in fifo have completed or have also + * been returned to JS. + */ +enum kbase_atom_gpu_rb_state { + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, + KBASE_ATOM_GPU_RB_WAITING_BLOCKED, + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, + KBASE_ATOM_GPU_RB_READY, + KBASE_ATOM_GPU_RB_SUBMITTED, + KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1 +}; + +/** + * enum kbase_atom_enter_protected_state - The state of an atom with respect to + * the preparation for GPU's entry into protected mode, + * becomes pertinent only after atom's state with respect + * to slot ringbuffer is + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any + * atoms currently submitted to GPU and protected mode + * transition is not already in progress. + * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to + * become disabled before entry into protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in + * preparation for the coherency change. L2 shall be + * powered down and GPU shall come out of fully + * coherent mode before entering protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change; + * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on + * so that coherency register contains correct value when + * GPU enters protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for + * BASE_HW_ISSUE_TGOX_R1_1234 check + * that L2 is powered up and switch GPU to protected mode. + */ +enum kbase_atom_enter_protected_state { + /* + * NOTE: The integer value of this must match + * KBASE_ATOM_EXIT_PROTECTED_CHECK. + */ + KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, + KBASE_ATOM_ENTER_PROTECTED_HWCNT, + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY, + KBASE_ATOM_ENTER_PROTECTED_FINISHED, +}; + +/** + * enum kbase_atom_exit_protected_state - The state of an atom with respect to + * the preparation for GPU's exit from protected mode, + * becomes pertinent only after atom's state with respect + * to slot ngbuffer is + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any + * atoms currently submitted to GPU and protected mode + * transition is not already in progress. + * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in + * preparation for the reset, as exiting protected mode + * requires a reset. 
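The comments above describe the enter-protected sequence as a linear progression: CHECK, then HWCNT, then IDLE_L2, then SET_COHERENCY, then FINISHED, with each state waiting for its condition before the driver advances. A stand-alone sketch (illustrative only, not part of the patch) that mirrors the enum and encodes that order:

enum enter_protected_state {
	ENTER_PROTECTED_CHECK = 0,
	ENTER_PROTECTED_HWCNT,
	ENTER_PROTECTED_IDLE_L2,
	ENTER_PROTECTED_SET_COHERENCY,
	ENTER_PROTECTED_FINISHED,
};

static enum enter_protected_state
enter_protected_next(enum enter_protected_state s)
{
	/* In the real driver each step also waits for hardware counters to be
	 * disabled, the L2 to go idle, or the coherency change to be applied;
	 * here only the ordering is shown.
	 */
	return s < ENTER_PROTECTED_FINISHED ?
		(enum enter_protected_state)(s + 1) : s;
}

int main(void)
{
	enum enter_protected_state s = ENTER_PROTECTED_CHECK;

	while (s != ENTER_PROTECTED_FINISHED)
		s = enter_protected_next(s);
	return 0;
}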
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from + * protected mode + * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to + * complete + */ +enum kbase_atom_exit_protected_state { + /* + * NOTE: The integer value of this must match + * KBASE_ATOM_ENTER_PROTECTED_CHECK. + */ + KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, + KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, + KBASE_ATOM_EXIT_PROTECTED_RESET, + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, +}; + +/** + * struct kbase_ext_res - Contains the info for external resources referred + * by an atom, which have been mapped on GPU side. + * @gpu_address: Start address of the memory region allocated for + * the resource from GPU virtual address space. + * @alloc: pointer to physical pages tracking object, set on + * mapping the external resource on GPU side. + */ +struct kbase_ext_res { + u64 gpu_address; + struct kbase_mem_phy_alloc *alloc; +}; + +/** + * struct kbase_jd_atom - object representing the atom, containing the complete + * state and attributes of an atom. + * @work: work item for the bottom half processing of the atom, + * by JD or JS, after it got executed on GPU or the + * input fence got signaled + * @start_timestamp: time at which the atom was submitted to the GPU, by + * updating the JS_HEAD_NEXTn register. + * @udata: copy of the user data sent for the atom in + * base_jd_submit. + * @kctx: Pointer to the base context with which the atom is + * associated. + * @dep_head: Array of 2 list heads, pointing to the two list of + * atoms + * which are blocked due to dependency on this atom. + * @dep_item: Array of 2 list heads, used to store the atom in the + * list of other atoms depending on the same dependee + * atom. + * @dep: Array containing the dependency info for the 2 atoms + * on which the atom depends upon. + * @jd_item: List head used during job dispatch job_done + * processing - as dependencies may not be entirely + * resolved at this point, + * we need to use a separate list head. + * @in_jd_list: flag set to true if atom's @jd_item is currently on + * a list, prevents atom being processed twice. + * @jit_ids: Zero-terminated array of IDs of just-in-time memory + * allocations written to by the atom. When the atom + * completes, the value stored at the + * &struct_base_jit_alloc_info.heap_info_gpu_addr of + * each allocation is read in order to enforce an + * overall physical memory usage limit. + * @nr_extres: number of external resources referenced by the atom. + * @extres: pointer to the location containing info about + * @nr_extres external resources referenced by the atom. + * @device_nr: indicates the coregroup with which the atom is + * associated, when + * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified. + * @jc: GPU address of the job-chain. + * @softjob_data: Copy of data read from the user space buffer that @jc + * points to. + * @fence: Stores either an input or output sync fence, + * depending on soft-job type + * @sync_waiter: Pointer to the sync fence waiter structure passed to + * the callback function on signaling of the input + * fence. + * @dma_fence: object containing pointers to both input & output + * fences and other related members used for explicit + * sync through soft jobs and for the implicit + * synchronization required on access to external + * resources. + * @event_code: Event code for the job chain represented by the atom, + * both HW and low-level SW events are represented by + * event codes. 
+ * @core_req:              bitmask of BASE_JD_REQ_* flags specifying either
+ *                         HW or SW requirements for the job chain represented
+ *                         by the atom.
+ * @ticks:                 Number of scheduling ticks for which atom has been
+ *                         running on the GPU.
+ * @sched_priority:        Priority of the atom for Job scheduling, as per the
+ *                         KBASE_JS_ATOM_SCHED_PRIO_*.
+ * @completed:             Wait queue to wait upon for the completion of atom.
+ * @status:                Indicates at a high level what stage the atom is in,
+ *                         as per KBASE_JD_ATOM_STATE_*: whether it is not in
+ *                         use, queued in JD, given to JS, submitted to HW, or
+ *                         has completed execution on HW.
+ * @work_id:               used for GPU tracepoints; it is a snapshot of the
+ *                         'work_id' counter in kbase_jd_context which is
+ *                         incremented on every call to base_jd_submit.
+ * @slot_nr:               Job slot chosen for the atom.
+ * @atom_flags:            bitmask of KBASE_KATOM_FLAG* flags capturing the
+ *                         exact low-level state of the atom.
+ * @gpu_rb_state:          bitmask of KBASE_ATOM_GPU_RB_* flags, precisely
+ *                         tracking atom's state after it has entered
+ *                         Job scheduler on becoming runnable. Atom
+ *                         could be blocked due to cross slot dependency
+ *                         or waiting for the shader cores to become available
+ *                         or waiting for protected mode transitions to
+ *                         complete.
+ * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU
+ *                         cache is needed for the atom and the shader cores
+ *                         used for the atom have been kept on.
+ * @blocked:               flag indicating that atom's resubmission to GPU is
+ *                         blocked till the work item is scheduled to return the
+ *                         atom to JS.
+ * @pre_dep:               Pointer to atom that this atom has same-slot
+ *                         dependency on
+ * @post_dep:              Pointer to atom that has same-slot dependency on
+ *                         this atom
+ * @x_pre_dep:             Pointer to atom that this atom has cross-slot
+ *                         dependency on
+ * @x_post_dep:            Pointer to atom that has cross-slot dependency on
+ *                         this atom
+ * @flush_id:              The GPU's flush count recorded at the time of
+ *                         submission, used for the cache flush optimization
+ * @fault_event:           Info for dumping the debug data on Job fault.
+ * @queue:                 List head used for 4 different purposes:
+ *                         Adds atom to the list of dma-buf fence waiting atoms.
+ *                         Adds atom to the list of atoms blocked due to cross
+ *                         slot dependency.
+ *                         Adds atom to the list of softjob atoms for which JIT
+ *                         allocation has been deferred
+ *                         Adds atom to the list of softjob atoms waiting for
+ *                         the signaling of fence.
+ * @jit_node:              Used to keep track of all JIT free/alloc jobs in
+ *                         submission order
+ * @jit_blocked:           Flag indicating that JIT allocation requested through
+ *                         softjob atom will be reattempted after the impending
+ *                         free of other active JIT allocations.
+ * @will_fail_event_code:  If non-zero, this indicates that the atom will fail
+ *                         with the set event_code when the atom is processed.
+ *                         Used for special handling of atoms, which have a data
+ *                         dependency on the failed atoms.
+ * @protected_state:       State of the atom, as per
+ *                         KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*,
+ *                         when transitioning into or out of protected mode.
+ *                         Atom will be either entering or exiting the
+ *                         protected mode.
+ * @runnable_tree_node:    The node added to context's job slot specific rb tree
+ *                         when the atom becomes runnable.
+ * @age:                   Age of atom relative to other atoms in the context,
+ *                         is a snapshot of the age_count counter in the kbase
+ *                         context.
+ */ +struct kbase_jd_atom { + struct work_struct work; + ktime_t start_timestamp; + + struct base_jd_udata udata; + struct kbase_context *kctx; + + struct list_head dep_head[2]; + struct list_head dep_item[2]; + const struct kbase_jd_atom_dependency dep[2]; + struct list_head jd_item; + bool in_jd_list; + +#if MALI_JIT_PRESSURE_LIMIT + u8 jit_ids[2]; +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + u16 nr_extres; + struct kbase_ext_res *extres; + + u32 device_nr; + u64 jc; + void *softjob_data; +#if defined(CONFIG_SYNC) + struct sync_fence *fence; + struct sync_fence_waiter sync_waiter; +#endif /* CONFIG_SYNC */ +#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) + struct { + /* Use the functions/API defined in mali_kbase_fence.h to + * when working with this sub struct + */ +#if defined(CONFIG_SYNC_FILE) + /* Input fence */ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence_in; +#else + struct dma_fence *fence_in; +#endif +#endif + /* This points to the dma-buf output fence for this atom. If + * this is NULL then there is no fence for this atom and the + * following fields related to dma_fence may have invalid data. + * + * The context and seqno fields contain the details for this + * fence. + * + * This fence is signaled when the katom is completed, + * regardless of the event_code of the katom (signal also on + * failure). + */ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + /* The dma-buf fence context number for this atom. A unique + * context number is allocated to each katom in the context on + * context creation. + */ + unsigned int context; + /* The dma-buf fence sequence number for this atom. This is + * increased every time this katom uses dma-buf fence. + */ + atomic_t seqno; + /* This contains a list of all callbacks set up to wait on + * other fences. This atom must be held back from JS until all + * these callbacks have been called and dep_count have reached + * 0. The initial value of dep_count must be equal to the + * number of callbacks on this list. + * + * This list is protected by jctx.lock. Callbacks are added to + * this list when the atom is built and the wait are set up. + * All the callbacks then stay on the list until all callbacks + * have been called and the atom is queued, or cancelled, and + * then all callbacks are taken off the list and freed. + */ + struct list_head callbacks; + /* Atomic counter of number of outstandind dma-buf fence + * dependencies for this atom. When dep_count reaches 0 the + * atom may be queued. + * + * The special value "-1" may only be set after the count + * reaches 0, while holding jctx.lock. This indicates that the + * atom has been handled, either queued in JS or cancelled. + * + * If anyone but the dma-fence worker sets this to -1 they must + * ensure that any potentially queued worker must have + * completed before allowing the atom to be marked as unused. + * This can be done by flushing the fence work queue: + * kctx->dma_fence.wq. 
+ */ + atomic_t dep_count; + } dma_fence; +#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */ + + /* Note: refer to kbasep_js_atom_retained_state, which will take a copy + * of some of the following members + */ + enum base_jd_event_code event_code; + base_jd_core_req core_req; + u8 jobslot; + u8 renderpass_id; + struct base_jd_fragment jc_fragment; + + u32 ticks; + int sched_priority; + + wait_queue_head_t completed; + enum kbase_jd_atom_state status; +#ifdef CONFIG_GPU_TRACEPOINTS + int work_id; +#endif + int slot_nr; + + u32 atom_flags; + + int retry_count; + + enum kbase_atom_gpu_rb_state gpu_rb_state; + + bool need_cache_flush_cores_retained; + + atomic_t blocked; + + struct kbase_jd_atom *pre_dep; + struct kbase_jd_atom *post_dep; + + struct kbase_jd_atom *x_pre_dep; + struct kbase_jd_atom *x_post_dep; + + u32 flush_id; + +#ifdef CONFIG_DEBUG_FS + struct base_job_fault_event fault_event; +#endif + struct list_head queue; + + struct list_head jit_node; + bool jit_blocked; + + enum base_jd_event_code will_fail_event_code; + + union { + enum kbase_atom_enter_protected_state enter; + enum kbase_atom_exit_protected_state exit; + } protected_state; + + struct rb_node runnable_tree_node; + + u32 age; +}; + +static inline bool kbase_jd_katom_is_protected( + const struct kbase_jd_atom *katom) +{ + return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); +} + +/* + * Theory of operations: + * + * Atom objects are statically allocated within the context structure. + * + * Each atom is the head of two lists, one for the "left" set of dependencies, + * one for the "right" set. + */ + +#define KBASE_JD_DEP_QUEUE_SIZE 256 + +/** + * enum kbase_jd_renderpass_state - State of a renderpass + * @KBASE_JD_RP_COMPLETE: Unused or completed renderpass. Can only transition to + * START. + * @KBASE_JD_RP_START: Renderpass making a first attempt at tiling. + * Can transition to PEND_OOM or COMPLETE. + * @KBASE_JD_RP_PEND_OOM: Renderpass whose first attempt at tiling used too much + * memory and has a soft-stop pending. Can transition to + * OOM or COMPLETE. + * @KBASE_JD_RP_OOM: Renderpass whose first attempt at tiling used too much + * memory and therefore switched to incremental + * rendering. The fragment job chain is forced to run. + * Can only transition to RETRY. + * @KBASE_JD_RP_RETRY: Renderpass making a second or subsequent attempt at + * tiling. Can transition to RETRY_PEND_OOM or COMPLETE. + * @KBASE_JD_RP_RETRY_PEND_OOM: Renderpass whose second or subsequent attempt at + * tiling used too much memory again and has a + * soft-stop pending. Can transition to RETRY_OOM + * or COMPLETE. + * @KBASE_JD_RP_RETRY_OOM: Renderpass whose second or subsequent attempt at + * tiling used too much memory again. The fragment job + * chain is forced to run. Can only transition to RETRY. + * + * A state machine is used to control incremental rendering. + */ +enum kbase_jd_renderpass_state { + KBASE_JD_RP_COMPLETE, /* COMPLETE => START */ + KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */ + KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */ + KBASE_JD_RP_OOM, /* OOM => RETRY */ + KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or + * COMPLETE + */ + KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or + * COMPLETE + */ + KBASE_JD_RP_RETRY_OOM, /* RETRY_OOM => RETRY */ +}; + +/** + * struct kbase_jd_renderpass - Data for a renderpass + * @state: Current state of the renderpass. If KBASE_JD_RP_COMPLETE then + * all other members are invalid. 
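The renderpass states above drive the incremental rendering support added in this release. A stand-alone sketch (illustrative only, not part of the patch) that mirrors the enum and encodes the transitions listed in the comments:

#include <stdbool.h>

enum rp_state {
	RP_COMPLETE, RP_START, RP_PEND_OOM, RP_OOM,
	RP_RETRY, RP_RETRY_PEND_OOM, RP_RETRY_OOM,
};

static bool rp_transition_ok(enum rp_state from, enum rp_state to)
{
	switch (from) {
	case RP_COMPLETE:       return to == RP_START;
	case RP_START:          return to == RP_PEND_OOM || to == RP_COMPLETE;
	case RP_PEND_OOM:       return to == RP_OOM || to == RP_COMPLETE;
	case RP_OOM:            return to == RP_RETRY;
	case RP_RETRY:          return to == RP_RETRY_PEND_OOM || to == RP_COMPLETE;
	case RP_RETRY_PEND_OOM: return to == RP_RETRY_OOM || to == RP_COMPLETE;
	case RP_RETRY_OOM:      return to == RP_RETRY;
	}
	return false;
}

int main(void)
{
	/* A first tiling attempt that runs out of memory, switches to
	 * incremental rendering, then completes on the retry.
	 */
	return (rp_transition_ok(RP_START, RP_PEND_OOM) &&
		rp_transition_ok(RP_PEND_OOM, RP_OOM) &&
		rp_transition_ok(RP_OOM, RP_RETRY) &&
		rp_transition_ok(RP_RETRY, RP_COMPLETE)) ? 0 : 1;
}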
+ * Both the job dispatcher context and hwaccess_lock must be + * locked to modify this so that it can be read with either + * (or both) locked. + * @start_katom: Address of the atom that is the start of a renderpass. + * Both the job dispatcher context and hwaccess_lock must be + * locked to modify this so that it can be read with either + * (or both) locked. + * @end_katom: Address of the atom that is the end of a renderpass, or NULL + * if that atom hasn't been added to the job scheduler yet. + * The job dispatcher context and hwaccess_lock must be + * locked to modify this so that it can be read with either + * (or both) locked. + * @oom_reg_list: A list of region structures which triggered out-of-memory. + * The hwaccess_lock must be locked to access this. + * + * Atoms tagged with BASE_JD_REQ_START_RENDERPASS or BASE_JD_REQ_END_RENDERPASS + * are associated with an object of this type, which is created and maintained + * by kbase to keep track of each renderpass. + */ +struct kbase_jd_renderpass { + enum kbase_jd_renderpass_state state; + struct kbase_jd_atom *start_katom; + struct kbase_jd_atom *end_katom; + struct list_head oom_reg_list; +}; + +/** + * struct kbase_jd_context - per context object encapsulating all the + * Job dispatcher related state. + * @lock: lock to serialize the updates made to the + * Job dispatcher state and kbase_jd_atom objects. + * @sched_info: Structure encapsulating all the Job scheduling + * info. + * @atoms: Array of the objects representing atoms, + * containing the complete state and attributes + * of an atom. + * @renderpasses: Array of renderpass state for incremental + * rendering, indexed by user-specified renderpass + * ID. + * @job_nr: Tracks the number of atoms being processed by the + * kbase. This includes atoms that are not tracked by + * scheduler: 'not ready to run' & 'dependency-only' + * jobs. + * @zero_jobs_wait: Waitq that reflects whether there are no jobs + * (including SW-only dependency jobs). This is set + * when no jobs are present on the ctx, and clear + * when there are jobs. + * This must be updated atomically with @job_nr. + * note: Job Dispatcher knows about more jobs than + * the Job Scheduler as it is unaware of jobs that + * are blocked on dependencies and SW-only dependency + * jobs. This waitq can be waited upon to find out + * when the context jobs are all done/cancelled + * (including those that might've been blocked + * on dependencies) - and so, whether it can be + * terminated. However, it should only be terminated + * once it is not present in the run-pool. + * Since the waitq is only set under @lock, + * the waiter should also briefly obtain and drop + * @lock to guarantee that the setter has completed + * its work on the kbase_context + * @job_done_wq: Workqueue to which the per atom work item is + * queued for bottom half processing when the + * atom completes + * execution on GPU or the input fence get signaled. + * @tb_lock: Lock to serialize the write access made to @tb to + * to store the register access trace messages. + * @tb: Pointer to the Userspace accessible buffer storing + * the trace messages for register read/write + * accesses made by the Kbase. The buffer is filled + * in circular fashion. + * @tb_wrap_offset: Offset to the end location in the trace buffer, + * the write pointer is moved to the beginning on + * reaching this offset. + * @work_id: atomic variable used for GPU tracepoints, + * incremented on every call to base_jd_submit. 
+ */ +struct kbase_jd_context { + struct mutex lock; + struct kbasep_js_kctx_info sched_info; + struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; + struct kbase_jd_renderpass renderpasses[BASE_JD_RP_COUNT]; + struct workqueue_struct *job_done_wq; + + wait_queue_head_t zero_jobs_wait; + spinlock_t tb_lock; + u32 *tb; + u32 job_nr; + size_t tb_wrap_offset; + +#ifdef CONFIG_GPU_TRACEPOINTS + atomic_t work_id; +#endif +}; + +/** + * struct jsctx_queue - JS context atom queue + * @runnable_tree: Root of RB-tree containing currently runnable atoms on this + * job slot. + * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot + * dependencies. Atoms on this list will be moved to the + * runnable_tree when the blocking atom completes. + * + * hwaccess_lock must be held when accessing this structure. + */ +struct jsctx_queue { + struct rb_root runnable_tree; + struct list_head x_dep_head; +}; + +#endif /* _KBASE_JM_DEFS_H_ */ diff --git a/mali_kbase/jm/mali_kbase_jm_ioctl.h b/mali_kbase/jm/mali_kbase_jm_ioctl.h new file mode 100644 index 0000000..127d990 --- /dev/null +++ b/mali_kbase/jm/mali_kbase_jm_ioctl.h @@ -0,0 +1,134 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_JM_IOCTL_H_ +#define _KBASE_JM_IOCTL_H_ + +#include <asm-generic/ioctl.h> +#include <linux/types.h> + +/* + * 11.1: + * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags + * 11.2: + * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED, + * which some user-side clients prior to 11.2 might fault if they received + * them + * 11.3: + * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and + * KBASE_IOCTL_STICKY_RESOURCE_UNMAP + * 11.4: + * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET + * 11.5: + * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD) + * 11.6: + * - Added flags field to base_jit_alloc_info structure, which can be used to + * specify pseudo chunked tiler alignment for JIT allocations. + * 11.7: + * - Removed UMP support + * 11.8: + * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags + * 11.9: + * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY + * under base_mem_alloc_flags + * 11.10: + * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for + * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations + * with one softjob. 
+ * 11.11: + * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags + * 11.12: + * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS + * 11.13: + * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT + * 11.14: + * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set + * under base_mem_alloc_flags + * 11.15: + * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags. + * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be + * passed to mmap(). + * 11.16: + * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf. + * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for + * dma-buf. Now, buffers are mapped on GPU when first imported, no longer + * requiring external resource or sticky resource tracking. UNLESS, + * CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled. + * 11.17: + * - Added BASE_JD_REQ_JOB_SLOT. + * - Reused padding field in base_jd_atom_v2 to pass job slot number. + * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO + * 11.18: + * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags + * 11.19: + * - Extended base_jd_atom_v2 to allow a renderpass ID to be specified. + * 11.20: + * - Added new phys_pages member to kbase_ioctl_mem_jit_init for + * KBASE_IOCTL_MEM_JIT_INIT, previous variants of this renamed to use _10_2 + * (replacing '_OLD') and _11_5 suffixes + * - Replaced compat_core_req (deprecated in 10.3) with jit_id[2] in + * base_jd_atom_v2. It must currently be initialized to zero. + * - Added heap_info_gpu_addr to base_jit_alloc_info, and + * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE allowable in base_jit_alloc_info's + * flags member. Previous variants of this structure are kept and given _10_2 + * and _11_5 suffixes. + * - The above changes are checked for safe values in usual builds + */ +#define BASE_UK_VERSION_MAJOR 11 +#define BASE_UK_VERSION_MINOR 20 + +/** + * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel + * + * @addr: Memory address of an array of struct base_jd_atom_v2 + * @nr_atoms: Number of entries in the array + * @stride: sizeof(struct base_jd_atom_v2) + */ +struct kbase_ioctl_job_submit { + __u64 addr; + __u32 nr_atoms; + __u32 stride; +}; + +#define KBASE_IOCTL_JOB_SUBMIT \ + _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit) + +#define KBASE_IOCTL_POST_TERM \ + _IO(KBASE_IOCTL_TYPE, 4) + +/** + * struct kbase_ioctl_soft_event_update - Update the status of a soft-event + * @event: GPU address of the event which has been updated + * @new_status: The new status to set + * @flags: Flags for future expansion + */ +struct kbase_ioctl_soft_event_update { + __u64 event; + __u32 new_status; + __u32 flags; +}; + +#define KBASE_IOCTL_SOFT_EVENT_UPDATE \ + _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update) + + +#endif /* _KBASE_JM_IOCTL_H_ */ diff --git a/mali_kbase/jm/mali_kbase_jm_js.h b/mali_kbase/jm/mali_kbase_jm_js.h new file mode 100644 index 0000000..6c222ce --- /dev/null +++ b/mali_kbase/jm/mali_kbase_jm_js.h @@ -0,0 +1,892 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Job Scheduler Interface.
+ * These interfaces are Internal to KBase.
+ */
+
+#ifndef _KBASE_JM_JS_H_
+#define _KBASE_JM_JS_H_
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * kbasep_js_devdata_init - Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * kbasep_js_devdata_halt - Halt the Job Scheduler.
+ *
+ * It is safe to call this on kbdev even if the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of kbdev
+ * must be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a programming error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * kbasep_js_devdata_term - Terminate the Job Scheduler
+ *
+ * It is safe to call this on kbdev even if the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of kbdev
+ * must be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a programming error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * kbasep_js_kctx_init - Initialize the Scheduling Component of a
+ *                       struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with
+ * the scheduler. Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero initialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context *const kctx);
+
+/**
+ * kbasep_js_kctx_term - Terminate the Scheduling Component of a
+ *                       struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler.
+ *
+ * It is safe to call this on a struct kbase_context that has never had or
+ * failed initialization of its jctx.sched_info member, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbase_context must be zero initialized before
+ * passing to the kbase_js_init() function.
+ *
+ * It is a programming error to call this whilst there are still jobs
+ * registered with this context.
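Several of the functions above promise that the *_halt()/*_term() calls stay safe even when the matching *_init() never ran or failed part-way, provided the sub-structure was zero initialized first. A stand-alone sketch of that pattern using hypothetical stand-in names (not the real kbase functions):

#include <stdlib.h>
#include <string.h>

struct js_data {
	int *table;	/* stays NULL until init succeeds */
};

static int js_data_init(struct js_data *js)
{
	js->table = malloc(16 * sizeof(*js->table));
	return js->table ? 0 : -1;
}

static void js_data_term(struct js_data *js)
{
	/* Safe even if js_data_init() failed or was never called, as long as
	 * *js was zero initialized beforehand: free(NULL) is a no-op.
	 */
	free(js->table);
	js->table = NULL;
}

int main(void)
{
	struct js_data js;

	memset(&js, 0, sizeof(js));	/* zero-init, as the comments require */

	if (js_data_init(&js) != 0) {
		js_data_term(&js);	/* error path: still safe, nothing to undo */
		return 1;
	}

	js_data_term(&js);
	return 0;
}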
+ */ +void kbasep_js_kctx_term(struct kbase_context *kctx); + +/** + * kbasep_js_add_job - Add a job chain to the Job Scheduler, + * and take necessary actions to + * schedule the context/run the job. + * + * This atomically does the following: + * * Update the numbers of jobs information + * * Add the job to the run pool if necessary (part of init_job) + * + * Once this is done, then an appropriate action is taken: + * * If the ctx is scheduled, it attempts to start the next job (which might be + * this added job) + * * Otherwise, and if this is the first job on the context, it enqueues it on + * the Policy Queue + * + * The Policy's Queue can be updated by this in the following ways: + * * In the above case that this is the first job on the context + * * If the context is high priority and the context is not scheduled, then it + * could cause the Policy to schedule out a low-priority context, allowing + * this context to be scheduled in. + * + * If the context is already scheduled on the RunPool, then adding a job to it + * is guaranteed not to update the Policy Queue. And so, the caller is + * guaranteed to not need to try scheduling a context from the Run Pool - it + * can safely assert that the result is false. + * + * It is a programming error to have more than U32_MAX jobs in flight at a time. + * + * The following locking conditions are made on the caller: + * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * * it must not hold hwaccess_lock (as this will be obtained internally) + * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used + * internally). + * + * Return: true indicates that the Policy Queue was updated, and so the + * caller will need to try scheduling a context onto the Run Pool, + * false indicates that no updates were made to the Policy Queue, + * so no further action is required from the caller. This is always returned + * when the context is currently scheduled. + */ +bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); + +/** + * kbasep_js_remove_job - Remove a job chain from the Job Scheduler, + * except for its 'retained state'. + * + * Completely removing a job requires several calls: + * * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of + * the atom + * * kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler + * * kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the + * remaining state held as part of the job having been run. + * + * In the common case of atoms completing normally, this set of actions is more + * optimal for spinlock purposes than having kbasep_js_remove_job() handle all + * of the actions. + * + * In the case of canceling atoms, it is easier to call + * kbasep_js_remove_cancelled_job(), which handles all the necessary actions. + * + * It is a programming error to call this when: + * * a atom is not a job belonging to kctx. + * * a atom has already been removed from the Job Scheduler. + * * a atom is still in the runpool + * + * Do not use this for removing jobs being killed by kbase_jd_cancel() - use + * kbasep_js_remove_cancelled_job() instead. + * + * The following locking conditions are made on the caller: + * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. 
+
+/**
+ * kbasep_js_remove_cancelled_job - Completely remove a job chain from the
+ *                                  Job Scheduler, in the case
+ *                                  where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * * an atom is not a job belonging to kctx.
+ * * an atom has already been removed from the Job Scheduler.
+ * * an atom is still in the runpool:
+ *   * it is not being killed with kbasep_jd_cancel()
+ *
+ * The following locking conditions are made on the caller:
+ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold the hwaccess_lock (as this will be obtained
+ *   internally)
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this could be
+ *   obtained internally)
+ *
+ * Return: true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs;
+ * false otherwise.
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbase_jd_atom *katom);
+
+/**
+ * kbasep_js_runpool_requeue_or_kill_ctx - Handle the requeuing/killing of a
+ *                                         context that was evicted from the
+ *                                         policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * * If the context is not dying and has jobs, it gets re-added to the policy
+ *   queue
+ * * Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be
+ *   obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * kbasep_js_runpool_release_ctx - Release a refcount of a context being busy,
+ *                                 allowing it to be scheduled out.
+ *
+ * When the refcount reaches zero, the context may be scheduled out
+ * (depending on whether the Scheduling Policy has deemed it so, or if it has
+ * run out of jobs).
+ *
+ * If the context does get scheduled out, then the following actions will be
+ * taken as part of descheduling a context:
+ * For the context being descheduled:
+ * * If the context is in the process of dying (all the jobs are being
+ *   removed from it), then descheduling also kills off any jobs remaining in
+ *   the context.
+ * * If the context is not dying, and any jobs remain after descheduling the
+ *   context, then it is re-enqueued to the Policy's Queue.
+ * * Otherwise, the context is still known to the scheduler, but remains absent
+ *   from the Policy Queue until a job is next added to it.
+ * * In all descheduling cases, the Power Manager active reference (obtained
+ *   during kbasep_js_try_schedule_head_ctx()) is released
+ *   (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * * Attempt submitting atoms when the Context Attributes on the Runpool have
+ *   changed. This is because the context being scheduled out could mean that
+ *   there are more opportunities to run atoms.
+ * * Attempt submitting to a slot that was previously blocked due to affinity
+ *   restrictions. This is usually only necessary when releasing a context
+ *   happens as part of completing a previous job, but is harmless nonetheless.
+ * * Attempt scheduling in a new context (if one is available), and if
+ *   necessary, running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * * it must not hold the hwaccess_lock, because it will be used internally.
+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be
+ *   obtained internally)
+ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be
+ *   obtained internally)
+ * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be
+ *   obtained internally)
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx);
+
+/**
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of
+ *         kbasep_js_runpool_release_ctx() that handles additional
+ *         actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * * Releasing the atom's context attributes
+ * * Retrying the submission on a particular slot, because we couldn't submit
+ *   on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx().
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * kbasep_js_runpool_release_ctx_nolock - Variant of
+ *         kbasep_js_runpool_release_ctx() that assumes that
+ *         kbasep_js_device_data::runpool_mutex and
+ *         kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and
+ *         does not attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+		struct kbase_context *kctx);
+
+/**
+ * kbasep_js_schedule_privileged_ctx - Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the
+ * function will wait for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space
+ * reserved) until kbasep_js_release_privileged_ctx() is called.
+ *
+ * The following locking conditions are made on the caller:
+ * * it must not hold the hwaccess_lock, because it will be used internally.
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be
+ *   obtained internally)
+ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be
+ *   obtained internally)
+ * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used
+ *   internally).
+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ *   be used internally.
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx);
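For illustration only, an editorial sketch that is not part of the patch: the intended pairing of the two privileged-context calls documented here; the work done while the context is pinned is a placeholder.

    /* Keep kctx (and its address space) resident regardless of priority */
    kbasep_js_schedule_privileged_ctx(kbdev, kctx);

    /* ... perform the operation that needs the context to stay scheduled ... */

    /* Allow the context to be scheduled out again */
    kbasep_js_release_privileged_ctx(kbdev, kctx);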
+
+/**
+ * kbasep_js_release_privileged_ctx - Release a privileged context,
+ *                                    allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * * it must not hold the hwaccess_lock, because it will be used internally.
+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be
+ *   obtained internally)
+ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be
+ *   obtained internally)
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx);
+
+/**
+ * kbase_js_try_run_jobs - Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * * kbasep_js_device_data::runpool_mutex
+ * * hwaccess_lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * kbasep_js_suspend - Suspend the job scheduler during a Power Management
+ *                     Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Management active refcounts to
+ * become zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time.
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbasep_js_resume - Resume the Job Scheduler after a Power Management
+ *                    Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * * Schedules contexts back into the runpool
+ * * Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_js_dep_resolved_submit - Submit an atom to the job scheduler.
+ *
+ * @kctx:  Context pointer
+ * @katom: Pointer to the atom to submit
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * Return: true if the context requires to be enqueued, otherwise false.
+ */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom);
+
+/**
+ * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+ * @kctx: Context Pointer
+ * @prio: Priority (specifies the queue together with js).
+ * @js:   Job slot (specifies the queue together with prio).
+ *
+ * Pushes all possible atoms from the linked list to the ringbuffer.
+ * The number of atoms pushed is limited by the free space in the ringbuffer
+ * and by the number of available atoms in the linked list.
+ */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+
+/**
+ * kbase_js_pull - Pull an atom from a context in the job scheduler for
+ *                 execution.
+ *
+ * @kctx: Context to pull from
+ * @js:   Job slot to pull from
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * Return: a pointer to an atom, or NULL if there are no atoms for this
+ * slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * kbase_js_unpull - Return an atom to the job scheduler ringbuffer.
+ *
+ * @kctx:  Context pointer
+ * @katom: Pointer to the atom to unpull
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped. Another reason is if an end-of-renderpass atom completed
+ * but will need to be run again as part of the same renderpass.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_complete_atom_wq - Complete an atom from jd_done_worker(),
+ *                             removing it from the job
+ *                             scheduler ringbuffer.
+ * @kctx:  Context pointer
+ * @katom: Pointer to the atom to complete
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * Return: true if the context is now idle (no jobs pulled), false otherwise.
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_complete_atom - Complete an atom.
+ *
+ * @katom:         Pointer to the atom to complete
+ * @end_timestamp: The time that the atom completed (may be NULL)
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * Return: an atom that has now been unblocked and can now be run, or NULL
+ * if none
+ */
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp);
+
+/**
+ * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot
+ *                                  dependency
+ * @katom: Pointer to an atom in the slot ringbuffer
+ *
+ * A cross-slot dependency is ignored if necessary to unblock incremental
+ * rendering. If the atom at the start of a renderpass used too much memory
+ * and was soft-stopped then the atom at the end of a renderpass is submitted
+ * to hardware regardless of its dependency on the start-of-renderpass atom.
+ * This can happen multiple times for the same pair of atoms.
+ *
+ * Return: true to block the atom or false to allow it to be submitted to
+ * hardware.
+ */
+bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom);
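For illustration only, an editorial sketch that is not part of the patch: the unpull ordering rule for kbase_js_pull()/kbase_js_unpull(). 'must_back_off' is a hypothetical condition, NULL checks are omitted, and the HW access lock is assumed to be held throughout.

    struct kbase_jd_atom *a1, *a2;

    a1 = kbase_js_pull(kctx, js);
    a2 = kbase_js_pull(kctx, js);

    if (must_back_off) {
        /* Atoms must be returned in the reverse order they were pulled */
        kbase_js_unpull(kctx, a2);
        kbase_js_unpull(kctx, a1);
    }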
+
+/**
+ * kbase_js_sched - Submit atoms from all available contexts.
+ *
+ * @kbdev:   Device pointer
+ * @js_mask: Mask of job slots to submit to
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_js_zap_context - Attempt to deschedule a context that is being
+ *                        destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure that no further jobs can be
+ * submitted from the context.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * kbase_js_is_atom_valid - Validate an atom
+ *
+ * @kbdev: Device pointer
+ * @katom: Atom to validate
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * Return: true if the atom is valid, false otherwise.
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user-specified data
+ *
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used; if a timeout
+ * is negative then the default is set.
+ */
+void kbase_js_set_timeouts(struct kbase_device *kbdev);
+
+/**
+ * kbase_js_set_ctx_priority - set the context priority
+ *
+ * @kctx:         Context pointer
+ * @new_priority: New priority value for the Context
+ *
+ * The context priority is set to a new value and it is moved to the
+ * pullable/unpullable list as per the new priority.
+ */
+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority);
+
+/**
+ * kbase_js_update_ctx_priority - update the context priority
+ *
+ * @kctx: Context pointer
+ *
+ * The context priority gets updated as per the priority of atoms currently in
+ * use for that context, but only if system priority mode for context
+ * scheduling is being used.
+ */
+void kbase_js_update_ctx_priority(struct kbase_context *kctx);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * kbasep_js_is_submit_allowed - Check that a context is allowed to submit
+ *                               jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size,
+ * and wrap up the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(
+		struct kbasep_js_device_data *js_devdata,
+		struct kbase_context *kctx)
+{
+	u16 test_bit;
+	bool is_allowed;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	test_bit = (u16) (1u << kctx->as_nr);
+
+	is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+	dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %p (as=%d)",
+			is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr);
+	return is_allowed;
+}
+
+/**
+ * kbasep_js_set_submit_allowed - Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size,
+ * and wrap up the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
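For illustration only, an editorial sketch that is not part of the patch: typical use of the submit-allowed helpers while hwaccess_lock is held. The kbdev->js_data field is the kbasep_js_device_data instance these helpers expect; how the lock is taken is left to the caller.

    /* Caller already holds hwaccess_lock */
    if (kbasep_js_is_submit_allowed(&kbdev->js_data, kctx))
        kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);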
+ */ +static inline void kbasep_js_set_submit_allowed( + struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) +{ + u16 set_bit; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + set_bit = (u16) (1u << kctx->as_nr); + + dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", + kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed |= set_bit; +} + +/** + * kbasep_js_clear_submit_allowed - Prevent a context from submitting more + * jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. + * + * The caller must hold hwaccess_lock. + */ +static inline void kbasep_js_clear_submit_allowed( + struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) +{ + u16 clear_bit; + u16 clear_mask; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + clear_bit = (u16) (1u << kctx->as_nr); + clear_mask = ~clear_bit; + + dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", + kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed &= clear_mask; +} + +/** + * Create an initial 'invalid' atom retained state, that requires no + * atom-related work to be done on releasing with + * kbasep_js_runpool_release_ctx_and_katom_retained_state() + */ +static inline void kbasep_js_atom_retained_state_init_invalid( + struct kbasep_js_atom_retained_state *retained_state) +{ + retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; + retained_state->core_req = + KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; +} + +/** + * Copy atom state that can be made available after jd_done_nolock() is called + * on that atom. + */ +static inline void kbasep_js_atom_retained_state_copy( + struct kbasep_js_atom_retained_state *retained_state, + const struct kbase_jd_atom *katom) +{ + retained_state->event_code = katom->event_code; + retained_state->core_req = katom->core_req; + retained_state->sched_priority = katom->sched_priority; + retained_state->device_nr = katom->device_nr; +} + +/** + * kbasep_js_has_atom_finished - Determine whether an atom has finished + * (given its retained state), + * and so should be given back to + * userspace/removed from the system. + * + * @katom_retained_state: the retained state of the atom to check + * + * Reasons for an atom not finishing include: + * * Being soft-stopped (and so, the atom should be resubmitted sometime later) + * * It is an end of renderpass atom that was run to consume the output of a + * start-of-renderpass atom that was soft-stopped because it used too much + * memory. In this case, it will have to be run again later. + * + * Return: false if the atom has not finished, true otherwise. 
+ */ +static inline bool kbasep_js_has_atom_finished( + const struct kbasep_js_atom_retained_state *katom_retained_state) +{ + return (bool) (katom_retained_state->event_code != + BASE_JD_EVENT_STOPPED && + katom_retained_state->event_code != + BASE_JD_EVENT_REMOVED_FROM_NEXT && + katom_retained_state->event_code != + BASE_JD_EVENT_END_RP_DONE); +} + +/** + * kbasep_js_atom_retained_state_is_valid - Determine whether a struct + * kbasep_js_atom_retained_state + * is valid + * @katom_retained_state the atom's retained state to check + * + * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates + * that the code should just ignore it. + * + * Return: false if the retained state is invalid, true otherwise. + */ +static inline bool kbasep_js_atom_retained_state_is_valid( + const struct kbasep_js_atom_retained_state *katom_retained_state) +{ + return (bool) (katom_retained_state->core_req != + KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); +} + +/** + * kbase_js_runpool_inc_context_count - Increment number of running contexts. + * + * The following locking conditions are made on the caller: + * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. + * * The caller must hold the kbasep_js_device_data::runpool_mutex + */ +static inline void kbase_js_runpool_inc_context_count( + struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + + /* Track total contexts */ + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); + ++(js_devdata->nr_all_contexts_running); + + if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + /* Track contexts that can submit jobs */ + KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < + S8_MAX); + ++(js_devdata->nr_user_contexts_running); + } +} + +/** + * kbase_js_runpool_dec_context_count - decrement number of running contexts. + * + * The following locking conditions are made on the caller: + * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. + * * The caller must hold the kbasep_js_device_data::runpool_mutex + */ +static inline void kbase_js_runpool_dec_context_count( + struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + + /* Track total contexts */ + --(js_devdata->nr_all_contexts_running); + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); + + if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + /* Track contexts that can submit jobs */ + --(js_devdata->nr_user_contexts_running); + KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); + } +} + +/** + * kbase_js_sched_all - Submit atoms from all available contexts to all + * job slots. + * + * @kbdev: Device pointer + * + * This will attempt to submit as many jobs as possible. It will exit when + * either all job slots are full, or all contexts have been used. 
+ */ +static inline void kbase_js_sched_all(struct kbase_device *kbdev) +{ + kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); +} + +extern const int +kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; + +extern const base_jd_prio +kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + +/** + * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio) + * to relative ordering + * @atom_prio: Priority ID to translate. + * + * Atom priority values for @ref base_jd_prio cannot be compared directly to + * find out which are higher or lower. + * + * This function will convert base_jd_prio values for successively lower + * priorities into a monotonically increasing sequence. That is, the lower the + * base_jd_prio priority, the higher the value produced by this function. This + * is in accordance with how the rest of the kernel treats priority. + * + * The mapping is 1:1 and the size of the valid input range is the same as the + * size of the valid output range, i.e. + * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS + * + * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions + * + * Return: On success: a value in the inclusive range + * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure: + * KBASE_JS_ATOM_SCHED_PRIO_INVALID + */ +static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) +{ + if (atom_prio >= BASE_JD_NR_PRIO_LEVELS) + return KBASE_JS_ATOM_SCHED_PRIO_INVALID; + + return kbasep_js_atom_priority_to_relative[atom_prio]; +} + +static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio) +{ + unsigned int prio_idx; + + KBASE_DEBUG_ASSERT(sched_prio >= 0 && + sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT); + + prio_idx = (unsigned int)sched_prio; + + return kbasep_js_relative_priority_to_atom[prio_idx]; +} + +#endif /* _KBASE_JM_JS_H_ */ diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index 399d1b6..94c89fa 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ b/mali_kbase/mali_base_hwconfig_issues.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -57,6 +57,7 @@ enum base_hw_issue { BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -209,6 +210,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { BASE_HW_ISSUE_TSIX_1792, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -221,6 +223,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { BASE_HW_ISSUE_TSIX_1792, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -232,6 +235,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -242,6 +246,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -251,6 +256,7 @@ static const enum base_hw_issue base_hw_issues_model_tSIx[] = { BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -261,6 +267,7 @@ static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -270,6 +277,7 @@ static const enum base_hw_issue base_hw_issues_model_tDVx[] = { BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -281,6 +289,7 @@ static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { BASE_HW_ISSUE_TNOX_1194, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -290,6 +299,7 @@ static const enum base_hw_issue base_hw_issues_model_tNOx[] = { BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -301,6 +311,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { BASE_HW_ISSUE_TNOX_1194, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -312,6 +323,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { BASE_HW_ISSUE_TGOX_R1_1234, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -321,6 +333,7 @@ static const enum base_hw_issue base_hw_issues_model_tGOx[] = { BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -335,6 +348,7 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -349,6 +363,22 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + 
BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -360,6 +390,7 @@ static const enum base_hw_issue base_hw_issues_model_tTRx[] = { BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -374,6 +405,7 @@ static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -388,6 +420,7 @@ static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -399,6 +432,7 @@ static const enum base_hw_issue base_hw_issues_model_tNAx[] = { BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -411,6 +445,20 @@ static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -423,6 +471,20 @@ static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -434,6 +496,7 @@ static const enum base_hw_issue base_hw_issues_model_tBEx[] = { BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -444,7 +507,6 @@ static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_END }; @@ -455,7 +517,6 @@ static const enum base_hw_issue base_hw_issues_model_tDUx[] = { BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_END }; @@ -463,7 +524,6 @@ static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_END }; @@ -472,7 +532,6 @@ static const enum base_hw_issue base_hw_issues_model_tODx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_END }; @@ -480,7 +539,6 @@ static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_END }; @@ -489,7 +547,6 @@ static const enum base_hw_issue base_hw_issues_model_tGRx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_END }; @@ -497,7 +554,6 @@ static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, 
BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_END }; @@ -506,7 +562,6 @@ static const enum base_hw_issue base_hw_issues_model_tVAx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_END }; @@ -532,7 +587,6 @@ static const enum base_hw_issue base_hw_issues_tE2x_r0p0[] = { BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_END }; @@ -543,7 +597,6 @@ static const enum base_hw_issue base_hw_issues_model_tE2x[] = { BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_END }; diff --git a/mali_kbase/mali_base_kernel.h b/mali_kbase/mali_base_kernel.h index 8687736..1e2744d 100644 --- a/mali_kbase/mali_base_kernel.h +++ b/mali_kbase/mali_base_kernel.h @@ -29,27 +29,16 @@ #ifndef _BASE_KERNEL_H_ #define _BASE_KERNEL_H_ -typedef struct base_mem_handle { +struct base_mem_handle { struct { u64 handle; } basep; -} base_mem_handle; +}; #include "mali_base_mem_priv.h" #include "gpu/mali_kbase_gpu_coherency.h" #include "gpu/mali_kbase_gpu_id.h" -/* - * Dependency stuff, keep it private for now. May want to expose it if - * we decide to make the number of semaphores a configurable - * option. - */ -#define BASE_JD_ATOM_COUNT 256 - -/* Set/reset values for a software event */ -#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) -#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) - #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 #define BASE_MAX_COHERENT_GROUPS 16 @@ -76,16 +65,6 @@ typedef struct base_mem_handle { #endif #endif -/** - * @addtogroup base_user_api User-side Base APIs - * @{ - */ - -/** - * @addtogroup base_user_api_memory User-side Base Memory APIs - * @{ - */ - /* Physical memory group ID for normal usage. */ #define BASE_MEM_GROUP_DEFAULT (0) @@ -108,193 +87,6 @@ typedef struct base_mem_handle { */ typedef u32 base_mem_alloc_flags; -/* Memory allocation, access/hint flags. - * - * See base_mem_alloc_flags. - */ - -/* IN */ -/* Read access CPU side - */ -#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) - -/* Write access CPU side - */ -#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) - -/* Read access GPU side - */ -#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) - -/* Write access GPU side - */ -#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) - -/* Execute allowed on the GPU side - */ -#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) - -/* Will be permanently mapped in kernel space. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) - -/* The allocation will completely reside within the same 4GB chunk in the GPU - * virtual space. - * Since this flag is primarily required only for the TLS memory which will - * not be used to contain executable code and also not used for Tiler heap, - * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. - */ -#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) - -/* Userspace is not allowed to free this memory. - * Flag is only allowed on allocations originating from kbase. 
- */ -#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) - -#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) - -/* Grow backing store on GPU Page Fault - */ -#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) - -/* Page coherence Outer shareable, if available - */ -#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) - -/* Page coherence Inner shareable - */ -#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) - -/* Should be cached on the CPU - */ -#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) - -/* IN/OUT */ -/* Must have same VA on both the GPU and the CPU - */ -#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) - -/* OUT */ -/* Must call mmap to acquire a GPU address for the alloc - */ -#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) - -/* IN */ -/* Page coherence Outer shareable, required. - */ -#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) - -/* Protected memory - */ -#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) - -/* Not needed physical memory - */ -#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) - -/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the - * addresses to be the same - */ -#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) - -/** - * Bit 19 is reserved. - * - * Do not remove, use the next unreserved bit for new flags - */ -#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) -#define BASE_MEM_MAYBE_RESERVED_BIT_19 BASE_MEM_RESERVED_BIT_19 - -/** - * Memory starting from the end of the initial commit is aligned to 'extent' - * pages, where 'extent' must be a power of 2 and no more than - * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES - */ -#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) - -/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu mode. - * Some components within the GPU might only be able to access memory that is - * GPU cacheable. Refer to the specific GPU implementation for more details. - * The 3 shareability flags will be ignored for GPU uncached memory. - * If used while importing USER_BUFFER type memory, then the import will fail - * if the memory is not aligned to GPU and CPU cache line width. - */ -#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) - -/* - * Bits [22:25] for group_id (0~15). - * - * base_mem_group_id_set() should be used to pack a memory group ID into a - * base_mem_alloc_flags value instead of accessing the bits directly. - * base_mem_group_id_get() should be used to extract the memory group ID from - * a base_mem_alloc_flags value. - */ -#define BASEP_MEM_GROUP_ID_SHIFT 22 -#define BASE_MEM_GROUP_ID_MASK \ - ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) - -/* Must do CPU cache maintenance when imported memory is mapped/unmapped - * on GPU. Currently applicable to dma-buf type only. - */ -#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) - -/* Use the GPU VA chosen by the kernel client */ -#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27) - -/** - * Number of bits used as flags for base memory management - * - * Must be kept in sync with the base_mem_alloc_flags flags - */ -#define BASE_MEM_FLAGS_NR_BITS 28 - -/* A mask for all output bits, excluding IN/OUT bits. - */ -#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP - -/* A mask for all input bits, including IN/OUT bits. 
- */ -#define BASE_MEM_FLAGS_INPUT_MASK \ - (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) - -/** - * base_mem_group_id_get() - Get group ID from flags - * @flags: Flags to pass to base_mem_alloc - * - * This inline function extracts the encoded group ID from flags - * and converts it into numeric value (0~15). - * - * Return: group ID(0~15) extracted from the parameter - */ -static inline int base_mem_group_id_get(base_mem_alloc_flags flags) -{ - LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); - return (int)((flags & BASE_MEM_GROUP_ID_MASK) >> - BASEP_MEM_GROUP_ID_SHIFT); -} - -/** - * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags - * @id: group ID(0~15) you want to encode - * - * This inline function encodes specific group ID into base_mem_alloc_flags. - * Parameter 'id' should lie in-between 0 to 15. - * - * Return: base_mem_alloc_flags with the group ID (id) encoded - * - * The return value can be combined with other flags against base_mem_alloc - * to identify a specific memory group. - */ -static inline base_mem_alloc_flags base_mem_group_id_set(int id) -{ - LOCAL_ASSERT(id >= 0); - LOCAL_ASSERT(id < BASE_MEM_GROUP_COUNT); - - return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) & - BASE_MEM_GROUP_ID_MASK; -} - /* A mask for all the flags which are modifiable via the base_mem_set_flags * interface. */ @@ -302,19 +94,6 @@ static inline base_mem_alloc_flags base_mem_group_id_set(int id) (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ BASE_MEM_COHERENT_LOCAL) - -/* A mask of all currently reserved flags - */ -#define BASE_MEM_FLAGS_RESERVED \ - (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_MAYBE_RESERVED_BIT_19) - -/* A mask of all the flags which are only valid for allocations within kbase, - * and may not be passed from user space. - */ -#define BASEP_MEM_FLAGS_KERNEL_ONLY \ - (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \ - BASE_MEM_FLAG_MAP_FIXED) - /* A mask of all the flags that can be returned via the base_mem_get_flags() * interface. */ @@ -340,14 +119,14 @@ static inline base_mem_alloc_flags base_mem_group_id_set(int id) * as future releases from ARM might include other new types * which could clash with your custom types. */ -typedef enum base_mem_import_type { +enum base_mem_import_type { BASE_MEM_IMPORT_TYPE_INVALID = 0, /** * Import type with value 1 is deprecated. */ BASE_MEM_IMPORT_TYPE_UMM = 2, BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3 -} base_mem_import_type; +}; /** * struct base_mem_import_user_buffer - Handle of an imported user buffer @@ -363,45 +142,12 @@ struct base_mem_import_user_buffer { u64 length; }; -/** - * @brief Invalid memory handle. - * - * Return value from functions returning @ref base_mem_handle on error. - * - * @warning @ref base_mem_handle_new_invalid must be used instead of this macro - * in C++ code or other situations where compound literals cannot be used. - */ -#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} }) - -/** - * @brief Special write-alloc memory handle. - * - * A special handle is used to represent a region where a special page is mapped - * with a write-alloc cache setup, typically used when the write result of the - * GPU isn't needed, but the GPU must write anyway. - * - * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro - * in C++ code or other situations where compound literals cannot be used. 
- */ -#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} }) - -#define BASEP_MEM_INVALID_HANDLE (0ull << 12) -#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) -#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) -#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) -#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) -/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */ -#define BASE_MEM_COOKIE_BASE (64ul << 12) -#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ - BASE_MEM_COOKIE_BASE) - /* Mask to detect 4GB boundary alignment */ #define BASE_MEM_MASK_4GB 0xfffff000UL /* Mask to detect 4GB boundary (in page units) alignment */ #define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT) -/** - * Limit on the 'extent' parameter for an allocation with the +/* Limit on the 'extent' parameter for an allocation with the * BASE_MEM_TILER_ALIGN_TOP flag set * * This is the same as the maximum limit for a Buffer Descriptor's chunk size @@ -417,82 +163,22 @@ struct base_mem_import_user_buffer { /* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */ #define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ - -/** - * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs - * @{ - */ - -/** - * @brief a basic memory operation (sync-set). - * - * The content of this structure is private, and should only be used - * by the accessors. - */ -typedef struct base_syncset { - struct basep_syncset basep_sset; -} base_syncset; - -/** @} end group base_user_api_memory_defered */ - -/** - * Handle to represent imported memory object. - * Simple opague handle to imported memory, can't be used - * with anything but base_external_resource_init to bind to an atom. - */ -typedef struct base_import_handle { - struct { - u64 handle; - } basep; -} base_import_handle; - -/** @} end group base_user_api_memory */ - -/** - * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs - * @{ - */ - -typedef int platform_fence_type; -#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1) - /** - * Base stream handle. + * struct base_fence - Cross-device synchronisation fence. * - * References an underlying base stream object. + * A fence is used to signal when the GPU has finished accessing a resource that + * may be shared with other devices, and also to delay work done asynchronously + * by the GPU until other devices have finished accessing a shared resource. */ -typedef struct base_stream { - struct { - int fd; - } basep; -} base_stream; - -/** - * Base fence handle. - * - * References an underlying base fence object. - */ -typedef struct base_fence { +struct base_fence { struct { int fd; int stream_fd; } basep; -} base_fence; - -/** - * @brief Per-job data - * - * This structure is used to store per-job data, and is completely unused - * by the Base driver. It can be used to store things such as callback - * function pointer, data to handle job completion. It is guaranteed to be - * untouched by the Base driver. - */ -typedef struct base_jd_udata { - u64 blob[2]; /**< per-job data array */ -} base_jd_udata; +}; /** - * @brief Memory aliasing info + * struct base_mem_aliasing_info - Memory aliasing info * * Describes a memory handle to be aliased. * A subset of the handle can be chosen for aliasing, given an offset and a @@ -506,26 +192,99 @@ typedef struct base_jd_udata { * Offset must be within the size of the handle. 
* Offset+length must not overrun the size of the handle. * - * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE - * @offset Offset within the handle to start aliasing from, in pages. - * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. - * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE - * specifies the number of times the special page is needed. + * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE + * @offset: Offset within the handle to start aliasing from, in pages. + * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. + * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE + * specifies the number of times the special page is needed. */ struct base_mem_aliasing_info { - base_mem_handle handle; + struct base_mem_handle handle; u64 offset; u64 length; }; -/** - * Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the +/* Maximum percentage of just-in-time memory allocation trimming to perform + * on free. + */ +#define BASE_JIT_MAX_TRIM_LEVEL (100) + +/* Maximum number of concurrent just-in-time memory allocations. + */ +#define BASE_JIT_ALLOC_COUNT (255) + +/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the * initial commit is aligned to 'extent' pages, where 'extent' must be a power * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES */ #define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) /** + * If set, the heap info address points to a u32 holding the used size in bytes; + * otherwise it points to a u64 holding the lowest address of unused memory. + */ +#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1) + +/** + * Valid set of just-in-time memory allocation flags + * + * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr + * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set + * and heap_info_gpu_addr being 0 will be rejected). + */ +#define BASE_JIT_ALLOC_VALID_FLAGS \ + (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) + +/* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5 + * + * jit_version is 1 + * + * Due to the lack of padding specified, user clients between 32 and 64-bit + * may have assumed a different size of the struct + * + * An array of structures was not supported + */ +struct base_jit_alloc_info_10_2 { + u64 gpu_alloc_addr; + u64 va_pages; + u64 commit_pages; + u64 extent; + u8 id; +}; + +/* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up + * to 11.19 + * + * This structure had a number of modifications during and after kernel driver + * version 11.5, but remains size-compatible throughout its version history, and + * with earlier variants compatible with future variants by requiring + * zero-initialization to the unused space in the structure. + * + * jit_version is 2 + * + * Kernel driver version history: + * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes + * must be zero. Kbase minor version was not incremented, so some + * versions of 11.5 do not have this change. 
+ * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase + * minor version not incremented) + * 11.6: Added 'flags', replacing 1 padding byte + * 11.10: Arrays of this structure are supported + */ +struct base_jit_alloc_info_11_5 { + u64 gpu_alloc_addr; + u64 va_pages; + u64 commit_pages; + u64 extent; + u8 id; + u8 bin_id; + u8 max_allocations; + u8 flags; + u8 padding[2]; + u16 usage_id; +}; + +/** * struct base_jit_alloc_info - Structure which describes a JIT allocation * request. * @gpu_alloc_addr: The GPU virtual address to write the JIT @@ -543,13 +302,29 @@ struct base_mem_aliasing_info { * type of JIT allocation. * @max_allocations: The maximum number of allocations allowed within * the bin specified by @bin_id. Should be the same - * for all JIT allocations within the same bin. + * for all allocations within the same bin. * @flags: flags specifying the special requirements for - * the JIT allocation. + * the JIT allocation, see + * %BASE_JIT_ALLOC_VALID_FLAGS * @padding: Expansion space - should be initialised to zero * @usage_id: A hint about which allocation should be reused. * The kernel should attempt to use a previous * allocation with the same usage_id + * @heap_info_gpu_addr: Pointer to an object in GPU memory describing + * the actual usage of the region. + * + * jit_version is 3. + * + * When modifications are made to this structure, it is still compatible with + * jit_version 3 when: a) the size is unchanged, and b) new members only + * replace the padding bytes. + * + * Previous jit_version history: + * jit_version == 1, refer to &base_jit_alloc_info_10_2 + * jit_version == 2, refer to &base_jit_alloc_info_11_5 + * + * Kbase version history: + * 11.20: added @heap_info_gpu_addr */ struct base_jit_alloc_info { u64 gpu_alloc_addr; @@ -562,379 +337,17 @@ struct base_jit_alloc_info { u8 flags; u8 padding[2]; u16 usage_id; + u64 heap_info_gpu_addr; }; -/** - * @brief Job dependency type. - * - * A flags field will be inserted into the atom structure to specify whether a dependency is a data or - * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without - * changing the structure size). - * When the flag is set for a particular dependency to signal that it is an ordering only dependency then - * errors will not be propagated. - */ -typedef u8 base_jd_dep_type; - - -#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ -#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ -#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ - -/** - * @brief Job chain hardware requirements. - * - * A job chain must specify what GPU features it needs to allow the - * driver to schedule the job correctly. By not specifying the - * correct settings can/will cause an early job termination. Multiple - * values can be ORed together to specify multiple requirements. - * Special case is ::BASE_JD_REQ_DEP, which is used to express complex - * dependencies, and that doesn't execute anything on the hardware. 
- */ -typedef u32 base_jd_core_req; - -/* Requirements that come from the HW */ - -/** - * No requirement, dependency only - */ -#define BASE_JD_REQ_DEP ((base_jd_core_req)0) - -/** - * Requires fragment shaders - */ -#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) - -/** - * Requires compute shaders - * This covers any of the following Midgard Job types: - * - Vertex Shader Job - * - Geometry Shader Job - * - An actual Compute Shader Job - * - * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the - * job is specifically just the "Compute Shader" job type, and not the "Vertex - * Shader" nor the "Geometry Shader" job type. - */ -#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) -#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) /**< Requires tiling */ -#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) /**< Requires cache flushes */ -#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) /**< Requires value writeback */ - -/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */ - -/* Requires fragment job with AFBC encoding */ -#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) - -/** - * SW-only requirement: coalesce completion events. - * If this bit is set then completion of this atom will not cause an event to - * be sent to userspace, whether successful or not; completion events will be - * deferred until an atom completes which does not have this bit set. - * - * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. - */ -#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) - -/** - * SW Only requirement: the job chain requires a coherent core group. We don't - * mind which coherent core group is used. - */ -#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) - -/** - * SW Only requirement: The performance counters should be enabled only when - * they are needed, to reduce power consumption. - */ - -#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) - -/** - * SW Only requirement: External resources are referenced by this atom. - * When external resources are referenced no syncsets can be bundled with the atom - * but should instead be part of a NULL jobs inserted into the dependency tree. - * The first pre_dep object must be configured for the external resouces to use, - * the second pre_dep object can be used to create other dependencies. - * - * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and - * BASE_JD_REQ_SOFT_EVENT_WAIT. - */ -#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) - -/** - * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted - * to the hardware but will cause some action to happen within the driver - */ -#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) - -#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) -#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) -#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) - -/* 0x4 RESERVED for now */ - -/** - * SW only requirement: event wait/trigger job. - * - * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. - * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the - * other waiting jobs. It completes immediately. - * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it - * possible for other jobs to wait upon. It completes immediately. 
- */ -#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) -#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) -#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) - -#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) - -/** - * SW only requirement: Just In Time allocation - * - * This job requests a single or multiple JIT allocations through a list - * of @base_jit_alloc_info structure which is passed via the jc element of - * the atom. The number of @base_jit_alloc_info structures present in the - * list is passed via the nr_extres element of the atom - * - * It should be noted that the id entry in @base_jit_alloc_info must not - * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE. - * - * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE - * soft job to free the JIT allocation is still made. - * - * The job will complete immediately. - */ -#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) -/** - * SW only requirement: Just In Time free - * - * This job requests a single or multiple JIT allocations created by - * @BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the JIT - * allocations is passed via the jc element of the atom. - * - * The job will complete immediately. - */ -#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) - -/** - * SW only requirement: Map external resource - * - * This job requests external resource(s) are mapped once the dependencies - * of the job have been satisfied. The list of external resources are - * passed via the jc element of the atom which is a pointer to a - * @base_external_resource_list. - */ -#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) -/** - * SW only requirement: Unmap external resource - * - * This job requests external resource(s) are unmapped once the dependencies - * of the job has been satisfied. The list of external resources are - * passed via the jc element of the atom which is a pointer to a - * @base_external_resource_list. - */ -#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) - -/** - * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) - * - * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type. - * - * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job - * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. - */ -#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) - -/** - * HW Requirement: Use the base_jd_atom::device_nr field to specify a - * particular core group - * - * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority - * - * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms. - * - * If the core availability policy is keeping the required core group turned off, then - * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code. - */ -#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) - -/** - * SW Flag: If this bit is set then the successful completion of this atom - * will not cause an event to be sent to userspace - */ -#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) - -/** - * SW Flag: If this bit is set then completion of this atom will not cause an - * event to be sent to userspace, whether successful or not. 
- */ -#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) - -/** - * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. - * - * If this bit is set then the GPU's cache will not be cleaned and invalidated - * until a GPU job starts which does not have this bit set or a job completes - * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if - * the CPU may have written to memory addressed by the job since the last job - * without this bit set was submitted. - */ -#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) - -/** - * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. - * - * If this bit is set then the GPU's cache will not be cleaned and invalidated - * until a GPU job completes which does not have this bit set or a job starts - * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if - * the CPU may read from or partially overwrite memory addressed by the job - * before the next job without this bit set completes. - */ -#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) - -/** - * Request the atom be executed on a specific job slot. - * - * When this flag is specified, it takes precedence over any existing job slot - * selection logic. - */ -#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17) - -/** - * These requirement bits are currently unused in base_jd_core_req - */ -#define BASEP_JD_REQ_RESERVED \ - (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ - BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ - BASE_JD_REQ_EVENT_COALESCE | \ - BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ - BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ - BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \ - BASE_JD_REQ_JOB_SLOT)) - -/** - * Mask of all bits in base_jd_core_req that control the type of the atom. - * - * This allows dependency only atoms to have flags set - */ -#define BASE_JD_REQ_ATOM_TYPE \ - (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ - BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) - -/** - * Mask of all bits in base_jd_core_req that control the type of a soft job. - */ -#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) - -/* - * Returns non-zero value if core requirements passed define a soft job or - * a dependency only job. - */ -#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \ - ((core_req & BASE_JD_REQ_SOFT_JOB) || \ - (core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) - -/* - * Base Atom priority - * - * Only certain priority levels are actually implemented, as specified by the - * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority - * level that is not one of those defined below. - * - * Priority levels only affect scheduling after the atoms have had dependencies - * resolved. For example, a low priority atom that has had its dependencies - * resolved might run before a higher priority atom that has not had its - * dependencies resolved. - * - * In general, fragment atoms do not affect non-fragment atoms with - * lower priorities, and vice versa. One exception is that there is only one - * priority value for each context. So a high-priority (e.g.) fragment atom - * could increase its context priority, causing its non-fragment atoms to also - * be scheduled sooner. 
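Before the scheduling description continues below, here is a short sketch of how the requirement masks defined above compose in practice (illustrative only, not code from this patch; core_req_is_acceptable() is an invented name):

/* Illustrative validation built from the masks above: reserved bits are
 * rejected, soft jobs and dependency-only atoms bypass hardware-specific
 * checks, and anything else must carry at least one atom-type bit.
 */
static bool core_req_is_acceptable(base_jd_core_req core_req)
{
        if (core_req & BASEP_JD_REQ_RESERVED)
                return false;

        if (BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req))
                return true;

        return (core_req & BASE_JD_REQ_ATOM_TYPE) != 0;
}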
- * - * The atoms are scheduled as follows with respect to their priorities: - * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies - * resolved, and atom 'X' has a higher priority than atom 'Y' - * - If atom 'Y' is currently running on the HW, then it is interrupted to - * allow atom 'X' to run soon after - * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing - * the next atom to run, atom 'X' will always be chosen instead of atom 'Y' - * - Any two atoms that have the same priority could run in any order with - * respect to each other. That is, there is no ordering constraint between - * atoms of the same priority. - * - * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are - * scheduled between contexts. The default value, 0, will cause higher-priority - * atoms to be scheduled first, regardless of their context. The value 1 will - * use a round-robin algorithm when deciding which context's atoms to schedule - * next, so higher-priority atoms can only preempt lower priority atoms within - * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and - * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. - */ -typedef u8 base_jd_prio; - -/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ -#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) -/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and - * BASE_JD_PRIO_LOW */ -#define BASE_JD_PRIO_HIGH ((base_jd_prio)1) -/* Low atom priority. */ -#define BASE_JD_PRIO_LOW ((base_jd_prio)2) - -/* Count of the number of priority levels. This itself is not a valid - * base_jd_prio setting */ -#define BASE_JD_NR_PRIO_LEVELS 3 - -enum kbase_jd_atom_state { - /** Atom is not used */ - KBASE_JD_ATOM_STATE_UNUSED, - /** Atom is queued in JD */ - KBASE_JD_ATOM_STATE_QUEUED, - /** Atom has been given to JS (is runnable/running) */ - KBASE_JD_ATOM_STATE_IN_JS, - /** Atom has been completed, but not yet handed back to job dispatcher - * for dependency resolution */ - KBASE_JD_ATOM_STATE_HW_COMPLETED, - /** Atom has been completed, but not yet handed back to userspace */ - KBASE_JD_ATOM_STATE_COMPLETED -}; - -typedef u8 base_atom_id; /**< Type big enough to store an atom number in */ - -struct base_dependency { - base_atom_id atom_id; /**< An atom number */ - base_jd_dep_type dependency_type; /**< Dependency type */ -}; - -/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value. - * In order to keep the size of the structure same, padding field has been adjusted - * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines) - * is added at the end of the structure. Place in the structure previously occupied by u16 core_req - * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission - * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left - * for possible future use. 
*/ -typedef struct base_jd_atom_v2 { - u64 jc; /**< job-chain GPU address */ - struct base_jd_udata udata; /**< user data */ - u64 extres_list; /**< list of external resources */ - u16 nr_extres; /**< nr of external resources or JIT allocations */ - u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */ - struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field, - this is done in order to reduce possibility of improper assigment of a dependency field */ - base_atom_id atom_number; /**< unique number to identify the atom */ - base_jd_prio prio; /**< Atom priority. Refer to @ref base_jd_prio for more details */ - u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ - u8 jobslot; /**< Job slot to use when BASE_JD_REQ_JOB_SLOT is specified */ - base_jd_core_req core_req; /**< core requirements */ -} base_jd_atom_v2; - -typedef enum base_external_resource_access { +enum base_external_resource_access { BASE_EXT_RES_ACCESS_SHARED, BASE_EXT_RES_ACCESS_EXCLUSIVE -} base_external_resource_access; +}; -typedef struct base_external_resource { +struct base_external_resource { u64 ext_resource; -} base_external_resource; +}; /** @@ -961,351 +374,32 @@ struct base_jd_debug_copy_buffer { struct base_external_resource extres; }; -/** - * @brief Setter for a dependency structure - * - * @param[in] dep The kbase jd atom dependency to be initialized. - * @param id The atom_id to be assigned. - * @param dep_type The dep_type to be assigned. - * - */ -static inline void base_jd_atom_dep_set(struct base_dependency *dep, - base_atom_id id, base_jd_dep_type dep_type) -{ - LOCAL_ASSERT(dep != NULL); - - /* - * make sure we don't set not allowed combinations - * of atom_id/dependency_type. - */ - LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) || - (id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID)); - - dep->atom_id = id; - dep->dependency_type = dep_type; -} - -/** - * @brief Make a copy of a dependency structure - * - * @param[in,out] dep The kbase jd atom dependency to be written. - * @param[in] from The dependency to make a copy from. - * - */ -static inline void base_jd_atom_dep_copy(struct base_dependency *dep, - const struct base_dependency *from) -{ - LOCAL_ASSERT(dep != NULL); - - base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type); -} - -/** - * @brief Soft-atom fence trigger setup. - * - * Sets up an atom to be a SW-only atom signaling a fence - * when it reaches the run state. - * - * Using the existing base dependency system the fence can - * be set to trigger when a GPU job has finished. - * - * The base fence object must not be terminated until the atom - * has been submitted to @ref base_jd_submit and @ref base_jd_submit - * has returned. - * - * @a fence must be a valid fence set up with @a base_fence_init. - * Calling this function with a uninitialized fence results in undefined behavior. - * - * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom - * @param[in] fence The base fence object to trigger. - * - * @pre @p fence must reference a @ref base_fence successfully initialized by - * calling @ref base_fence_init. - * @pre @p fence was @e not initialized by calling @ref base_fence_import, nor - * is it associated with a fence-trigger job that was already submitted - * by calling @ref base_jd_submit. - * @post @p atom can be submitted by calling @ref base_jd_submit. 
- */ -static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence) -{ - LOCAL_ASSERT(atom); - LOCAL_ASSERT(fence); - LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE); - LOCAL_ASSERT(fence->basep.stream_fd >= 0); - atom->jc = (uintptr_t) fence; - atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER; -} - -/** - * @brief Soft-atom fence wait setup. - * - * Sets up an atom to be a SW-only atom waiting on a fence. - * When the fence becomes triggered the atom becomes runnable - * and completes immediately. - * - * Using the existing base dependency system the fence can - * be set to block a GPU job until it has been triggered. - * - * The base fence object must not be terminated until the atom - * has been submitted to @ref base_jd_submit and - * @ref base_jd_submit has returned. - * - * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom - * @param[in] fence The base fence object to wait on - * - * @pre @p fence must reference a @ref base_fence successfully initialized by - * calling @ref base_fence_import, or it must be associated with a - * fence-trigger job that was already submitted by calling - * @ref base_jd_submit. - * @post @p atom can be submitted by calling @ref base_jd_submit. - */ -static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence) -{ - LOCAL_ASSERT(atom); - LOCAL_ASSERT(fence); - LOCAL_ASSERT(fence->basep.fd >= 0); - atom->jc = (uintptr_t) fence; - atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT; -} - -/** - * @brief External resource info initialization. - * - * Sets up an external resource object to reference - * a memory allocation and the type of access requested. - * - * @param[in] res The resource object to initialize - * @param handle The handle to the imported memory object, must be - * obtained by calling @ref base_mem_as_import_handle(). - * @param access The type of access requested - */ -static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access) -{ - u64 address; - - address = handle.basep.handle; - - LOCAL_ASSERT(res != NULL); - LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB)); - LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE); - - res->ext_resource = address | (access & LOCAL_PAGE_LSB); -} - -/** - * @brief Job chain event code bits - * Defines the bits used to create ::base_jd_event_code - */ -enum { - BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */ - BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */ - BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */ - BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */ - BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */ - BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */ - BASE_JD_SW_EVENT_RESERVED = (3u << 11), /**< Reserved event type */ - BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) /**< Mask to extract the type from an event code */ -}; - -/** - * @brief Job chain event codes - * - * HW and low-level SW events are represented by event codes. - * The status of jobs which succeeded are also represented by - * an event code (see ::BASE_JD_EVENT_DONE). - * Events are usually reported as part of a ::base_jd_event. 
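As a usage sketch for base_external_resource_init() above (not part of this patch; attach_one_extres() is an invented name and error handling is omitted): an atom that references imported memory initialises one base_external_resource per import, points extres_list and nr_extres at the array, and sets BASE_JD_REQ_EXTERNAL_RESOURCES, as the earlier comments on that flag require.

/* Hypothetical user-side helper: attach a single imported allocation to an
 * atom as a shared external resource.  'handle' is assumed to come from
 * base_mem_as_import_handle().
 */
static void attach_one_extres(struct base_jd_atom_v2 *atom,
                              struct base_external_resource *res,
                              struct base_import_handle handle)
{
        base_external_resource_init(res, handle, BASE_EXT_RES_ACCESS_SHARED);

        atom->extres_list = (u64)(uintptr_t)res; /* array of one entry */
        atom->nr_extres = 1;
        atom->core_req |= BASE_JD_REQ_EXTERNAL_RESOURCES;
}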
- * - * The event codes are encoded in the following way: - * @li 10:0 - subtype - * @li 12:11 - type - * @li 13 - SW success (only valid if the SW bit is set) - * @li 14 - SW event (HW event if not set) - * @li 15 - Kernel event (should never be seen in userspace) - * - * Events are split up into ranges as follows: - * - BASE_JD_EVENT_RANGE_\<description\>_START - * - BASE_JD_EVENT_RANGE_\<description\>_END - * - * \a code is in \<description\>'s range when: - * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt> - * - * Ranges can be asserted for adjacency by testing that the END of the previous - * is equal to the START of the next. This is useful for optimizing some tests - * for range. - * - * A limitation is that the last member of this enum must explicitly be handled - * (with an assert-unreachable statement) in switch statements that use - * variables of this type. Otherwise, the compiler warns that we have not - * handled that enum value. - */ -typedef enum base_jd_event_code { - /* HW defined exceptions */ - - /** Start of HW Non-fault status codes - * - * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, - * because the job was hard-stopped - */ - BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, - - /* non-fatal exceptions */ - BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */ - BASE_JD_EVENT_DONE = 0x01, - BASE_JD_EVENT_STOPPED = 0x03, /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */ - BASE_JD_EVENT_TERMINATED = 0x04, /**< This is actually a fault status code - the job was hard stopped */ - BASE_JD_EVENT_ACTIVE = 0x08, /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */ - - /** End of HW Non-fault status codes - * - * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, - * because the job was hard-stopped - */ - BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, - - /** Start of HW fault and SW Error status codes */ - BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, - - /* job exceptions */ - BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, - BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, - BASE_JD_EVENT_JOB_READ_FAULT = 0x42, - BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, - BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, - BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, - BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, - BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, - BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, - BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, - BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, - BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, - BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, - BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, - BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, - BASE_JD_EVENT_STATE_FAULT = 0x5A, - BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, - BASE_JD_EVENT_UNKNOWN = 0x7F, - - /* GPU exceptions */ - BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, - BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, - - /* MMU exceptions */ - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, - BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, - BASE_JD_EVENT_ACCESS_FLAG = 0xD8, - - /* SW defined exceptions */ - BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | 
BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_TIMED_OUT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, - BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, - BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, - BASE_JD_EVENT_PM_EVENT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, - - BASE_JD_EVENT_BAG_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, - - /** End of HW fault and SW Error status codes */ - BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF, - - /** Start of SW Success status codes */ - BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000, - - BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000, - BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, - - /** End of SW Success status codes */ - BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, - - /** Start of Kernel-only status codes. Such codes are never returned to user-space */ - BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000, - BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, - - /** End of Kernel-only status codes. */ - BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF -} base_jd_event_code; - -/** - * @brief Event reporting structure - * - * This structure is used by the kernel driver to report information - * about GPU events. The can either be HW-specific events or low-level - * SW events, such as job-chain completion. - * - * The event code contains an event type field which can be extracted - * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK. - * - * Based on the event type base_jd_event::data holds: - * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed - * job-chain - * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has - * been completed (ie all contained job-chains have been completed). - * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used - */ -typedef struct base_jd_event_v2 { - base_jd_event_code event_code; /**< event code */ - base_atom_id atom_number; /**< the atom number that has completed */ - struct base_jd_udata udata; /**< user data */ -} base_jd_event_v2; - -/** - * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs. - * - * This structure is stored into the memory pointed to by the @c jc field - * of @ref base_jd_atom. - * - * It must not occupy the same CPU cache line(s) as any neighboring data. - * This is to avoid cases where access to pages containing the structure - * is shared between cached and un-cached memory regions, which would - * cause memory corruption. 
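Stepping back to the event code ranges defined above, classifying a completed atom reduces to the range convention START <= code < END. The check below is illustrative only (event_indicates_success() is an invented name, not code from this patch); it treats the plain hardware completion code and the software success range as success.

/* Illustrative classification of a base_jd_event_v2 completion code. */
static bool event_indicates_success(const struct base_jd_event_v2 *ev)
{
        base_jd_event_code code = ev->event_code;

        if (code == BASE_JD_EVENT_DONE)
                return true;

        /* SW-generated success codes all fall in the SW_SUCCESS range. */
        return (code & BASE_JD_SW_EVENT) &&
               code >= BASE_JD_EVENT_RANGE_SW_SUCCESS_START &&
               code < BASE_JD_EVENT_RANGE_SW_SUCCESS_END;
}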
- */ - -typedef struct base_dump_cpu_gpu_counters { - u64 system_time; - u64 cycle_counter; - u64 sec; - u32 usec; - u8 padding[36]; -} base_dump_cpu_gpu_counters; - -/** @} end group base_user_api_job_dispatch */ - #define GPU_MAX_JOB_SLOTS 16 /** - * @page page_base_user_api_gpuprops User-side Base GPU Property Query API + * User-side Base GPU Property Queries * - * The User-side Base GPU Property Query API encapsulates two + * The User-side Base GPU Property Query interface encapsulates two * sub-modules: * - * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties" - * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties" - * - * There is a related third module outside of Base, which is owned by the MIDG - * module: - * - @ref gpu_props_static "Midgard Compile-time GPU Properties" + * - "Dynamic GPU Properties" + * - "Base Platform Config GPU Properties" * - * Base only deals with properties that vary between different Midgard + * Base only deals with properties that vary between different GPU * implementations - the Dynamic GPU properties and the Platform Config * properties. * - * For properties that are constant for the Midgard Architecture, refer to the - * MIDG module. However, we will discuss their relevance here <b>just to - * provide background information.</b> + * For properties that are constant for the GPU Architecture, refer to the + * GPU module. However, we will discuss their relevance here just to + * provide background information. * - * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules + * About the GPU Properties in Base and GPU modules * - * The compile-time properties (Platform Config, Midgard Compile-time + * The compile-time properties (Platform Config, GPU Compile-time * properties) are exposed as pre-processor macros. * * Complementing the compile-time properties are the Dynamic GPU - * Properties, which act as a conduit for the Midgard Configuration + * Properties, which act as a conduit for the GPU Configuration * Discovery. * * In general, the dynamic properties are present to verify that the platform @@ -1314,13 +408,13 @@ typedef struct base_dump_cpu_gpu_counters { * * As a consistent guide across the entire DDK, the choice for dynamic or * compile-time should consider the following, in order: - * -# Can the code be written so that it doesn't need to know the + * 1. Can the code be written so that it doesn't need to know the * implementation limits at all? - * -# If you need the limits, get the information from the Dynamic Property + * 2. If you need the limits, get the information from the Dynamic Property * lookup. This should be done once as you fetch the context, and then cached * as part of the context data structure, so it's cheap to access. - * -# If there's a clear and arguable inefficiency in using Dynamic Properties, - * then use a Compile-Time Property (Platform Config, or Midgard Compile-time + * 3. If there's a clear and arguable inefficiency in using Dynamic Properties, + * then use a Compile-Time Property (Platform Config, or GPU Compile-time * property). Examples of where this might be sensible follow: * - Part of a critical inner-loop * - Frequent re-use throughout the driver, causing significant extra load @@ -1333,25 +427,25 @@ typedef struct base_dump_cpu_gpu_counters { * devices would benefit much more from a single DDK binary, instead of * insignificant micro-optimizations. 
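On the base_dump_cpu_gpu_counters layout shown earlier in this hunk: the members sum to exactly 64 bytes (three u64 fields, one u32, and 36 bytes of padding), so on a system with 64-byte cache lines a suitably aligned instance occupies a single line, which is what the note about not sharing cache lines relies on. A compile-time guard for that assumption could look like the following sketch (not part of the patch; kernel code would more likely use BUILD_BUG_ON, _Static_assert is shown because it is plain C11):

/* Guard the 64-byte layout assumption described above. */
_Static_assert(sizeof(struct base_dump_cpu_gpu_counters) == 64,
               "base_dump_cpu_gpu_counters must remain exactly 64 bytes");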
* - * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties + * Dynamic GPU Properties * * Dynamic GPU properties are presented in two sets: - * -# the commonly used properties in @ref base_gpu_props, which have been + * 1. the commonly used properties in @ref base_gpu_props, which have been * unpacked from GPU register bitfields. - * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props - * (also a member of @ref base_gpu_props). All of these are presented in + * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props + * (also a member of base_gpu_props). All of these are presented in * the packed form, as presented by the GPU registers themselves. * - * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to + * The raw properties in gpu_raw_gpu_props are necessary to * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device * behaving differently?". In this case, all information about the - * configuration is potentially useful, but it <b>does not need to be processed - * by the driver</b>. Instead, the raw registers can be processed by the Mali + * configuration is potentially useful, but it does not need to be processed + * by the driver. Instead, the raw registers can be processed by the Mali * Tools software on the host PC. * - * The properties returned extend the Midgard Configuration Discovery - * registers. For example, GPU clock speed is not specified in the Midgard - * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>. + * The properties returned extend the GPU Configuration Discovery + * registers. For example, GPU clock speed is not specified in the GPU + * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function. * * The GPU properties are obtained by a call to * base_get_gpu_props(). This simply returns a pointer to a const @@ -1365,12 +459,12 @@ typedef struct base_dump_cpu_gpu_counters { * base_context. * * - * @section sec_base_user_api_gpuprops_kernel Kernel Operation + * Kernel Operation * * During Base Context Create time, user-side makes a single kernel call: * - A call to fill user memory with GPU information structures * - * The kernel-side will fill the provided the entire processed @ref base_gpu_props + * The kernel-side will fill the provided the entire processed base_gpu_props * structure, because this information is required in both * user and kernel side; it does not make sense to decode it twice. * @@ -1379,7 +473,8 @@ typedef struct base_dump_cpu_gpu_counters { * be known kernel-side, to support chains that specify a 'Only Coherent Group' * SW requirement, or 'Only Coherent Group with Tiler' SW requirement. * - * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation + * Coherency Group calculation + * * Creation of the coherent group data is done at device-driver startup, and so * is one-time. This will most likely involve a loop with CLZ, shifting, and * bit clearing on the L2_PRESENT mask, depending on whether the @@ -1393,16 +488,6 @@ typedef struct base_dump_cpu_gpu_counters { * 16 coherent groups, since core groups are typically 4 cores. 
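The coherency-group derivation mentioned above boils down to repeatedly finding and clearing the top bit of the L2_PRESENT mask. The loop below is a minimal illustration of that technique, not the driver's actual routine: for the purposes of the sketch each present L2 slice is treated as one coherent group, and the compiler builtin __builtin_clzll stands in for the kernel's fls64().

/* Count the coherent groups implied by an L2_PRESENT mask using the
 * CLZ-and-clear loop described above.  Illustrative only.
 */
static unsigned int count_coherent_groups(u64 l2_present)
{
        unsigned int num_groups = 0;

        while (l2_present) {
                unsigned int bit = 63 - __builtin_clzll(l2_present);

                num_groups++;
                l2_present &= ~(1ull << bit);
        }
        return num_groups;
}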
*/ -/** - * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs - * @{ - */ - -/** - * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties - * @{ - */ - #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 #define BASE_MAX_COHERENT_GROUPS 16 @@ -1507,7 +592,7 @@ struct mali_base_gpu_thread_props { }; /** - * @brief descriptor for a coherent group + * struct mali_base_gpu_coherent_group - descriptor for a coherent group * * \c core_mask exposes all cores in that coherent group, and \c num_cores * provides a cached population-count for that mask. @@ -1524,7 +609,7 @@ struct mali_base_gpu_coherent_group { }; /** - * @brief Coherency group information + * struct mali_base_gpu_coherent_group_info - Coherency group information * * Note that the sizes of the members could be reduced. However, the \c group * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte @@ -1549,7 +634,7 @@ struct mali_base_gpu_coherent_group_info { u32 num_core_groups; /** - * Coherency features of the memory, accessed by @ref gpu_mem_features + * Coherency features of the memory, accessed by gpu_mem_features * methods */ u32 coherency; @@ -1563,18 +648,18 @@ struct mali_base_gpu_coherent_group_info { }; /** - * A complete description of the GPU's Hardware Configuration Discovery - * registers. + * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware + * Configuration Discovery registers. * * The information is presented inefficiently for access. For frequent access, * the values should be better expressed in an unpacked form in the * base_gpu_props structure. * - * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to + * The raw properties in gpu_raw_gpu_props are necessary to * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device * behaving differently?". In this case, all information about the - * configuration is potentially useful, but it <b>does not need to be processed - * by the driver</b>. Instead, the raw registers can be processed by the Mali + * configuration is potentially useful, but it does not need to be processed + * by the driver. Instead, the raw registers can be processed by the Mali * Tools software on the host PC. * */ @@ -1613,95 +698,65 @@ struct gpu_raw_gpu_props { }; /** - * Return structure for base_get_gpu_props(). + * struct base_gpu_props - Return structure for base_get_gpu_props(). * * NOTE: the raw_props member in this data structure contains the register * values from which the value of the other members are derived. The derived * members exist to allow for efficient access and/or shielding the details * of the layout of the registers. * + * @unused_1: Keep for backwards compatibility. + * @raw_props: This member is large, likely to be 128 bytes. + * @coherency_info: This must be last member of the structure. 
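A small consumer-side sketch of the coherent-group structures above (not part of the patch; written as user-space code, so it assumes <stdio.h>, and the num_groups and group member names are taken from the coherent-group-info layout this header describes): walk the reported groups and print each group's core mask and cached core count.

/* Illustrative user-space walk of the coherency information. */
static void print_coherent_groups(
        const struct mali_base_gpu_coherent_group_info *info)
{
        u32 i;

        for (i = 0; i < info->num_groups; i++)
                printf("group %u: %u cores, mask 0x%llx\n", i,
                       (unsigned int)info->group[i].num_cores,
                       (unsigned long long)info->group[i].core_mask);
}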
*/ -typedef struct base_gpu_props { +struct base_gpu_props { struct mali_base_gpu_core_props core_props; struct mali_base_gpu_l2_cache_props l2_props; - u64 unused_1; /* keep for backwards compatibility */ + u64 unused_1; struct mali_base_gpu_tiler_props tiler_props; struct mali_base_gpu_thread_props thread_props; - - /** This member is large, likely to be 128 bytes */ struct gpu_raw_gpu_props raw_props; - - /** This must be last member of the structure */ struct mali_base_gpu_coherent_group_info coherency_info; -} base_gpu_props; - -/** @} end group base_user_api_gpuprops_dyn */ - -/** @} end group base_user_api_gpuprops */ +}; -/** - * @addtogroup base_user_api_core User-side Base core APIs - * @{ - */ +#include "jm/mali_base_jm_kernel.h" /** - * Flags to pass to ::base_context_init. - * Flags can be ORed together to enable multiple things. + * base_mem_group_id_get() - Get group ID from flags + * @flags: Flags to pass to base_mem_alloc * - * These share the same space as BASEP_CONTEXT_FLAG_*, and so must - * not collide with them. - */ -typedef u32 base_context_create_flags; - -/** No flags set */ -#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) - -/** Base context is embedded in a cctx object (flag used for CINSTR - * software counter macros) - */ -#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) - -/** Base context is a 'System Monitor' context for Hardware counters. + * This inline function extracts the encoded group ID from flags + * and converts it into numeric value (0~15). * - * One important side effect of this is that job submission is disabled. - */ -#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ - ((base_context_create_flags)1 << 1) - - -/* Bit-shift used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) - -/* Bitmask used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ - ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) - -/* Bitpattern describing the base_context_create_flags that can be - * passed to the kernel - */ -#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ - (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ - BASEP_CONTEXT_MMU_GROUP_ID_MASK) - -/* Bitpattern describing the ::base_context_create_flags that can be - * passed to base_context_init() + * Return: group ID(0~15) extracted from the parameter */ -#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ - (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS) +static inline int base_mem_group_id_get(base_mem_alloc_flags flags) +{ + LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); + return (int)((flags & BASE_MEM_GROUP_ID_MASK) >> + BASEP_MEM_GROUP_ID_SHIFT); +} -/* - * Private flags used on the base context +/** + * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags + * @id: group ID(0~15) you want to encode * - * These start at bit 31, and run down to zero. + * This inline function encodes specific group ID into base_mem_alloc_flags. + * Parameter 'id' should lie in-between 0 to 15. + * + * Return: base_mem_alloc_flags with the group ID (id) encoded * - * They share the same space as @ref base_context_create_flags, and so must - * not collide with them. + * The return value can be combined with other flags against base_mem_alloc + * to identify a specific memory group. 
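A brief usage sketch for the pair of helpers above (the body of base_mem_group_id_set() follows just below in this hunk; mem_group_round_trip() is an invented name, not code from this patch): encode a memory group into allocation flags and read it back.

/* Illustrative round trip: encode group 3 into base_mem_alloc_flags and
 * recover it with the getter shown above.
 */
static int mem_group_round_trip(void)
{
        base_mem_alloc_flags flags = base_mem_group_id_set(3);

        return base_mem_group_id_get(flags); /* evaluates to 3 */
}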
*/ -/** Private flag tracking whether job descriptor dumping is disabled */ -#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \ - ((base_context_create_flags)(1 << 31)) +static inline base_mem_alloc_flags base_mem_group_id_set(int id) +{ + LOCAL_ASSERT(id >= 0); + LOCAL_ASSERT(id < BASE_MEM_GROUP_COUNT); + + return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) & + BASE_MEM_GROUP_ID_MASK; +} /** * base_context_mmu_group_id_set - Encode a memory group ID in @@ -1741,46 +796,7 @@ static inline int base_context_mmu_group_id_get( BASEP_CONTEXT_MMU_GROUP_ID_SHIFT); } -/** @} end group base_user_api_core */ - -/** @} end group base_user_api */ - -/** - * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties - * @{ - * - * C Pre-processor macros are exposed here to do with Platform - * Config. - * - * These include: - * - GPU Properties that are constant on a particular Midgard Family - * Implementation e.g. Maximum samples per pixel on Mali-T600. - * - General platform config for the GPU, such as the GPU major and minor - * revison. - */ - -/** @} end group base_plat_config_gpuprops */ - -/** - * @addtogroup base_api Base APIs - * @{ - */ - -/** @} end group base_api */ - -/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, - * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */ -#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) - -/* Indicate that job dumping is enabled. This could affect certain timers - * to account for the performance impact. */ -#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) - - -#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ - BASE_TLSTREAM_JOB_DUMPING_ENABLED) - -/** +/* * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These * flags are also used, where applicable, for specifying which fields * are valid following the request operation. @@ -1804,5 +820,4 @@ static inline int base_context_mmu_group_id_get( BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ BASE_TIMEINFO_USER_SOURCE_FLAG) - #endif /* _BASE_KERNEL_H_ */ diff --git a/mali_kbase/mali_base_mem_priv.h b/mali_kbase/mali_base_mem_priv.h index 52c8a4f..844a025 100644 --- a/mali_kbase/mali_base_mem_priv.h +++ b/mali_kbase/mali_base_mem_priv.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,7 +47,7 @@ * - offset is ignored. */ struct basep_syncset { - base_mem_handle mem_handle; + struct base_mem_handle mem_handle; u64 user_addr; u64 size; u8 type; diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h index 8a5088c..66e4349 100644 --- a/mali_kbase/mali_kbase.h +++ b/mali_kbase/mali_kbase.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -77,6 +77,8 @@ #include <trace/events/gpu.h> #endif +#include "mali_linux_trace.h" + #ifndef u64_to_user_ptr /* Introduced in Linux v4.6 */ @@ -155,7 +157,6 @@ int assign_irqs(struct kbase_device *kbdev); int kbase_sysfs_init(struct kbase_device *kbdev); void kbase_sysfs_term(struct kbase_device *kbdev); -void kbase_device_debugfs_term(struct kbase_device *kbdev); int kbase_protected_mode_init(struct kbase_device *kbdev); void kbase_protected_mode_term(struct kbase_device *kbdev); @@ -163,7 +164,17 @@ void kbase_protected_mode_term(struct kbase_device *kbdev); int power_control_init(struct kbase_device *kbdev); void power_control_term(struct kbase_device *kbdev); +#ifdef CONFIG_DEBUG_FS +void kbase_device_debugfs_term(struct kbase_device *kbdev); int kbase_device_debugfs_init(struct kbase_device *kbdev); +#else /* CONFIG_DEBUG_FS */ +static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) +{ + return 0; +} + +static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } +#endif /* CONFIG_DEBUG_FS */ int registers_map(struct kbase_device *kbdev); void registers_unmap(struct kbase_device *kbdev); @@ -219,9 +230,6 @@ void kbase_jd_zap_context(struct kbase_context *kctx); bool jd_done_nolock(struct kbase_jd_atom *katom, struct list_head *completed_jobs_ctx); void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); -bool jd_submit_atom(struct kbase_context *kctx, - const struct base_jd_atom_v2 *user_atom, - struct kbase_jd_atom *katom); void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); /** @@ -251,6 +259,22 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done); void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *katom); +/** + * kbase_job_slot_softstop_start_rp() - Soft-stop the atom at the start + * of a renderpass. + * @kctx: Pointer to a kernel base context. + * @reg: Reference of a growable GPU memory region in the same context. + * Takes ownership of the reference if successful. + * + * Used to switch to incremental rendering if we have nearly run out of + * virtual address space in a growable memory region and the atom currently + * executing on a job slot is the tiler job chain at the start of a renderpass. + * + * Return 0 if successful, otherwise a negative error code. + */ +int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, + struct kbase_va_region *reg); + void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom); void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, @@ -282,6 +306,16 @@ void kbase_event_wakeup(struct kbase_context *kctx); */ int kbasep_jit_alloc_validate(struct kbase_context *kctx, struct base_jit_alloc_info *info); + +/** + * kbase_jit_retry_pending_alloc() - Retry blocked just-in-time memory + * allocations. + * + * @kctx: Pointer to the kbase context within which the just-in-time + * memory allocations are to be retried. + */ +void kbase_jit_retry_pending_alloc(struct kbase_context *kctx); + /** * kbase_free_user_buffer() - Free memory allocated for struct * @kbase_debug_copy_buffer. 
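The ownership rule documented for kbase_job_slot_softstop_start_rp() above has a simple caller pattern, sketched here (hypothetical caller, not code from this patch; release_region_ref() is a placeholder for whichever routine drops the reference the caller originally took on the region):

/* Hypothetical caller in a page-fault / out-of-space path: hand the region
 * reference to the soft-stop helper.  On success the helper owns it; on
 * error the caller must release it itself.
 */
static void try_start_incremental_rendering(struct kbase_context *kctx,
                                            struct kbase_va_region *reg)
{
        int err = kbase_job_slot_softstop_start_rp(kctx, reg);

        if (err)
                release_region_ref(reg); /* placeholder, see note above */
        /* else: the reference now belongs to the soft-stop path */
}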
diff --git a/mali_kbase/mali_kbase_10969_workaround.c b/mali_kbase/mali_kbase_10969_workaround.c deleted file mode 100644 index 118511a..0000000 --- a/mali_kbase/mali_kbase_10969_workaround.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * - * (C) COPYRIGHT 2013-2015,2017-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ -#include <linux/dma-mapping.h> -#include <mali_kbase.h> -#include <mali_kbase_10969_workaround.h> - -/* Mask of X and Y coordinates for the coordinates words in the descriptors*/ -#define X_COORDINATE_MASK 0x00000FFF -#define Y_COORDINATE_MASK 0x0FFF0000 -/* Max number of words needed from the fragment shader job descriptor */ -#define JOB_HEADER_SIZE_IN_WORDS 10 -#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32)) - -/* Word 0: Status Word */ -#define JOB_DESC_STATUS_WORD 0 -/* Word 1: Restart Index */ -#define JOB_DESC_RESTART_INDEX_WORD 1 -/* Word 2: Fault address low word */ -#define JOB_DESC_FAULT_ADDR_LOW_WORD 2 -/* Word 8: Minimum Tile Coordinates */ -#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8 -/* Word 9: Maximum Tile Coordinates */ -#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9 - -int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom) -{ - struct device *dev = katom->kctx->kbdev->dev; - u32 clamped = 0; - struct kbase_va_region *region; - struct tagged_addr *page_array; - u64 page_index; - u32 offset = katom->jc & (~PAGE_MASK); - u32 *page_1 = NULL; - u32 *page_2 = NULL; - u32 job_header[JOB_HEADER_SIZE_IN_WORDS]; - void *dst = job_header; - u32 minX, minY, maxX, maxY; - u32 restartX, restartY; - struct page *p; - u32 copy_size; - - dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n"); - if (!(katom->core_req & BASE_JD_REQ_FS)) - return 0; - - kbase_gpu_vm_lock(katom->kctx); - region = kbase_region_tracker_find_region_enclosing_address(katom->kctx, - katom->jc); - if (kbase_is_region_invalid_or_free(region)) - goto out_unlock; - - page_array = kbase_get_cpu_phy_pages(region); - if (!page_array) - goto out_unlock; - - page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn; - - p = as_page(page_array[page_index]); - - /* we need the first 10 words of the fragment shader job descriptor. - * We need to check that the offset + 10 words is less that the page - * size otherwise we need to load the next page. - * page_size_overflow will be equal to 0 in case the whole descriptor - * is within the page > 0 otherwise. 
- */ - copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE); - - page_1 = kmap_atomic(p); - - /* page_1 is a u32 pointer, offset is expressed in bytes */ - page_1 += offset>>2; - - kbase_sync_single_for_cpu(katom->kctx->kbdev, - kbase_dma_addr(p) + offset, - copy_size, DMA_BIDIRECTIONAL); - - memcpy(dst, page_1, copy_size); - - /* The data needed overflows page the dimension, - * need to map the subsequent page */ - if (copy_size < JOB_HEADER_SIZE) { - p = as_page(page_array[page_index + 1]); - page_2 = kmap_atomic(p); - - kbase_sync_single_for_cpu(katom->kctx->kbdev, - kbase_dma_addr(p), - JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL); - - memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size); - } - - /* We managed to correctly map one or two pages (in case of overflow) */ - /* Get Bounding Box data and restart index from fault address low word */ - minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK; - minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK; - maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK; - maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK; - restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK; - restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK; - - dev_warn(dev, "Before Clamping:\n" - "Jobstatus: %08x\n" - "restartIdx: %08x\n" - "Fault_addr_low: %08x\n" - "minCoordsX: %08x minCoordsY: %08x\n" - "maxCoordsX: %08x maxCoordsY: %08x\n", - job_header[JOB_DESC_STATUS_WORD], - job_header[JOB_DESC_RESTART_INDEX_WORD], - job_header[JOB_DESC_FAULT_ADDR_LOW_WORD], - minX, minY, - maxX, maxY); - - /* Set the restart index to the one which generated the fault*/ - job_header[JOB_DESC_RESTART_INDEX_WORD] = - job_header[JOB_DESC_FAULT_ADDR_LOW_WORD]; - - if (restartX < minX) { - job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY; - dev_warn(dev, - "Clamping restart X index to minimum. %08x clamped to %08x\n", - restartX, minX); - clamped = 1; - } - if (restartY < minY) { - job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX; - dev_warn(dev, - "Clamping restart Y index to minimum. %08x clamped to %08x\n", - restartY, minY); - clamped = 1; - } - if (restartX > maxX) { - job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY; - dev_warn(dev, - "Clamping restart X index to maximum. %08x clamped to %08x\n", - restartX, maxX); - clamped = 1; - } - if (restartY > maxY) { - job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX; - dev_warn(dev, - "Clamping restart Y index to maximum. 
%08x clamped to %08x\n", - restartY, maxY); - clamped = 1; - } - - if (clamped) { - /* Reset the fault address low word - * and set the job status to STOPPED */ - job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0; - job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED; - dev_warn(dev, "After Clamping:\n" - "Jobstatus: %08x\n" - "restartIdx: %08x\n" - "Fault_addr_low: %08x\n" - "minCoordsX: %08x minCoordsY: %08x\n" - "maxCoordsX: %08x maxCoordsY: %08x\n", - job_header[JOB_DESC_STATUS_WORD], - job_header[JOB_DESC_RESTART_INDEX_WORD], - job_header[JOB_DESC_FAULT_ADDR_LOW_WORD], - minX, minY, - maxX, maxY); - - /* Flush CPU cache to update memory for future GPU reads*/ - memcpy(page_1, dst, copy_size); - p = as_page(page_array[page_index]); - - kbase_sync_single_for_device(katom->kctx->kbdev, - kbase_dma_addr(p) + offset, - copy_size, DMA_TO_DEVICE); - - if (copy_size < JOB_HEADER_SIZE) { - memcpy(page_2, dst + copy_size, - JOB_HEADER_SIZE - copy_size); - p = as_page(page_array[page_index + 1]); - - kbase_sync_single_for_device(katom->kctx->kbdev, - kbase_dma_addr(p), - JOB_HEADER_SIZE - copy_size, - DMA_TO_DEVICE); - } - } - if (copy_size < JOB_HEADER_SIZE) - kunmap_atomic(page_2); - - kunmap_atomic(page_1); - -out_unlock: - kbase_gpu_vm_unlock(katom->kctx); - return clamped; -} diff --git a/mali_kbase/mali_kbase_10969_workaround.h b/mali_kbase/mali_kbase_10969_workaround.h deleted file mode 100644 index 379a05a..0000000 --- a/mali_kbase/mali_kbase_10969_workaround.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * - * (C) COPYRIGHT 2013-2014, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_10969_WORKAROUND_ -#define _KBASE_10969_WORKAROUND_ - -/** - * kbasep_10969_workaround_clamp_coordinates - Apply the WA to clamp the restart indices - * @katom: atom representing the fragment job for which the WA has to be applied - * - * This workaround is used to solve an HW issue with single iterator GPUs. - * If a fragment job is soft-stopped on the edge of its bounding box, it can happen - * that the restart index is out of bounds and the rerun causes a tile range - * fault. If this happens we try to clamp the restart index to a correct value. - */ -int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom); - -#endif /* _KBASE_10969_WORKAROUND_ */ diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h index a4c72da..e079281 100644 --- a/mali_kbase/mali_kbase_config_defaults.h +++ b/mali_kbase/mali_kbase_config_defaults.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -200,5 +200,14 @@ enum { */ #define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024) +/** + * Default threshold at which to switch to incremental rendering + * + * Fraction of the maximum size of an allocation that grows on GPU page fault + * that can be used up before the driver switches to incremental rendering, + * in 256ths. 0 means disable incremental rendering. + */ +#define DEFAULT_IR_THRESHOLD (192) + #endif /* _KBASE_CONFIG_DEFAULTS_H_ */ diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index fe2ae0e..3f3d5cc 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -48,6 +48,9 @@ #include <mali_kbase_hwaccess_backend.h> #include <mali_kbase_hwaccess_time.h> #include <mali_kbase_hwaccess_jm.h> +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +#include <mali_kbase_hwaccess_instr.h> +#endif #include <mali_kbase_ctx_sched.h> #include <mali_kbase_reset_gpu.h> #include <backend/gpu/mali_kbase_device_internal.h> @@ -672,12 +675,12 @@ static int kbase_api_set_flags(struct kbase_file *kfile, js_kctx_info = &kctx->jctx.sched_info; mutex_lock(&js_kctx_info->ctx.jsctx_mutex); spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); - /* Translate the flags */ if ((flags->create_flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); } @@ -918,18 +921,43 @@ static int kbase_api_get_ddk_version(struct kbase_context *kctx, return len; } -/* Defaults for legacy JIT init ioctl */ +/* Defaults for legacy just-in-time memory allocator initialization + * kernel calls + */ #define DEFAULT_MAX_JIT_ALLOCATIONS 255 #define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */ -static int kbase_api_mem_jit_init_old(struct kbase_context *kctx, - struct kbase_ioctl_mem_jit_init_old *jit_init) +static int kbase_api_mem_jit_init_10_2(struct kbase_context *kctx, + struct kbase_ioctl_mem_jit_init_10_2 *jit_init) { kctx->jit_version = 1; + /* since no phys_pages parameter, use the maximum: va_pages */ return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, DEFAULT_MAX_JIT_ALLOCATIONS, - JIT_LEGACY_TRIM_LEVEL, BASE_MEM_GROUP_DEFAULT); + JIT_LEGACY_TRIM_LEVEL, BASE_MEM_GROUP_DEFAULT, + jit_init->va_pages); +} + +static int kbase_api_mem_jit_init_11_5(struct kbase_context *kctx, + struct kbase_ioctl_mem_jit_init_11_5 *jit_init) +{ + int i; + + kctx->jit_version = 2; + + for (i = 0; i < sizeof(jit_init->padding); i++) { + /* Ensure all padding bytes are 0 for potential future + * extension + */ + if (jit_init->padding[i]) + return -EINVAL; + } + + /* since no phys_pages parameter, use the maximum: va_pages */ + return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, + jit_init->max_allocations, jit_init->trim_level, + jit_init->group_id, jit_init->va_pages); } static int kbase_api_mem_jit_init(struct kbase_context *kctx, @@ -937,7 +965,7 @@ static int kbase_api_mem_jit_init(struct kbase_context *kctx, { int i; - kctx->jit_version = 2; + kctx->jit_version = 3; for (i = 0; i < sizeof(jit_init->padding); i++) { /* Ensure all padding bytes are 0 for potential future @@ -949,7 +977,7 @@ static int kbase_api_mem_jit_init(struct kbase_context *kctx, return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, 
jit_init->max_allocations, jit_init->trim_level, - jit_init->group_id); + jit_init->group_id, jit_init->phys_pages); } static int kbase_api_mem_exec_init(struct kbase_context *kctx, @@ -1381,10 +1409,16 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_get_ddk_version, kctx); break; - case KBASE_IOCTL_MEM_JIT_INIT_OLD: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_OLD, - kbase_api_mem_jit_init_old, - struct kbase_ioctl_mem_jit_init_old, + case KBASE_IOCTL_MEM_JIT_INIT_10_2: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_10_2, + kbase_api_mem_jit_init_10_2, + struct kbase_ioctl_mem_jit_init_10_2, + kctx); + break; + case KBASE_IOCTL_MEM_JIT_INIT_11_5: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_11_5, + kbase_api_mem_jit_init_11_5, + struct kbase_ioctl_mem_jit_init_11_5, kctx); break; case KBASE_IOCTL_MEM_JIT_INIT: @@ -1476,12 +1510,14 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_mem_profile_add, kctx); break; + case KBASE_IOCTL_SOFT_EVENT_UPDATE: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, kbase_api_soft_event_update, struct kbase_ioctl_soft_event_update, kctx); break; + case KBASE_IOCTL_STICKY_RESOURCE_MAP: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP, kbase_api_sticky_resource_map, @@ -1563,7 +1599,7 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_tlstream_stats, kctx); break; -#endif +#endif /* MALI_UNIT_TEST */ } dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); @@ -3692,6 +3728,9 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) kbasep_gpu_memory_debugfs_init(kbdev); kbase_as_fault_debugfs_init(kbdev); +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + kbase_instr_backend_debugfs_init(kbdev); +#endif /* fops_* variables created by invocations of macro * MAKE_QUIRK_ACCESSORS() above. */ debugfs_create_file("quirks_sc", 0644, @@ -3746,7 +3785,6 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) kbdev->mali_debugfs_directory, kbdev, &kbasep_serialize_jobs_debugfs_fops); - return 0; out: @@ -3758,14 +3796,6 @@ void kbase_device_debugfs_term(struct kbase_device *kbdev) { debugfs_remove_recursive(kbdev->mali_debugfs_directory); } - -#else /* CONFIG_DEBUG_FS */ -static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) -{ - return 0; -} - -static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } #endif /* CONFIG_DEBUG_FS */ #endif /* MALI_KBASE_BUILD */ @@ -3961,8 +3991,13 @@ static int kbase_platform_device_probe(struct platform_device *pdev) dev_set_drvdata(kbdev->dev, kbdev); err = kbase_device_init(kbdev); + if (err) { - dev_err(kbdev->dev, "Device initialization failed\n"); + if (err == -EPROBE_DEFER) + dev_err(kbdev->dev, "Device initialization Deferred\n"); + else + dev_err(kbdev->dev, "Device initialization failed\n"); + dev_set_drvdata(kbdev->dev, NULL); kbase_device_free(kbdev); } else { @@ -3970,6 +4005,7 @@ static int kbase_platform_device_probe(struct platform_device *pdev) dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); #endif /* MALI_KBASE_BUILD */ + kbase_increment_device_id(); } return err; @@ -4212,14 +4248,12 @@ MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ __stringify(BASE_UK_VERSION_MAJOR) "." 
\ __stringify(BASE_UK_VERSION_MINOR) ")"); -#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE) -#define CREATE_TRACE_POINTS -#endif -#ifdef CONFIG_MALI_GATOR_SUPPORT +#define CREATE_TRACE_POINTS /* Create the trace points (otherwise we just get code to call a tracepoint) */ #include "mali_linux_trace.h" +#ifdef CONFIG_MALI_GATOR_SUPPORT EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); diff --git a/mali_kbase/mali_kbase_cs_experimental.h b/mali_kbase/mali_kbase_cs_experimental.h index b68a105..e1fffc3 100644 --- a/mali_kbase/mali_kbase_cs_experimental.h +++ b/mali_kbase/mali_kbase_cs_experimental.h @@ -21,7 +21,7 @@ *//* SPDX-License-Identifier: GPL-2.0 */ /* - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,36 +35,18 @@ #include <linux/kernel.h> -#if MALI_CS_EXPERIMENTAL - -/** - * mali_kbase_has_cs_experimental() - Has the driver been built with - * CS_EXPERIMENTAL=y - * - * It is preferable to guard cs_experimental code with this rather than #ifdef - * all through the code. - * - * Return: true if built with CS_EXPERIMENTAL false otherwise - */ -static inline bool mali_kbase_has_cs_experimental(void) -{ - return true; -} -#else -static inline bool mali_kbase_has_cs_experimental(void) -{ - return false; -} -#endif - /** - * mali_kbase_print_cs_experimental() - Print a string if built with - * CS_EXPERIMENTAL=y + * mali_kbase_print_cs_experimental() - Print a string for every Core Services + * experimental feature that is enabled */ static inline void mali_kbase_print_cs_experimental(void) { - if (mali_kbase_has_cs_experimental()) - pr_info("mali_kbase: EXPERIMENTAL (MALI_CS_EXPERIMENTAL) flag enabled"); +#if MALI_JIT_PRESSURE_LIMIT + pr_info("mali_kbase: JIT_PRESSURE_LIMIT (experimental) enabled"); +#endif /* MALI_JIT_PRESSURE_LIMIT */ +#if MALI_INCREMENTAL_RENDERING + pr_info("mali_kbase: INCREMENTAL_RENDERING (experimental) enabled"); +#endif /* MALI_INCREMENTAL_RENDERING */ } #endif /* _KBASE_CS_EXPERIMENTAL_H_ */ diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c index 35853a3..3922260 100644 --- a/mali_kbase/mali_kbase_ctx_sched.c +++ b/mali_kbase/mali_kbase_ctx_sched.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -145,8 +145,14 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) lockdep_assert_held(&kbdev->hwaccess_lock); - if (atomic_dec_return(&kctx->refcount) == 0) + if (atomic_dec_return(&kctx->refcount) == 0) { kbdev->as_free |= (1u << kctx->as_nr); + if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) { + kbdev->as_to_kctx[kctx->as_nr] = NULL; + kctx->as_nr = KBASEP_AS_NR_INVALID; + kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); + } + } } void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) @@ -186,6 +192,8 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) kbase_mmu_update(kbdev, &kctx->mmu, kctx->as_nr); + kbase_ctx_flag_clear(kctx, + KCTX_AS_DISABLED_ON_FAULT); } else { /* This context might have been assigned an * AS before, clear it. diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index 059d850..ce32b53 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -89,45 +89,10 @@ #endif /* CONFIG_MALI_DEBUG */ #endif /* KBASE_TRACE_ENABLE */ -/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */ -#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1 - -/** - * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware. - * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU - * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware - * before resetting. - */ -#define ZAP_TIMEOUT 1000 - /** Number of milliseconds before we time out on a GPU soft/hard reset */ #define RESET_TIMEOUT 500 /** - * Prevent soft-stops from occuring in scheduling situations - * - * This is not due to HW issues, but when scheduling is desired to be more predictable. - * - * Therefore, soft stop may still be disabled due to HW issues. - * - * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context. - * - * @note if not in use, define this value to 0 instead of \#undef'ing it - */ -#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 - -/** - * Prevent hard-stops from occuring in scheduling situations - * - * This is not due to HW issues, but when scheduling is desired to be more predictable. - * - * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context. - * - * @note if not in use, define this value to 0 instead of \#undef'ing it - */ -#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 - -/** * The maximum number of Job Slots to support in the Hardware. * * You can optimize this down if your target devices will only ever support a @@ -177,61 +142,6 @@ */ #define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((32 * 1024ul * 1024ul) >> \ PAGE_SHIFT) - -/** Atom has been previously soft-stoppped */ -#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1) -/** Atom has been previously retried to execute */ -#define KBASE_KATOM_FLAGS_RERUN (1<<2) -/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps to - * disambiguate short-running job chains during soft/hard stopping of jobs - */ -#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) -/** Atom has been previously hard-stopped. 
*/ -#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) -/** Atom has caused us to enter disjoint state */ -#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) -/* Atom blocked on cross-slot dependency */ -#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) -/* Atom has fail dependency on cross-slot dependency */ -#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) -/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ -#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) -/* Atom is currently holding a context reference */ -#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) -/* Atom requires GPU to be in protected mode */ -#define KBASE_KATOM_FLAG_PROTECTED (1<<11) -/* Atom has been stored in runnable_tree */ -#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) -/* Atom is waiting for L2 caches to power up in order to enter protected mode */ -#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13) - -/* SW related flags about types of JS_COMMAND action - * NOTE: These must be masked off by JS_COMMAND_MASK */ - -/** This command causes a disjoint event */ -#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100 - -/** Bitmask of all SW related flags */ -#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) - -#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK) -#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks -#endif - -/** Soft-stop command that causes a Disjoint event. This of course isn't - * entirely masked off by JS_COMMAND_MASK */ -#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \ - (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) - -#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT - -/* Serialize atoms within a slot (ie only one atom per job slot) */ -#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0) -/* Serialize atoms between slots (ie only one job slot running at any time) */ -#define KBASE_SERIALIZE_INTER_SLOT (1 << 1) -/* Reset the GPU after each atom completion */ -#define KBASE_SERIALIZE_RESET (1 << 2) - /* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer * clients, to reduce undesired system load. * If a virtualizer client requests a dump within this threshold period after @@ -259,47 +169,6 @@ struct kbase_as; struct kbase_mmu_setup; struct kbase_ipa_model_vinstr_data; -#ifdef CONFIG_DEBUG_FS -/** - * struct base_job_fault_event - keeps track of the atom which faulted or which - * completed after the faulty atom but before the - * debug data for faulty atom was dumped. - * - * @event_code: event code for the atom, should != BASE_JD_EVENT_DONE for the - * atom which faulted. - * @katom: pointer to the atom for which job fault occurred or which completed - * after the faulty atom. - * @job_fault_work: work item, queued only for the faulty atom, which waits for - * the dumping to get completed and then does the bottom half - * of job done for the atoms which followed the faulty atom. - * @head: List head used to store the atom in the global list of faulty - * atoms or context specific list of atoms which got completed - * during the dump. - * @reg_offset: offset of the register to be dumped next, only applicable for - * the faulty atom. - */ -struct base_job_fault_event { - - u32 event_code; - struct kbase_jd_atom *katom; - struct work_struct job_fault_work; - struct list_head head; - int reg_offset; -}; - -#endif - -/** - * struct kbase_jd_atom_dependency - Contains the dependency info for an atom. - * @atom: pointer to the dependee atom. - * @dep_type: type of dependency on the dependee @atom, i.e. 
order or data - * dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency. - */ -struct kbase_jd_atom_dependency { - struct kbase_jd_atom *atom; - u8 dep_type; -}; - /** * struct kbase_io_access - holds information about 1 register access * @@ -334,418 +203,6 @@ struct kbase_io_history { }; /** - * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the - * dependee atom. - * @dep: pointer to the dependency info structure. - * - * Return: readonly reference to dependee atom. - */ -static inline const struct kbase_jd_atom * -kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) -{ - LOCAL_ASSERT(dep != NULL); - - return (const struct kbase_jd_atom *)(dep->atom); -} - -/** - * kbase_jd_katom_dep_type - Retrieves the dependency type info - * - * @dep: pointer to the dependency info structure. - * - * Return: the type of dependency there is on the dependee atom. - */ -static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) -{ - LOCAL_ASSERT(dep != NULL); - - return dep->dep_type; -} - -/** - * kbase_jd_katom_dep_set - sets up the dependency info structure - * as per the values passed. - * @const_dep: pointer to the dependency info structure to be setup. - * @a: pointer to the dependee atom. - * @type: type of dependency there is on the dependee atom. - */ -static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep, - struct kbase_jd_atom *a, u8 type) -{ - struct kbase_jd_atom_dependency *dep; - - LOCAL_ASSERT(const_dep != NULL); - - dep = (struct kbase_jd_atom_dependency *)const_dep; - - dep->atom = a; - dep->dep_type = type; -} - -/** - * kbase_jd_katom_dep_clear - resets the dependency info structure - * - * @const_dep: pointer to the dependency info structure to be setup. - */ -static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep) -{ - struct kbase_jd_atom_dependency *dep; - - LOCAL_ASSERT(const_dep != NULL); - - dep = (struct kbase_jd_atom_dependency *)const_dep; - - dep->atom = NULL; - dep->dep_type = BASE_JD_DEP_TYPE_INVALID; -} - -/** - * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it becomes - * runnable, with respect to job slot ringbuffer/fifo. - * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, which - * implies that either atom has not become runnable - * due to dependency or has completed the execution - * on GPU. - * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is blocked - * due to cross slot dependency, can't be submitted to GPU. - * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot fifo but - * is waiting for the completion of previously added atoms - * in current & other slots, as their protected mode - * requirements do not match with the current atom. - * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo and is - * waiting for completion of protected mode transition, - * needed before the atom is submitted to GPU. - * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is waiting - * for the cores, which are needed to execute the job - * chain represented by the atom, to become available - * @KBASE_ATOM_GPU_RB_READY: Atom is in slot fifo and can be submitted to GPU. - * @KBASE_ATOM_GPU_RB_SUBMITTED: Atom is in slot fifo and has been submitted to GPU. 
- * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some failure, - * but only after the previously added atoms in fifo - * have completed or have also been returned to JS. - */ -enum kbase_atom_gpu_rb_state { - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, - KBASE_ATOM_GPU_RB_WAITING_BLOCKED, - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, - KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, - KBASE_ATOM_GPU_RB_READY, - KBASE_ATOM_GPU_RB_SUBMITTED, - KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1 -}; - -/** - * enum kbase_atom_enter_protected_state - The state of an atom with respect to the - * preparation for GPU's entry into protected mode, becomes - * pertinent only after atom's state with respect to slot - * ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION - * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any atoms - * currently submitted to GPU and protected mode transition is - * not already in progress. - * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to - * become disabled before entry into protected mode. - * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation - * for the coherency change. L2 shall be powered down and GPU shall - * come out of fully coherent mode before entering protected mode. - * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change; - * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on so that - * coherency register contains correct value when GPU enters - * protected mode. - * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for BASE_HW_ISSUE_TGOX_R1_1234 check - * that L2 is powered up and switch GPU to protected mode. - */ -enum kbase_atom_enter_protected_state { - /** - * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK. - */ - KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, - KBASE_ATOM_ENTER_PROTECTED_HWCNT, - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY, - KBASE_ATOM_ENTER_PROTECTED_FINISHED, -}; - -/** - * enum kbase_atom_exit_protected_state - The state of an atom with respect to the - * preparation for GPU's exit from protected mode, becomes - * pertinent only after atom's state with respect to slot - * ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION - * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any atoms - * currently submitted to GPU and protected mode transition is - * not already in progress. - * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation - * for the reset, as exiting protected mode requires a reset. - * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from protected mode - * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to complete - */ -enum kbase_atom_exit_protected_state { - /** - * NOTE: The integer value of this must match KBASE_ATOM_ENTER_PROTECTED_CHECK. - */ - KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, - KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, - KBASE_ATOM_EXIT_PROTECTED_RESET, - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, -}; - -/** - * struct kbase_ext_res - Contains the info for external resources referred - * by an atom, which have been mapped on GPU side. - * @gpu_address: Start address of the memory region allocated for - * the resource from GPU virtual address space. - * @alloc: pointer to physical pages tracking object, set on - * mapping the external resource on GPU side. 
- */ -struct kbase_ext_res { - u64 gpu_address; - struct kbase_mem_phy_alloc *alloc; -}; - -/** - * struct kbase_jd_atom - object representing the atom, containing the complete - * state and attributes of an atom. - * @work: work item for the bottom half processing of the atom, - * by JD or JS, after it got executed on GPU or the input - * fence got signaled - * @start_timestamp: time at which the atom was submitted to the GPU, by - * updating the JS_HEAD_NEXTn register. - * @udata: copy of the user data sent for the atom in base_jd_submit. - * @kctx: Pointer to the base context with which the atom is associated. - * @dep_head: Array of 2 list heads, pointing to the two list of atoms - * which are blocked due to dependency on this atom. - * @dep_item: Array of 2 list heads, used to store the atom in the list of - * other atoms depending on the same dependee atom. - * @dep: Array containing the dependency info for the 2 atoms on which - * the atom depends upon. - * @jd_item: List head used during job dispatch job_done processing - as - * dependencies may not be entirely resolved at this point, - * we need to use a separate list head. - * @in_jd_list: flag set to true if atom's @jd_item is currently on a list, - * prevents atom being processed twice. - * @nr_extres: number of external resources referenced by the atom. - * @extres: pointer to the location containing info about @nr_extres - * external resources referenced by the atom. - * @device_nr: indicates the coregroup with which the atom is associated, - * when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified. - * @jc: GPU address of the job-chain. - * @softjob_data: Copy of data read from the user space buffer that @jc - * points to. - * @fence: Stores either an input or output sync fence, depending - * on soft-job type - * @sync_waiter: Pointer to the sync fence waiter structure passed to the - * callback function on signaling of the input fence. - * @dma_fence: object containing pointers to both input & output fences - * and other related members used for explicit sync through - * soft jobs and for the implicit synchronization required - * on access to external resources. - * @event_code: Event code for the job chain represented by the atom, both - * HW and low-level SW events are represented by event codes. - * @core_req: bitmask of BASE_JD_REQ_* flags specifying either Hw or Sw - * requirements for the job chain represented by the atom. - * @ticks: Number of scheduling ticks for which atom has been running - * on the GPU. - * @sched_priority: Priority of the atom for Job scheduling, as per the - * KBASE_JS_ATOM_SCHED_PRIO_*. - * @completed: Wait queue to wait upon for the completion of atom. - * @status: Indicates at high level at what stage the atom is in, - * as per KBASE_JD_ATOM_STATE_*, that whether it is not in - * use or its queued in JD or given to JS or submitted to Hw - * or it completed the execution on Hw. - * @work_id: used for GPU tracepoints, its a snapshot of the 'work_id' - * counter in kbase_jd_context which is incremented on - * every call to base_jd_submit. - * @slot_nr: Job slot chosen for the atom. - * @atom_flags: bitmask of KBASE_KATOM_FLAG* flags capturing the exact - * low level state of the atom. - * @gpu_rb_state: bitmnask of KBASE_ATOM_GPU_RB_* flags, precisely tracking - * atom's state after it has entered Job scheduler on becoming - * runnable. 
Atom could be blocked due to cross slot dependency - * or waiting for the shader cores to become available or - * waiting for protected mode transitions to complete. - * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU - * cache is needed for the atom and the shader cores used - * for atom have been kept on. - * @blocked: flag indicating that atom's resubmission to GPU is - * blocked till the work item is scheduled to return the - * atom to JS. - * @pre_dep: Pointer to atom that this atom has same-slot dependency on - * @post_dep: Pointer to atom that has same-slot dependency on this atom - * @x_pre_dep: Pointer to atom that this atom has cross-slot dependency on - * @x_post_dep: Pointer to atom that has cross-slot dependency on this atom - * @flush_id: The GPU's flush count recorded at the time of submission, - * used for the cache flush optimisation - * @fault_event: Info for dumping the debug data on Job fault. - * @queue: List head used for 4 different purposes : - * Adds atom to the list of dma-buf fence waiting atoms. - * Adds atom to the list of atoms blocked due to cross - * slot dependency. - * Adds atom to the list of softjob atoms for which JIT - * allocation has been deferred - * Adds atom to the list of softjob atoms waiting for the - * signaling of fence. - * @jit_node: Used to keep track of all JIT free/alloc jobs in submission order - * @jit_blocked: Flag indicating that JIT allocation requested through - * softjob atom will be reattempted after the impending - * free of other active JIT allocations. - * @will_fail_event_code: If non-zero, this indicates that the atom will fail - * with the set event_code when the atom is processed. - * Used for special handling of atoms, which have a data - * dependency on the failed atoms. - * @protected_state: State of the atom, as per KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*, - * when transitioning into or out of protected mode. Atom will - * be either entering or exiting the protected mode. - * @runnable_tree_node: The node added to context's job slot specific rb tree - * when the atom becomes runnable. - * @age: Age of atom relative to other atoms in the context, is - * snapshot of the age_count counter in kbase context. - */ -struct kbase_jd_atom { - struct work_struct work; - ktime_t start_timestamp; - - struct base_jd_udata udata; - struct kbase_context *kctx; - - struct list_head dep_head[2]; - struct list_head dep_item[2]; - const struct kbase_jd_atom_dependency dep[2]; - struct list_head jd_item; - bool in_jd_list; - - u16 nr_extres; - struct kbase_ext_res *extres; - - u32 device_nr; - u64 jc; - void *softjob_data; -#if defined(CONFIG_SYNC) - struct sync_fence *fence; - struct sync_fence_waiter sync_waiter; -#endif /* CONFIG_SYNC */ -#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) - struct { - /* Use the functions/API defined in mali_kbase_fence.h to - * when working with this sub struct */ -#if defined(CONFIG_SYNC_FILE) - /* Input fence */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence_in; -#else - struct dma_fence *fence_in; -#endif -#endif - /* This points to the dma-buf output fence for this atom. If - * this is NULL then there is no fence for this atom and the - * following fields related to dma_fence may have invalid data. - * - * The context and seqno fields contain the details for this - * fence. - * - * This fence is signaled when the katom is completed, - * regardless of the event_code of the katom (signal also on - * failure). 
- */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence; -#else - struct dma_fence *fence; -#endif - /* The dma-buf fence context number for this atom. A unique - * context number is allocated to each katom in the context on - * context creation. - */ - unsigned int context; - /* The dma-buf fence sequence number for this atom. This is - * increased every time this katom uses dma-buf fence. - */ - atomic_t seqno; - /* This contains a list of all callbacks set up to wait on - * other fences. This atom must be held back from JS until all - * these callbacks have been called and dep_count have reached - * 0. The initial value of dep_count must be equal to the - * number of callbacks on this list. - * - * This list is protected by jctx.lock. Callbacks are added to - * this list when the atom is built and the wait are set up. - * All the callbacks then stay on the list until all callbacks - * have been called and the atom is queued, or cancelled, and - * then all callbacks are taken off the list and freed. - */ - struct list_head callbacks; - /* Atomic counter of number of outstandind dma-buf fence - * dependencies for this atom. When dep_count reaches 0 the - * atom may be queued. - * - * The special value "-1" may only be set after the count - * reaches 0, while holding jctx.lock. This indicates that the - * atom has been handled, either queued in JS or cancelled. - * - * If anyone but the dma-fence worker sets this to -1 they must - * ensure that any potentially queued worker must have - * completed before allowing the atom to be marked as unused. - * This can be done by flushing the fence work queue: - * kctx->dma_fence.wq. - */ - atomic_t dep_count; - } dma_fence; -#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE*/ - - /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */ - enum base_jd_event_code event_code; - base_jd_core_req core_req; - u8 jobslot; - - u32 ticks; - int sched_priority; - - wait_queue_head_t completed; - enum kbase_jd_atom_state status; -#ifdef CONFIG_GPU_TRACEPOINTS - int work_id; -#endif - int slot_nr; - - u32 atom_flags; - - int retry_count; - - enum kbase_atom_gpu_rb_state gpu_rb_state; - - bool need_cache_flush_cores_retained; - - atomic_t blocked; - - struct kbase_jd_atom *pre_dep; - struct kbase_jd_atom *post_dep; - - struct kbase_jd_atom *x_pre_dep; - struct kbase_jd_atom *x_post_dep; - - u32 flush_id; - -#ifdef CONFIG_DEBUG_FS - struct base_job_fault_event fault_event; -#endif - - struct list_head queue; - - struct list_head jit_node; - bool jit_blocked; - - enum base_jd_event_code will_fail_event_code; - - union { - enum kbase_atom_enter_protected_state enter; - enum kbase_atom_exit_protected_state exit; - } protected_state; - - struct rb_node runnable_tree_node; - - u32 age; -}; - -/** * struct kbase_debug_copy_buffer - information about the buffer to be copied. * * @size: size of the buffer in bytes @@ -772,83 +229,6 @@ struct kbase_debug_copy_buffer { int nr_extres_pages; }; -static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom) -{ - return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); -} - -/* - * Theory of operations: - * - * Atom objects are statically allocated within the context structure. - * - * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set. 
- */ - -#define KBASE_JD_DEP_QUEUE_SIZE 256 - -/** - * struct kbase_jd_context - per context object encapsulating all the Job dispatcher - * related state. - * @lock: lock to serialize the updates made to the Job dispatcher - * state and kbase_jd_atom objects. - * @sched_info: Structure encapsulating all the Job scheduling info. - * @atoms: Array of the objects representing atoms, containing - * the complete state and attributes of an atom. - * @job_nr: Tracks the number of atoms being processed by the - * kbase. This includes atoms that are not tracked by - * scheduler: 'not ready to run' & 'dependency-only' jobs. - * @zero_jobs_wait: Waitq that reflects whether there are no jobs - * (including SW-only dependency jobs). This is set - * when no jobs are present on the ctx, and clear when - * there are jobs. - * This must be updated atomically with @job_nr. - * note: Job Dispatcher knows about more jobs than the - * Job Scheduler as it is unaware of jobs that are - * blocked on dependencies and SW-only dependency jobs. - * This waitq can be waited upon to find out when the - * context jobs are all done/cancelled (including those - * that might've been blocked on dependencies) - and so, - * whether it can be terminated. However, it should only - * be terminated once it is not present in the run-pool. - * Since the waitq is only set under @lock, the waiter - * should also briefly obtain and drop @lock to guarantee - * that the setter has completed its work on the kbase_context - * @job_done_wq: Workqueue to which the per atom work item is queued - * for bottom half processing when the atom completes - * execution on GPU or the input fence get signaled. - * @tb_lock: Lock to serialize the write access made to @tb to - * to store the register access trace messages. - * @tb: Pointer to the Userspace accessible buffer storing - * the trace messages for register read/write accesses - * made by the Kbase. The buffer is filled in circular - * fashion. - * @tb_wrap_offset: Offset to the end location in the trace buffer, the - * write pointer is moved to the beginning on reaching - * this offset. - * @work_id: atomic variable used for GPU tracepoints, incremented - * on every call to base_jd_submit. - */ -struct kbase_jd_context { - struct mutex lock; - struct kbasep_js_kctx_info sched_info; - struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; - - u32 job_nr; - - wait_queue_head_t zero_jobs_wait; - - struct workqueue_struct *job_done_wq; - - spinlock_t tb_lock; - u32 *tb; - size_t tb_wrap_offset; - -#ifdef CONFIG_GPU_TRACEPOINTS - atomic_t work_id; -#endif -}; - struct kbase_device_info { u32 features; }; @@ -923,6 +303,8 @@ struct kbase_mmu_table { struct kbase_context *kctx; }; +#include "jm/mali_kbase_jm_defs.h" + static inline int kbase_as_has_bus_fault(struct kbase_as *as, struct kbase_fault *fault) { @@ -935,10 +317,19 @@ static inline int kbase_as_has_page_fault(struct kbase_as *as, return (fault == &as->pf_data); } +/** + * struct kbasep_mem_device - Data stored per device for memory allocation + * + * @used_pages: Tracks usage of OS shared memory. Updated when OS memory is + * allocated/freed. + * @ir_threshold: Fraction of the maximum size of an allocation that grows + * on GPU page fault that can be used before the driver + * switches to incremental rendering, in 1/256ths. + * 0 means disabled. + */ struct kbasep_mem_device { - atomic_t used_pages; /* Tracks usage of OS shared memory. Updated - when OS memory is allocated/freed. 
*/ - + atomic_t used_pages; + atomic_t ir_threshold; }; #define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X @@ -1201,8 +592,6 @@ struct kbase_mmu_mode { struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); - - #define DEVNAME_SIZE 16 /** @@ -1523,6 +912,7 @@ struct kbase_device { #endif /* CONFIG_MALI_NO_MALI */ struct kbase_pm_device_data pm; + struct kbasep_js_device_data js_data; struct kbase_mem_pool_group mem_pools; struct kbasep_mem_device memdev; @@ -1574,7 +964,6 @@ struct kbase_device { u16 trace_next_in; struct kbase_trace *trace_rbuf; #endif - u32 reset_timeout_ms; bool cache_clean_in_progress; @@ -1706,19 +1095,19 @@ struct kbase_device { struct mutex mmu_hw_mutex; - /* See KBASE_SERIALIZE_* for details */ - u8 serialize_jobs; - -#ifdef CONFIG_MALI_CINSTR_GWT - u8 backup_serialize_jobs; -#endif - u8 l2_size_override; u8 l2_hash_override; /* See KBASE_JS_*_PRIORITY_MODE for details. */ u32 js_ctx_scheduling_mode; + /* See KBASE_SERIALIZE_* for details */ + u8 serialize_jobs; + +#ifdef CONFIG_MALI_CINSTR_GWT + u8 backup_serialize_jobs; +#endif /* CONFIG_MALI_CINSTR_GWT */ + struct { struct kbase_context *ctx; @@ -1728,22 +1117,6 @@ struct kbase_device { } dummy_job_wa; }; -/** - * struct jsctx_queue - JS context atom queue - * @runnable_tree: Root of RB-tree containing currently runnable atoms on this - * job slot. - * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot - * dependencies. Atoms on this list will be moved to the - * runnable_tree when the blocking atom completes. - * - * hwaccess_lock must be held when accessing this structure. - */ -struct jsctx_queue { - struct rb_root runnable_tree; - struct list_head x_dep_head; -}; - - #define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ (((minor) & 0xFFF) << 8) | \ ((0 & 0xFF) << 0)) @@ -1852,6 +1225,11 @@ struct kbase_file { * from it for job slot 2. This is reset when the context first goes active or * is re-activated on that slot. * + * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for + * the context due to unhandled page(or bus) fault. It is cleared when the + * refcount for the context drops to 0 or on when the address spaces are + * re-enabled on GPU reset or power cycle. + * * All members need to be separate bits. This enum is intended for use in a * bitmask where multiple values get OR-ed together. */ @@ -1871,6 +1249,7 @@ enum kbase_context_flags { KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, + KCTX_AS_DISABLED_ON_FAULT = 1U << 15, }; struct kbase_sub_alloc { @@ -2053,36 +1432,58 @@ struct kbase_sub_alloc { * soft-jobs which have been blocked for more than the * timeout value used for the soft-jobs * @jit_alloc: Array of 256 pointers to GPU memory regions, used for - * for JIT allocations. - * @jit_max_allocations: Maximum number of JIT allocations allowed at once. - * @jit_current_allocations: Current number of in-flight JIT allocations. - * @jit_current_allocations_per_bin: Current number of in-flight JIT allocations per bin - * @jit_version: version number indicating whether userspace is using - * old or new version of interface for JIT allocations - * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_OLD - * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT + * just-in-time memory allocations. + * @jit_max_allocations: Maximum allowed number of in-flight + * just-in-time memory allocations. 
+ * @jit_current_allocations: Current number of in-flight just-in-time + * memory allocations. + * @jit_current_allocations_per_bin: Current number of in-flight just-in-time + * memory allocations per bin. + * @jit_version: Version number indicating whether userspace is using + * old or new version of interface for just-in-time + * memory allocations. + * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_10_2 + * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT_11_5 + * 3 -> client used KBASE_IOCTL_MEM_JIT_INIT * @jit_group_id: A memory group ID to be passed to a platform-specific * memory group manager. * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). - * @jit_active_head: List containing the JIT allocations which are in use. - * @jit_pool_head: List containing the JIT allocations which have been - * freed up by userpsace and so not being used by them. + * @jit_phys_pages_limit: Limit of physical pages to apply across all + * just-in-time memory allocations, applied to + * @jit_current_phys_pressure. + * @jit_current_phys_pressure: Current 'pressure' on physical pages, which is + * the sum of the worst case estimate of pages that + * could be used (i.e. the + * &struct_kbase_va_region.nr_pages for all in-use + * just-in-time memory regions that have not yet had + * a usage report) and the actual number of pages + * that were used (i.e. the + * &struct_kbase_va_region.used_pages for regions + * that have had a usage report). + * @jit_active_head: List containing the just-in-time memory allocations + * which are in use. + * @jit_pool_head: List containing the just-in-time memory allocations + * which have been freed up by userspace and so not being + * used by them. * Driver caches them to quickly fulfill requests for new * JIT allocations. They are released in case of memory * pressure as they are put on the @evict_list when they * are freed up by userspace. - * @jit_destroy_head: List containing the JIT allocations which were moved to it - * from @jit_pool_head, in the shrinker callback, after freeing - * their backing physical pages. - * @jit_evict_lock: Lock used for operations done on JIT allocations and also - * for accessing @evict_list. - * @jit_work: Work item queued to defer the freeing of memory region when - * JIT allocation is moved to @jit_destroy_head. - * @jit_atoms_head: A list of the JIT soft-jobs, both alloc & free, in submission - * order, protected by kbase_jd_context.lock. - * @jit_pending_alloc: A list of JIT alloc soft-jobs for which allocation will be - * reattempted after the impending free of other active JIT - * allocations. + * @jit_destroy_head: List containing the just-in-time memory allocations + * which were moved to it from @jit_pool_head, in the + * shrinker callback, after freeing their backing + * physical pages. + * @jit_evict_lock: Lock used for operations done on just-in-time memory + * allocations and also for accessing @evict_list. + * @jit_work: Work item queued to defer the freeing of a memory + * region when a just-in-time memory allocation is moved + * to @jit_destroy_head. + * @jit_atoms_head: A list of the just-in-time memory soft-jobs, both + * allocate & free, in submission order, protected by + * &struct_kbase_jd_context.lock. + * @jit_pending_alloc: A list of just-in-time memory allocation soft-jobs + * which will be reattempted after the impending free of + * other active allocations. 
* @ext_res_meta_head: A list of sticky external resources which were requested to * be mapped on GPU side, through a softjob atom of type * EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl. @@ -2132,10 +1533,28 @@ struct kbase_context { struct list_head mem_partials; struct mutex reg_lock; + struct rb_root reg_rbtree_same; struct rb_root reg_rbtree_custom; struct rb_root reg_rbtree_exec; + struct kbase_jd_context jctx; + struct jsctx_queue jsctx_queue + [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; + + struct list_head completed_jobs; + atomic_t work_count; + struct timer_list soft_job_timeout; + + atomic_t atoms_pulled; + atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; + int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][ + KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + int priority; + bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + u32 slots_pullable; + u32 age_count; DECLARE_BITMAP(cookies, BITS_PER_LONG); struct kbase_va_region *pending_regions[BITS_PER_LONG]; @@ -2143,8 +1562,6 @@ struct kbase_context { wait_queue_head_t event_queue; pid_t tgid; pid_t pid; - - struct kbase_jd_context jctx; atomic_t used_pages; atomic_t nonmapped_pages; atomic_t permanent_mapped_pages; @@ -2186,34 +1603,19 @@ struct kbase_context { #endif /* CONFIG_DEBUG_FS */ - struct jsctx_queue jsctx_queue - [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; - - atomic_t atoms_pulled; - atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; - int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][ - KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - - bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - - u32 slots_pullable; - - struct work_struct work; - struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli; struct mutex legacy_hwcnt_lock; - struct list_head completed_jobs; - atomic_t work_count; - - struct timer_list soft_job_timeout; - - struct kbase_va_region *jit_alloc[256]; + struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT]; u8 jit_max_allocations; u8 jit_current_allocations; u8 jit_current_allocations_per_bin[256]; u8 jit_version; u8 jit_group_id; +#if MALI_JIT_PRESSURE_LIMIT + u64 jit_phys_pages_limit; + u64 jit_current_phys_pressure; +#endif /* MALI_JIT_PRESSURE_LIMIT */ struct list_head jit_active_head; struct list_head jit_pool_head; struct list_head jit_destroy_head; @@ -2225,22 +1627,15 @@ struct kbase_context { struct list_head ext_res_meta_head; - u32 age_count; - u8 trim_level; #ifdef CONFIG_MALI_CINSTR_GWT bool gwt_enabled; - bool gwt_was_enabled; - struct list_head gwt_current_list; - struct list_head gwt_snapshot_list; #endif - int priority; - s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; base_context_create_flags create_flags; }; diff --git a/mali_kbase/mali_kbase_disjoint_events.c b/mali_kbase/mali_kbase_disjoint_events.c index 68eb4ed..b5ac414 100644 --- a/mali_kbase/mali_kbase_disjoint_events.c +++ b/mali_kbase/mali_kbase_disjoint_events.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/mali_kbase_dma_fence.c b/mali_kbase/mali_kbase_dma_fence.c index 6a95900..25acbcb 100644 --- a/mali_kbase/mali_kbase_dma_fence.c +++ b/mali_kbase/mali_kbase_dma_fence.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2011-2017,2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -114,6 +114,8 @@ kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info, ww_acquire_fini(ctx); } + + /** * kbase_dma_fence_queue_work() - Queue work to handle @katom * @katom: Pointer to atom for which to queue work diff --git a/mali_kbase/mali_kbase_dummy_job_wa.c b/mali_kbase/mali_kbase_dummy_job_wa.c index a72436a..5830e8e 100644 --- a/mali_kbase/mali_kbase_dummy_job_wa.c +++ b/mali_kbase/mali_kbase_dummy_job_wa.c @@ -187,7 +187,6 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true); if (cores >> 32) wait(kbdev, SHADER_READY_HI, (cores >> 32), true); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX); } if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) { diff --git a/mali_kbase/mali_kbase_dummy_job_wa.h b/mali_kbase/mali_kbase_dummy_job_wa.h index 0ffd5b9..5bbe37d 100644 --- a/mali_kbase/mali_kbase_dummy_job_wa.h +++ b/mali_kbase/mali_kbase_dummy_job_wa.h @@ -31,6 +31,7 @@ KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP | \ KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) + int kbase_dummy_job_wa_load(struct kbase_device *kbdev); void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev); int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores); @@ -40,4 +41,5 @@ static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) return (kbdev->dummy_job_wa.ctx != NULL); } + #endif /* _KBASE_DUMMY_JOB_WORKAROUND_ */ diff --git a/mali_kbase/mali_kbase_event.c b/mali_kbase/mali_kbase_event.c index 0ba5f97..2bbc313 100644 --- a/mali_kbase/mali_kbase_event.c +++ b/mali_kbase/mali_kbase_event.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016,2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016,2018-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -44,7 +44,7 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom); katom->status = KBASE_JD_ATOM_STATE_UNUSED; - + dev_dbg(kbdev->dev, "Atom %p status to unused\n", (void *)katom); wake_up(&katom->completed); return data; @@ -83,10 +83,12 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); uevent->event_code = atom->event_code; + uevent->atom_number = (atom - ctx->jctx.atoms); if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) kbase_jd_free_external_resources(atom); + mutex_lock(&ctx->jctx.lock); uevent->udata = kbase_event_process(ctx, atom); mutex_unlock(&ctx->jctx.lock); @@ -110,6 +112,7 @@ static void kbase_event_process_noreport_worker(struct work_struct *data) if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) kbase_jd_free_external_resources(katom); + mutex_lock(&kctx->jctx.lock); kbase_event_process(kctx, katom); mutex_unlock(&kctx->jctx.lock); @@ -162,22 +165,25 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) { struct kbase_device *kbdev = ctx->kbdev; + dev_dbg(kbdev->dev, "Posting event for atom %p\n", (void *)atom); + if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { if (atom->event_code == BASE_JD_EVENT_DONE) { - /* Don't report the event */ + dev_dbg(kbdev->dev, "Suppressing event (atom done)\n"); kbase_event_process_noreport(ctx, atom); return; } } if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { - /* Don't report the event */ + dev_dbg(kbdev->dev, "Suppressing event (never)\n"); kbase_event_process_noreport(ctx, atom); return; } KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, TL_ATOM_STATE_POSTED); if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { /* Don't report the event until other event(s) have completed */ + dev_dbg(kbdev->dev, "Deferring event (coalesced)\n"); mutex_lock(&ctx->event_mutex); list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); ++ctx->event_coalesce_count; @@ -191,6 +197,7 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) list_add_tail(&atom->dep_item[0], &ctx->event_list); atomic_add(event_count, &ctx->event_count); mutex_unlock(&ctx->event_mutex); + dev_dbg(kbdev->dev, "Reporting %d events\n", event_count); kbase_event_wakeup(ctx); } @@ -212,9 +219,7 @@ int kbase_event_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->event_list); INIT_LIST_HEAD(&kctx->event_coalesce_list); mutex_init(&kctx->event_mutex); - atomic_set(&kctx->event_count, 0); kctx->event_coalesce_count = 0; - atomic_set(&kctx->event_closed, false); kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); if (NULL == kctx->event_workq) diff --git a/mali_kbase/mali_kbase_fence.c b/mali_kbase/mali_kbase_fence.c index 96a6ab9..7a715b3 100644 --- a/mali_kbase/mali_kbase_fence.c +++ b/mali_kbase/mali_kbase_fence.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -87,6 +87,7 @@ const struct dma_fence_ops kbase_fence_ops = { .fence_value_str = kbase_fence_fence_value_str }; + #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) struct fence * kbase_fence_out_new(struct kbase_jd_atom *katom) @@ -210,3 +211,4 @@ kbase_fence_add_callback(struct kbase_jd_atom *katom, return err; } + diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h index d7a65e0..8e7024e 100644 --- a/mali_kbase/mali_kbase_fence.h +++ b/mali_kbase/mali_kbase_fence.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -87,6 +87,7 @@ struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); } while (0) #endif + /** * kbase_fence_out_remove() - Removes the output fence from atom * @katom: Atom to remove output fence for @@ -268,6 +269,7 @@ bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom); */ #define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence) + /** * kbase_fence_put() - Releases a reference to a fence * @fence: Fence to release reference for. diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c index d5495a1..f1f188f 100644 --- a/mali_kbase/mali_kbase_gpuprops.c +++ b/mali_kbase/mali_kbase_gpuprops.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,7 +51,8 @@ #define KBASE_UBFX32(value, offset, size) \ (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) -static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props) +static void kbase_gpuprops_construct_coherent_groups( + struct base_gpu_props * const props) { struct mali_base_gpu_coherent_group *current_group; u64 group_present; @@ -120,13 +121,14 @@ static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const prop /** * kbase_gpuprops_get_props - Get the GPU configuration - * @gpu_props: The &base_gpu_props structure + * @gpu_props: The &struct base_gpu_props structure * @kbdev: The &struct kbase_device structure for the device * - * Fill the &base_gpu_props structure with values from the GPU configuration - * registers. Only the raw properties are filled in this function + * Fill the &struct base_gpu_props structure with values from the GPU + * configuration registers. Only the raw properties are filled in this function. 
*/ -static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev) +static void kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props, + struct kbase_device *kbdev) { struct kbase_gpuprops_regdump regdump; int i; @@ -172,7 +174,8 @@ static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kb gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc; } -void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props) +void kbase_gpuprops_update_core_props_gpu_id( + struct base_gpu_props * const gpu_props) { gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4); @@ -186,13 +189,14 @@ void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props) /** * kbase_gpuprops_calculate_props - Calculate the derived properties - * @gpu_props: The &base_gpu_props structure + * @gpu_props: The &struct base_gpu_props structure * @kbdev: The &struct kbase_device structure for the device * - * Fill the &base_gpu_props structure with values derived from the GPU + * Fill the &struct base_gpu_props structure with values derived from the GPU * configuration registers */ -static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev) +static void kbase_gpuprops_calculate_props( + struct base_gpu_props * const gpu_props, struct kbase_device *kbdev) { int i; u32 gpu_id; @@ -323,7 +327,7 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) void kbase_gpuprops_set_features(struct kbase_device *kbdev) { - base_gpu_props *gpu_props; + struct base_gpu_props *gpu_props; struct kbase_gpuprops_regdump regdump; gpu_props = &kbdev->gpu_props.props; @@ -396,7 +400,7 @@ void kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) { if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { struct kbase_gpuprops_regdump regdump; - base_gpu_props *gpu_props = &kbdev->gpu_props.props; + struct base_gpu_props *gpu_props = &kbdev->gpu_props.props; /* Check for L2 cache size & hash overrides */ if (!kbase_read_l2_config_from_dt(kbdev)) diff --git a/mali_kbase/mali_kbase_gpuprops.h b/mali_kbase/mali_kbase_gpuprops.h index 4fdb3f9..eeba92f 100644 --- a/mali_kbase/mali_kbase_gpuprops.h +++ b/mali_kbase/mali_kbase_gpuprops.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015,2017,2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015, 2017, 2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -97,7 +97,7 @@ int kbase_device_populate_max_freq(struct kbase_device *kbdev); * separate fields (version_status, minor_revision, major_revision, product_id) * stored in base_gpu_props::core_props. */ -void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props); - +void kbase_gpuprops_update_core_props_gpu_id( + struct base_gpu_props * const gpu_props); #endif /* _KBASE_GPUPROPS_H_ */ diff --git a/mali_kbase/mali_kbase_gpuprops_types.h b/mali_kbase/mali_kbase_gpuprops_types.h index d7877d1..ec6f1c3 100644 --- a/mali_kbase/mali_kbase_gpuprops_types.h +++ b/mali_kbase/mali_kbase_gpuprops_types.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -89,7 +89,7 @@ struct kbase_gpu_props { struct kbase_gpu_mmu_props mmu; /* Properties shared with userspace */ - base_gpu_props props; + struct base_gpu_props props; u32 prop_buffer_size; void *prop_buffer; diff --git a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c index 75a0820..6a47c9d 100644 --- a/mali_kbase/mali_kbase_gwt.c +++ b/mali_kbase/mali_kbase_gwt.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c index b5304e8..c3abad4 100644 --- a/mali_kbase/mali_kbase_hw.c +++ b/mali_kbase/mali_kbase_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -182,6 +182,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0}, {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1}, {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1}, + {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2}, {U32_MAX, NULL} } }, {GPU_ID2_PRODUCT_TNAX, @@ -195,11 +196,13 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( {GPU_ID2_PRODUCT_LBEX, {{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0}, + {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tBEx_r1p1}, {U32_MAX, NULL} } }, {GPU_ID2_PRODUCT_TBEX, {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0}, {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p0}, {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0}, {U32_MAX, NULL} } }, diff --git a/mali_kbase/mali_kbase_hwaccess_instr.h b/mali_kbase/mali_kbase_hwaccess_instr.h index d5b9099..be85491 100644 --- a/mali_kbase/mali_kbase_hwaccess_instr.h +++ b/mali_kbase/mali_kbase_hwaccess_instr.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015, 2017-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2017-2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -139,4 +139,13 @@ int kbase_instr_backend_init(struct kbase_device *kbdev); */ void kbase_instr_backend_term(struct kbase_device *kbdev); +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +/** + * kbase_instr_backend_debugfs_init() - Add a debugfs entry for the + * hardware counter set. + * @kbdev: kbase device + */ +void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev); +#endif + #endif /* _KBASE_HWACCESS_INSTR_H_ */ diff --git a/mali_kbase/mali_kbase_hwaccess_jm.h b/mali_kbase/mali_kbase_hwaccess_jm.h index 4972893..3d5934e 100644 --- a/mali_kbase/mali_kbase_hwaccess_jm.h +++ b/mali_kbase/mali_kbase_hwaccess_jm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/mali_kbase_hwcnt.c b/mali_kbase/mali_kbase_hwcnt.c index 265fc21..14ec5cb 100644 --- a/mali_kbase/mali_kbase_hwcnt.c +++ b/mali_kbase/mali_kbase_hwcnt.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -683,21 +683,14 @@ bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) if (!WARN_ON(hctx->disable_count == SIZE_MAX)) { /* - * If disable count is non-zero or no counters are enabled, we - * can just bump the disable count. + * If disable count is non-zero, we can just bump the disable + * count. * * Otherwise, we can't disable in an atomic context. */ if (hctx->disable_count != 0) { hctx->disable_count++; atomic_disabled = true; - } else { - WARN_ON(!hctx->accum_inited); - if (!hctx->accum.enable_map_any_enabled) { - hctx->disable_count++; - hctx->accum.state = ACCUM_STATE_DISABLED; - atomic_disabled = true; - } } } diff --git a/mali_kbase/mali_kbase_ioctl.h b/mali_kbase/mali_kbase_ioctl.h index c041829..977b194 100644 --- a/mali_kbase/mali_kbase_ioctl.h +++ b/mali_kbase/mali_kbase_ioctl.h @@ -30,64 +30,9 @@ extern "C" { #include <asm-generic/ioctl.h> #include <linux/types.h> -#define KBASE_IOCTL_TYPE 0x80 +#include "jm/mali_kbase_jm_ioctl.h" -/* - * 11.1: - * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags - * 11.2: - * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED, - * which some user-side clients prior to 11.2 might fault if they received - * them - * 11.3: - * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and - * KBASE_IOCTL_STICKY_RESOURCE_UNMAP - * 11.4: - * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET - * 11.5: - * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD) - * 11.6: - * - Added flags field to base_jit_alloc_info structure, which can be used to - * specify pseudo chunked tiler alignment for JIT allocations. - * 11.7: - * - Removed UMP support - * 11.8: - * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags - * 11.9: - * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY - * under base_mem_alloc_flags - * 11.10: - * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for - * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations - * with one softjob. - * 11.11: - * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags - * 11.12: - * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS - * 11.13: - * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT - * 11.14: - * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set - * under base_mem_alloc_flags - * 11.15: - * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags. - * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be - * passed to mmap(). - * 11.16: - * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf. - * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for - * dma-buf. Now, buffers are mapped on GPU when first imported, no longer - * requiring external resource or sticky resource tracking. UNLESS, - * CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled. 
- * 11.17: - * - Added BASE_JD_REQ_JOB_SLOT. - * - Reused padding field in base_jd_atom_v2 to pass job slot number. - * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO - * 11.18: - * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags - */ -#define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 18 +#define KBASE_IOCTL_TYPE 0x80 /** * struct kbase_ioctl_version_check - Check version compatibility with kernel @@ -116,22 +61,6 @@ struct kbase_ioctl_set_flags { _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) /** - * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel - * - * @addr: Memory address of an array of struct base_jd_atom_v2 - * @nr_atoms: Number of entries in the array - * @stride: sizeof(struct base_jd_atom_v2) - */ -struct kbase_ioctl_job_submit { - __u64 addr; - __u32 nr_atoms; - __u32 stride; -}; - -#define KBASE_IOCTL_JOB_SUBMIT \ - _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit) - -/** * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel * * @buffer: Pointer to the buffer to store properties into @@ -166,9 +95,6 @@ struct kbase_ioctl_get_gpuprops { #define KBASE_IOCTL_GET_GPUPROPS \ _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) -#define KBASE_IOCTL_POST_TERM \ - _IO(KBASE_IOCTL_TYPE, 4) - /** * union kbase_ioctl_mem_alloc - Allocate memory on the GPU * @@ -332,8 +258,9 @@ struct kbase_ioctl_get_ddk_version { _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) /** - * struct kbase_ioctl_mem_jit_init_old - Initialise the JIT memory allocator - * + * struct kbase_ioctl_mem_jit_init_10_2 - Initialize the just-in-time memory + * allocator (between kernel driver + * version 10.2--11.4) * @va_pages: Number of VA pages to reserve for JIT * * Note that depending on the VA size of the application and GPU, the value @@ -342,16 +269,17 @@ struct kbase_ioctl_get_ddk_version { * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for * backwards compatibility. */ -struct kbase_ioctl_mem_jit_init_old { +struct kbase_ioctl_mem_jit_init_10_2 { __u64 va_pages; }; -#define KBASE_IOCTL_MEM_JIT_INIT_OLD \ - _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_old) +#define KBASE_IOCTL_MEM_JIT_INIT_10_2 \ + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_10_2) /** - * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator - * + * struct kbase_ioctl_mem_jit_init_11_5 - Initialize the just-in-time memory + * allocator (between kernel driver + * version 11.5--11.19) * @va_pages: Number of VA pages to reserve for JIT * @max_allocations: Maximum number of concurrent allocations * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%) @@ -360,6 +288,34 @@ struct kbase_ioctl_mem_jit_init_old { * * Note that depending on the VA size of the application and GPU, the value * specified in @va_pages may be ignored. + * + * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for + * backwards compatibility. 
+ */ +struct kbase_ioctl_mem_jit_init_11_5 { + __u64 va_pages; + __u8 max_allocations; + __u8 trim_level; + __u8 group_id; + __u8 padding[5]; +}; + +#define KBASE_IOCTL_MEM_JIT_INIT_11_5 \ + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_11_5) + +/** + * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory + * allocator + * @va_pages: Number of GPU virtual address pages to reserve for just-in-time + * memory allocations + * @max_allocations: Maximum number of concurrent allocations + * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%) + * @group_id: Group ID to be used for physical allocations + * @padding: Currently unused, must be zero + * @phys_pages: Maximum number of physical pages to allocate just-in-time + * + * Note that depending on the VA size of the application and GPU, the value + * specified in @va_pages may be ignored. */ struct kbase_ioctl_mem_jit_init { __u64 va_pages; @@ -367,6 +323,7 @@ struct kbase_ioctl_mem_jit_init { __u8 trim_level; __u8 group_id; __u8 padding[5]; + __u64 phys_pages; }; #define KBASE_IOCTL_MEM_JIT_INIT \ @@ -585,21 +542,6 @@ struct kbase_ioctl_mem_profile_add { _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add) /** - * struct kbase_ioctl_soft_event_update - Update the status of a soft-event - * @event: GPU address of the event which has been updated - * @new_status: The new status to set - * @flags: Flags for future expansion - */ -struct kbase_ioctl_soft_event_update { - __u64 event; - __u32 new_status; - __u32 flags; -}; - -#define KBASE_IOCTL_SOFT_EVENT_UPDATE \ - _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update) - -/** * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource * @count: Number of resources * @address: Array of u64 GPU addresses of the external resources to map @@ -695,7 +637,6 @@ union kbase_ioctl_cinstr_gwt_dump { #define KBASE_IOCTL_CINSTR_GWT_DUMP \ _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump) - /** * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone * @@ -708,7 +649,6 @@ struct kbase_ioctl_mem_exec_init { #define KBASE_IOCTL_MEM_EXEC_INIT \ _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init) - /** * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of * cpu/gpu time (counter values) @@ -742,7 +682,6 @@ union kbase_ioctl_get_cpu_gpu_timeinfo { #define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \ _IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo) - /*************** * test ioctls * ***************/ @@ -784,38 +723,7 @@ struct kbase_ioctl_tlstream_stats { #define KBASE_IOCTL_TLSTREAM_STATS \ _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats) -/** - * struct kbase_ioctl_cs_event_memory_write - Write an event memory address - * @cpu_addr: Memory address to write - * @value: Value to write - * @padding: Currently unused, must be zero - */ -struct kbase_ioctl_cs_event_memory_write { - __u64 cpu_addr; - __u8 value; - __u8 padding[7]; -}; - -/** - * union kbase_ioctl_cs_event_memory_read - Read an event memory address - * @cpu_addr: Memory address to read - * @value: Value read - * @padding: Currently unused, must be zero - * - * @in: Input parameters - * @out: Output parameters - */ -union kbase_ioctl_cs_event_memory_read { - struct { - __u64 cpu_addr; - } in; - struct { - __u8 value; - __u8 padding[7]; - } out; -}; - -#endif +#endif /* MALI_UNIT_TEST */ /* Customer extension range */ #define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2) diff --git 
a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c index 88ab962..b4ae3ba 100644 --- a/mali_kbase/mali_kbase_jd.c +++ b/mali_kbase/mali_kbase_jd.c @@ -36,6 +36,7 @@ #include <tl/mali_kbase_tracepoints.h> #include "mali_kbase_dma_fence.h" +#include <mali_kbase_cs_experimental.h> #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) @@ -49,6 +50,12 @@ #define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ BASE_JD_REQ_DEP))) + +/* Minimum API version that supports the just-in-time memory allocation pressure + * limit feature. + */ +#define MIN_API_VERSION_WITH_JPL KBASE_API_VERSION(11, 20) + /* * This is the kernel side of the API. Only entry points are: * - kbase_jd_submit(): Called from userspace to submit a single bag @@ -76,36 +83,45 @@ get_compat_pointer(struct kbase_context *kctx, const u64 p) * Note that the caller must also check the atom status and * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock */ -static int jd_run_atom(struct kbase_jd_atom *katom) +static bool jd_run_atom(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; + dev_dbg(kctx->kbdev->dev, "JD run atom %p in kctx %p\n", + (void *)katom, (void *)kctx); + KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { /* Dependency only atom */ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", + (void *)katom); return 0; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { /* Soft-job */ if (katom->will_fail_event_code) { kbase_finish_soft_job(katom); katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kctx->kbdev->dev, + "Atom %p status to completed\n", (void *)katom); return 0; } if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kctx->kbdev->dev, + "Atom %p status to completed\n", (void *)katom); } return 0; } katom->status = KBASE_JD_ATOM_STATE_IN_JS; + dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", (void *)katom); /* Queue an action about whether we should try scheduling a context */ return kbasep_js_add_job(kctx, katom); } -#if defined(CONFIG_MALI_DMA_FENCE) void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) { struct kbase_device *kbdev; @@ -136,7 +152,6 @@ void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) kbase_js_sched_all(kbdev); } } -#endif void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) { @@ -533,6 +548,124 @@ static void jd_try_submitting_deps(struct list_head *out_list, } } +#if MALI_JIT_PRESSURE_LIMIT +/** + * jd_update_jit_usage - Update just-in-time physical memory usage for an atom. + * + * @katom: An atom that has just finished. + * + * Read back actual just-in-time memory region usage from atoms that provide + * this information, and update the current physical page pressure. + * + * The caller must hold the kbase_jd_context.lock. + */ +static void jd_update_jit_usage(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct kbase_va_region *reg; + struct kbase_vmap_struct mapping; + u64 *ptr; + u64 used_pages; + unsigned int idx; + + lockdep_assert_held(&kctx->jctx.lock); + + /* If this atom wrote to JIT memory, find out how much it has written + * and update the usage information in the region. 
+ */ + for (idx = 0; + idx < ARRAY_SIZE(katom->jit_ids) && katom->jit_ids[idx]; + idx++) { + size_t size_to_read; + u64 read_val; + + reg = kctx->jit_alloc[katom->jit_ids[idx]]; + + if (!reg) { + dev_warn(kctx->kbdev->dev, + "%s: JIT id[%u]=%u has no region\n", + __func__, idx, katom->jit_ids[idx]); + continue; + } + + if (reg == KBASE_RESERVED_REG_JIT_ALLOC) { + dev_warn(kctx->kbdev->dev, + "%s: JIT id[%u]=%u has failed to allocate a region\n", + __func__, idx, katom->jit_ids[idx]); + continue; + } + + if (!reg->heap_info_gpu_addr) + continue; + + size_to_read = sizeof(*ptr); + if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) + size_to_read = sizeof(u32); + + ptr = kbase_vmap(kctx, reg->heap_info_gpu_addr, size_to_read, + &mapping); + + if (!ptr) { + dev_warn(kctx->kbdev->dev, + "%s: JIT id[%u]=%u start=0x%llx unable to map end marker %llx\n", + __func__, idx, katom->jit_ids[idx], + reg->start_pfn << PAGE_SHIFT, + reg->heap_info_gpu_addr); + continue; + } + + if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) { + read_val = READ_ONCE(*(u32 *)ptr); + used_pages = PFN_UP(read_val); + } else { + u64 addr_end = read_val = READ_ONCE(*ptr); + + if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + unsigned long extent_bytes = reg->extent << + PAGE_SHIFT; + /* kbase_check_alloc_sizes() already satisfies + * this, but here to avoid future maintenance + * hazards + */ + WARN_ON(!is_power_of_2(extent_bytes)); + + addr_end = ALIGN(read_val, extent_bytes); + } + used_pages = PFN_UP(addr_end) - reg->start_pfn; + } + + trace_mali_jit_report(katom, reg, idx, read_val, used_pages); + kbase_trace_jit_report_gpu_mem(kctx, reg, 0u); + + /* We can never have used more pages than the VA size of the + * region + */ + if (used_pages > reg->nr_pages) { + dev_warn(kctx->kbdev->dev, + "%s: JIT id[%u]=%u start=0x%llx used_pages %llx > %zx (read 0x%llx as %s%s)\n", + __func__, idx, katom->jit_ids[idx], + reg->start_pfn << PAGE_SHIFT, + used_pages, reg->nr_pages, read_val, + (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) ? + "size" : "addr", + (reg->flags & KBASE_REG_TILER_ALIGN_TOP) ? + " with align" : ""); + used_pages = reg->nr_pages; + } + /* Note: one real use case has an atom correctly reporting 0 + * pages in use. This happens in normal use-cases but may only + * happen for a few of the application's frames. + */ + + kbase_vunmap(kctx, &mapping); + + kbase_jit_report_update_pressure(kctx, reg, used_pages, 0u); + } + + kbase_jit_retry_pending_alloc(kctx); +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + /* * Perform the necessary handling of an atom that has finished running * on the GPU. 
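For reference, the page-count arithmetic performed by jd_update_jit_usage() above can be restated in isolation. The sketch below is illustrative only and is not part of the patch: it assumes 4 KiB pages, and the helper name heap_marker_to_used_pages() is hypothetical. It mirrors the two cases handled above: a KBASE_REG_HEAP_INFO_IS_SIZE marker is a byte count that is rounded up to whole pages, while an address-style marker is first rounded up to the tiler extent for KBASE_REG_TILER_ALIGN_TOP regions and then converted to a page count relative to the region's start PFN.

/* Illustrative sketch (not driver code): how a heap-usage end marker maps
 * to a number of used physical pages, following jd_update_jit_usage().
 */
#include <stdbool.h>
#include <stdint.h>

#define PAGE_SHIFT 12u
#define PAGE_SIZE  (1ull << PAGE_SHIFT)
#define PFN_UP(x)  (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

static uint64_t heap_marker_to_used_pages(uint64_t marker, bool marker_is_size,
					  bool tiler_align_top,
					  uint64_t start_pfn,
					  uint64_t extent_pages)
{
	if (marker_is_size)
		return PFN_UP(marker);	/* marker is a byte count */

	/* marker is an end address within the region */
	if (tiler_align_top)
		marker = ALIGN_UP(marker, extent_pages << PAGE_SHIFT);

	return PFN_UP(marker) - start_pfn;
}

For example, an address marker of 0x5000 on a region whose start PFN is 2, with no tiler alignment, yields three used pages; jd_update_jit_usage() then clamps the result to the region's VA size and passes it to kbase_jit_report_update_pressure().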
@@ -556,6 +689,10 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); +#if MALI_JIT_PRESSURE_LIMIT + jd_update_jit_usage(katom); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + /* This is needed in case an atom is failed due to being invalid, this * can happen *before* the jobs that the atom depends on have completed */ for (i = 0; i < 2; i++) { @@ -566,6 +703,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, } katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", + (void *)katom); list_add_tail(&katom->jd_item, &completed_jobs); while (!list_empty(&completed_jobs)) { @@ -588,7 +727,12 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, list_del(runnable_jobs.next); node->in_jd_list = false; + dev_dbg(kctx->kbdev->dev, "List node %p has status %d\n", + node, node->status); + KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); + if (node->status == KBASE_JD_ATOM_STATE_IN_JS) + continue; if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && !kbase_ctx_flag(kctx, KCTX_DYING)) { @@ -692,16 +836,20 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) } #endif -bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom) +static bool jd_submit_atom(struct kbase_context *const kctx, + const struct base_jd_atom_v2 *const user_atom, + const struct base_jd_fragment *const user_jc_incr, + struct kbase_jd_atom *const katom) { struct kbase_device *kbdev = kctx->kbdev; struct kbase_jd_context *jctx = &kctx->jctx; int queued = 0; int i; int sched_prio; - bool ret; bool will_fail = false; + dev_dbg(kbdev->dev, "User did JD submit atom %p\n", (void *)katom); + /* Update the TOTAL number of jobs. This includes those not tracked by * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ jctx->job_nr++; @@ -729,6 +877,22 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; katom->softjob_data = NULL; +#if MALI_JIT_PRESSURE_LIMIT + /* Older API version atoms might have random values where jit_id now + * lives, but we must maintain backwards compatibility - handle the + * issue. + */ + if (kctx->api_version < MIN_API_VERSION_WITH_JPL) { + katom->jit_ids[0] = 0; + katom->jit_ids[1] = 0; + } else { + katom->jit_ids[0] = user_atom->jit_id[0]; + katom->jit_ids[1] = user_atom->jit_id[1]; + } +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + katom->renderpass_id = user_atom->renderpass_id; + /* Implicitly sets katom->protected_state.enter as well. */ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; @@ -754,6 +918,9 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us dep_atom_type != BASE_JD_DEP_TYPE_DATA) { katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kbdev->dev, + "Atom %p status to completed\n", + (void *)katom); /* Wrong dependency setup. Atom will be sent * back to user space. 
Do not record any @@ -770,8 +937,7 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom, TL_ATOM_STATE_IDLE); - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } } } @@ -805,6 +971,8 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us /* Atom has completed, propagate the error code if any */ katom->event_code = dep_atom->event_code; katom->status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kbdev->dev, "Atom %p status to queued\n", + (void *)katom); /* This atom will be sent back to user space. * Do not record any dependencies. @@ -840,37 +1008,33 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us kbase_finish_soft_job(katom); } - ret = jd_done_nolock(katom, NULL); - - goto out; - } else { + return jd_done_nolock(katom, NULL); + } - if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { - /* This softjob has failed due to a previous - * dependency, however we should still run the - * prepare & finish functions - */ - if (kbase_prepare_soft_job(katom) != 0) { - katom->event_code = - BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; - } + if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + /* This softjob has failed due to a previous + * dependency, however we should still run the + * prepare & finish functions + */ + if (kbase_prepare_soft_job(katom) != 0) { + katom->event_code = + BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); } - - katom->will_fail_event_code = katom->event_code; - ret = false; - - goto out; } - } else { - /* These must occur after the above loop to ensure that an atom - * that depends on a previous atom with the same number behaves - * as expected */ - katom->event_code = BASE_JD_EVENT_DONE; - katom->status = KBASE_JD_ATOM_STATE_QUEUED; + + katom->will_fail_event_code = katom->event_code; + return false; } + /* These must occur after the above loop to ensure that an atom + * that depends on a previous atom with the same number behaves + * as expected + */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)katom); + /* For invalid priority, be most lenient and choose the default */ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) @@ -886,34 +1050,49 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, katom->sched_priority); KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); - /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ - if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { - dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL"); +#if !MALI_INCREMENTAL_RENDERING + /* Reject atoms for incremental rendering if not supported */ + if (katom->core_req & + (BASE_JD_REQ_START_RENDERPASS|BASE_JD_REQ_END_RENDERPASS)) { + dev_err(kctx->kbdev->dev, + "Rejecting atom with unsupported core_req 0x%x\n", + katom->core_req); katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); + } +#endif /* !MALI_INCREMENTAL_RENDERING */ + + if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) { + WARN_ON(katom->jc != 0); + katom->jc_fragment = *user_jc_incr; + } else if (!katom->jc && + (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { 
+ /* Reject atoms with job chain = NULL, as these cause issues + * with soft-stop + */ + dev_err(kctx->kbdev->dev, "Rejecting atom with jc = NULL\n"); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); } /* Reject atoms with an invalid device_nr */ if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) && (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) { - dev_warn(kctx->kbdev->dev, - "Rejecting atom with invalid device_nr %d", + dev_err(kctx->kbdev->dev, + "Rejecting atom with invalid device_nr %d\n", katom->device_nr); katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } /* Reject atoms with invalid core requirements */ if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { - dev_warn(kctx->kbdev->dev, - "Rejecting atom with invalid core requirements"); + dev_err(kctx->kbdev->dev, + "Rejecting atom with invalid core requirements\n"); katom->event_code = BASE_JD_EVENT_JOB_INVALID; katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } /* Reject soft-job atom of certain types from accessing external resources */ @@ -921,11 +1100,10 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us (((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) || ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) || ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) { - dev_warn(kctx->kbdev->dev, - "Rejecting soft-job atom accessing external resources"); + dev_err(kctx->kbdev->dev, + "Rejecting soft-job atom accessing external resources\n"); katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { @@ -933,11 +1111,21 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { /* setup failed (no access, bad resource, unknown resource types, etc.) */ katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } } +#if !MALI_JIT_PRESSURE_LIMIT + if ((kctx->api_version >= MIN_API_VERSION_WITH_JPL) && + (user_atom->jit_id[0] || user_atom->jit_id[1])) { + /* JIT pressure limit is disabled, but we are receiving non-0 + * JIT IDs - atom is invalid. + */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } +#endif /* MALI_JIT_PRESSURE_LIMIT */ + /* Validate the atom. Function will return error if the atom is * malformed. 
* @@ -948,15 +1136,13 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) { katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } } else { /* Soft-job */ if (kbase_prepare_soft_job(katom) != 0) { katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } } @@ -966,39 +1152,38 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us kbasep_map_core_reqs_to_string(katom->core_req)); #endif - if (queued && !IS_GPU_ATOM(katom)) { - ret = false; - goto out; - } + if (queued && !IS_GPU_ATOM(katom)) + return false; #ifdef CONFIG_MALI_DMA_FENCE - if (kbase_fence_dep_count_read(katom) != -1) { - ret = false; - goto out; - } + if (kbase_fence_dep_count_read(katom) != -1) + return false; + #endif /* CONFIG_MALI_DMA_FENCE */ if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } + return false; + } + + if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + bool need_to_try_schedule_context; - ret = false; - } else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { katom->status = KBASE_JD_ATOM_STATE_IN_JS; - ret = kbasep_js_add_job(kctx, katom); + dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + (void *)katom); + + need_to_try_schedule_context = kbasep_js_add_job(kctx, katom); /* If job was cancelled then resolve immediately */ - if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED) - ret = jd_done_nolock(katom, NULL); - } else { - /* This is a pure dependency. Resolve it immediately */ - ret = jd_done_nolock(katom, NULL); + if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) + return need_to_try_schedule_context; } - out: - return ret; + /* This is a pure dependency. 
Resolve it immediately */ + return jd_done_nolock(katom, NULL); } int kbase_jd_submit(struct kbase_context *kctx, @@ -1021,12 +1206,15 @@ int kbase_jd_submit(struct kbase_context *kctx, beenthere(kctx, "%s", "Enter"); if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { - dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it"); + dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it\n"); return -EINVAL; } - if (stride != sizeof(base_jd_atom_v2)) { - dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel"); + if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) && + stride != sizeof(struct base_jd_atom_v2)) { + dev_err(kbdev->dev, + "Stride %u passed to job_submit isn't supported by the kernel\n", + stride); return -EINVAL; } @@ -1035,14 +1223,58 @@ int kbase_jd_submit(struct kbase_context *kctx, for (i = 0; i < nr_atoms; i++) { struct base_jd_atom_v2 user_atom; + struct base_jd_fragment user_jc_incr; struct kbase_jd_atom *katom; - if (copy_from_user(&user_atom, user_addr, - sizeof(user_atom)) != 0) { - err = -EINVAL; + if (copy_from_user(&user_atom, user_addr, stride) != 0) { + dev_err(kbdev->dev, + "Invalid atom address %p passed to job_submit\n", + user_addr); + err = -EFAULT; break; } + if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) { + dev_dbg(kbdev->dev, "No renderpass ID: use 0\n"); + user_atom.renderpass_id = 0; + } else { + /* Ensure all padding bytes are 0 for potential future + * extension + */ + size_t j; + + dev_dbg(kbdev->dev, "Renderpass ID is %d\n", + user_atom.renderpass_id); + for (j = 0; j < sizeof(user_atom.padding); j++) { + if (user_atom.padding[j]) { + dev_err(kbdev->dev, + "Bad padding byte %zu: %d\n", + j, user_atom.padding[j]); + err = -EINVAL; + break; + } + } + if (err) + break; + } + + /* In this case 'jc' is the CPU address of a struct + * instead of a GPU address of a job chain. + */ + if (user_atom.core_req & BASE_JD_REQ_END_RENDERPASS) { + if (copy_from_user(&user_jc_incr, + u64_to_user_ptr(user_atom.jc), + sizeof(user_jc_incr))) { + dev_err(kbdev->dev, + "Invalid jc address 0x%llx passed to job_submit\n", + user_atom.jc); + err = -EFAULT; + break; + } + dev_dbg(kbdev->dev, "Copied IR jobchain addresses\n"); + user_atom.jc = 0; + } + user_addr = (void __user *)((uintptr_t) user_addr + stride); mutex_lock(&jctx->lock); @@ -1092,8 +1324,8 @@ while (false) mutex_lock(&jctx->lock); } - need_to_try_schedule_context |= - jd_submit_atom(kctx, &user_atom, katom); + need_to_try_schedule_context |= jd_submit_atom(kctx, &user_atom, + &user_jc_incr, katom); /* Register a completed job as a disjoint event when the GPU is in a disjoint state * (ie. being reset). 
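The submission loop above now accepts two atom strides: older clients pass atoms that end just before the new renderpass_id field (in which case the kernel defaults renderpass_id to 0), while newer clients pass the full struct and must zero the trailing padding bytes. When BASE_JD_REQ_END_RENDERPASS is set, the jc field is additionally reinterpreted as a CPU pointer to a struct base_jd_fragment rather than a GPU job-chain address. The condensed sketch below restates the stride/padding rule with stand-in types; fake_atom_v2 and atom_layout_ok() are hypothetical names, not part of the UAPI.

/* Illustrative restatement of the stride rules enforced inline by
 * kbase_jd_submit() above.  The struct is a stand-in; only the fields
 * relevant to the check are shown.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct fake_atom_v2 {
	uint64_t jc;		/* GPU job chain, or CPU pointer for END_RENDERPASS */
	/* ... remaining UAPI fields elided ... */
	uint8_t renderpass_id;
	uint8_t padding[7];
};

static bool atom_layout_ok(const struct fake_atom_v2 *atom, size_t stride)
{
	size_t i;

	/* Old layout: everything up to (but excluding) renderpass_id. */
	if (stride == offsetof(struct fake_atom_v2, renderpass_id))
		return true;

	/* Anything other than the old or the full layout is rejected. */
	if (stride != sizeof(struct fake_atom_v2))
		return false;

	/* Full layout: reserved padding bytes must be zero so the struct
	 * can be extended again in a later UAPI version.
	 */
	for (i = 0; i < sizeof(atom->padding); i++)
		if (atom->padding[i] != 0)
			return false;

	return true;
}

Rejecting any stride other than the two supported layouts up front is what lets the structure grow again later without ambiguity about which fields a client actually supplied.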
@@ -1133,6 +1365,9 @@ void kbase_jd_done_worker(struct work_struct *data) js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; + dev_dbg(kbdev->dev, "Enter atom %p done worker for kctx %p\n", + (void *)katom, (void *)kctx); + KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); kbase_backend_complete_wq(kbdev, katom); @@ -1152,15 +1387,18 @@ void kbase_jd_done_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); if (katom->event_code == BASE_JD_EVENT_STOPPED) { - /* Atom has been promoted to stopped */ unsigned long flags; + dev_dbg(kbdev->dev, "Atom %p has been promoted to stopped\n", + (void *)katom); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); katom->status = KBASE_JD_ATOM_STATE_IN_JS; + dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + (void *)katom); kbase_js_unpull(kctx, katom); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -1271,6 +1509,9 @@ void kbase_jd_done_worker(struct work_struct *data) kbase_pm_context_idle(kbdev); KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); + + dev_dbg(kbdev->dev, "Leave atom %p done worker for kctx %p\n", + (void *)katom, (void *)kctx); } /** @@ -1398,6 +1639,7 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) kctx = katom->kctx; KBASE_DEBUG_ASSERT(NULL != kctx); + dev_dbg(kbdev->dev, "JD: cancelling atom %p\n", (void *)katom); KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); /* This should only be done from a context that is not scheduled */ @@ -1494,6 +1736,9 @@ int kbase_jd_init(struct kbase_context *kctx) #endif } + for (i = 0; i < BASE_JD_RP_COUNT; i++) + kctx->jctx.renderpasses[i].state = KBASE_JD_RP_COMPLETE; + mutex_init(&kctx->jctx.lock); init_waitqueue_head(&kctx->jctx.zero_jobs_wait); diff --git a/mali_kbase/mali_kbase_jm.c b/mali_kbase/mali_kbase_jm.c index b91a706..3f17dd7 100644 --- a/mali_kbase/mali_kbase_jm.c +++ b/mali_kbase/mali_kbase_jm.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,6 +45,9 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, int i; kctx = kbdev->hwaccess.active_kctx[js]; + dev_dbg(kbdev->dev, + "Trying to run the next %d jobs in kctx %p (s:%d)\n", + nr_jobs_to_submit, (void *)kctx, js); if (!kctx) return true; @@ -58,7 +61,8 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, kbase_backend_run_atom(kbdev, katom); } - return false; /* Slot ringbuffer should now be full */ + dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%d)\n", js); + return false; } u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) @@ -66,6 +70,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) u32 ret_mask = 0; lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "JM kick slot mask 0x%x\n", js_mask); while (js_mask) { int js = ffs(js_mask) - 1; @@ -77,6 +82,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) js_mask &= ~(1 << js); } + dev_dbg(kbdev->dev, "Can still submit to mask 0x%x\n", ret_mask); return ret_mask; } @@ -111,8 +117,11 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { - if (kbdev->hwaccess.active_kctx[js] == kctx) + if (kbdev->hwaccess.active_kctx[js] == kctx) { + dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + (void *)kctx, js); kbdev->hwaccess.active_kctx[js] = NULL; + } } } @@ -121,6 +130,9 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, { lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Atom %p is returning with event code 0x%x\n", + (void *)katom, katom->event_code); + if (katom->event_code != BASE_JD_EVENT_STOPPED && katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { return kbase_js_complete_atom(katom, NULL); diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c index 7ab25d1..b3ae604 100644 --- a/mali_kbase/mali_kbase_js.c +++ b/mali_kbase/mali_kbase_js.c @@ -37,6 +37,7 @@ #include "mali_kbase_jm.h" #include "mali_kbase_hwaccess_jm.h" + /* * Private types */ @@ -138,31 +139,6 @@ static void kbase_js_sync_timers(struct kbase_device *kbdev) mutex_unlock(&kbdev->js_data.runpool_mutex); } -/* Hold the mmu_hw_mutex and hwaccess_lock for this */ -bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - bool result = false; - int as_nr; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - lockdep_assert_held(&kbdev->hwaccess_lock); - - as_nr = kctx->as_nr; - if (atomic_read(&kctx->refcount) > 0) { - KBASE_DEBUG_ASSERT(as_nr >= 0); - - kbase_ctx_sched_retain_ctx_refcount(kctx); - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx, - NULL, 0u, atomic_read(&kctx->refcount)); - result = true; - } - - return result; -} - /** * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms * @kctx: Pointer to kbase context with ring buffer. 
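A note on the diagnostics added throughout mali_kbase_jm.c and mali_kbase_js.c: dev_dbg() messages are compiled out unless DEBUG is defined for these files or the kernel is built with CONFIG_DYNAMIC_DEBUG, in which case they can be enabled at runtime through the standard dynamic-debug control file, e.g. echo 'file mali_kbase_js.c +p' > /sys/kernel/debug/dynamic_debug/control. This is the generic kernel facility rather than anything specific to this patch, so the extra logging adds negligible overhead when left disabled.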
@@ -179,11 +155,18 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) { + bool none_to_pull; struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - return RB_EMPTY_ROOT(&rb->runnable_tree); + none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); + + dev_dbg(kctx->kbdev->dev, + "Slot %d (prio %d) is %spullable in kctx %p\n", + js, prio, none_to_pull ? "not " : "", kctx); + + return none_to_pull; } /** @@ -245,13 +228,37 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, rb_erase(node, &queue->runnable_tree); callback(kctx->kbdev, entry); + + /* Runnable end-of-renderpass atoms can also be in the linked + * list of atoms blocked on cross-slot dependencies. Remove them + * to avoid calling the callback twice. + */ + if (entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) { + WARN_ON(!(entry->core_req & + BASE_JD_REQ_END_RENDERPASS)); + dev_dbg(kctx->kbdev->dev, + "Del runnable atom %p from X_DEP list\n", + (void *)entry); + + list_del(&entry->queue); + entry->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + } } while (!list_empty(&queue->x_dep_head)) { struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, struct kbase_jd_atom, queue); + WARN_ON(!(entry->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + dev_dbg(kctx->kbdev->dev, + "Del blocked atom %p from X_DEP list\n", + (void *)entry); + list_del(queue->x_dep_head.next); + entry->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; callback(kctx->kbdev, entry); } @@ -296,10 +303,15 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) struct rb_node *node; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + dev_dbg(kctx->kbdev->dev, + "Peeking runnable tree of kctx %p for prio %d (s:%d)\n", + (void *)kctx, prio, js); node = rb_first(&rb->runnable_tree); - if (!node) + if (!node) { + dev_dbg(kctx->kbdev->dev, "Tree is empty\n"); return NULL; + } return rb_entry(node, struct kbase_jd_atom, runnable_tree_node); } @@ -354,6 +366,9 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + dev_dbg(kctx->kbdev->dev, "Erasing atom %p from runnable tree of kctx %p\n", + (void *)katom, (void *)kctx); + /* Atoms must be pulled in the correct order. 
*/ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); @@ -373,6 +388,9 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Adding atom %p to runnable tree of kctx %p (s:%d)\n", + (void *)katom, (void *)kctx, js); + while (*new) { struct kbase_jd_atom *entry = container_of(*new, struct kbase_jd_atom, runnable_tree_node); @@ -573,6 +591,7 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx) js_kctx_info = &kctx->jctx.sched_info; + kctx->slots_pullable = 0; js_kctx_info->ctx.nr_jobs = 0; kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); kbase_ctx_flag_clear(kctx, KCTX_DYING); @@ -663,6 +682,8 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Add pullable tail kctx %p (s:%d)\n", + (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -703,6 +724,8 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Add pullable head kctx %p (s:%d)\n", + (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -777,6 +800,8 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Add unpullable tail kctx %p (s:%d)\n", + (void *)kctx, js); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); @@ -867,7 +892,9 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( jctx.sched_info.ctx.ctx_list_entry[js]); list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - + dev_dbg(kbdev->dev, + "Popped %p from the pullable queue (s:%d)\n", + (void *)kctx, js); return kctx; } return NULL; @@ -912,32 +939,57 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, { struct kbasep_js_device_data *js_devdata; struct kbase_jd_atom *katom; + struct kbase_device *kbdev = kctx->kbdev; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); - js_devdata = &kctx->kbdev->js_data; + js_devdata = &kbdev->js_data; if (is_scheduled) { - if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + (void *)kctx); return false; + } } katom = jsctx_rb_peek(kctx, js); - if (!katom) + if (!katom) { + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + (void *)kctx, js); return false; /* No pullable atoms */ - if (kctx->blocked_js[js][katom->sched_priority]) + } + if (kctx->blocked_js[js][katom->sched_priority]) { + dev_dbg(kbdev->dev, + "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + (void *)kctx, katom->sched_priority, js); return false; - if (atomic_read(&katom->blocked)) + } + if (atomic_read(&katom->blocked)) { + dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_ctx_pullable\n", + (void *)katom); return false; /* next atom blocked */ - if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { + } + if (kbase_js_atom_blocked_on_x_dep(katom)) { if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || - katom->x_pre_dep->will_fail_event_code) + 
KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) { + dev_dbg(kbdev->dev, + "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + (void *)katom->x_pre_dep); return false; + } if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && - kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) + kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { + dev_dbg(kbdev->dev, + "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + (void *)katom, js); return false; + } } + dev_dbg(kbdev->dev, "JS: Atom %p is pullable in kctx %p (s:%d)\n", + (void *)katom, (void *)kctx, js); + return true; } @@ -958,9 +1010,15 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, int dep_js = kbase_js_get_slot(kbdev, dep_atom); int dep_prio = dep_atom->sched_priority; + dev_dbg(kbdev->dev, + "Checking dep %d of atom %p (s:%d) on %p (s:%d)\n", + i, (void *)katom, js, (void *)dep_atom, dep_js); + /* Dependent atom must already have been submitted */ if (!(dep_atom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { + dev_dbg(kbdev->dev, + "Blocker not submitted yet\n"); ret = false; break; } @@ -968,6 +1026,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, /* Dependencies with different priorities can't be represented in the ringbuffer */ if (prio != dep_prio) { + dev_dbg(kbdev->dev, + "Different atom priorities\n"); ret = false; break; } @@ -976,12 +1036,16 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, /* Only one same-slot dependency can be * represented in the ringbuffer */ if (has_dep) { + dev_dbg(kbdev->dev, + "Too many same-slot deps\n"); ret = false; break; } /* Each dependee atom can only have one * same-slot dependency */ if (dep_atom->post_dep) { + dev_dbg(kbdev->dev, + "Too many same-slot successors\n"); ret = false; break; } @@ -990,12 +1054,16 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, /* Only one cross-slot dependency can be * represented in the ringbuffer */ if (has_x_dep) { + dev_dbg(kbdev->dev, + "Too many cross-slot deps\n"); ret = false; break; } /* Each dependee atom can only have one * cross-slot dependency */ if (dep_atom->x_post_dep) { + dev_dbg(kbdev->dev, + "Too many cross-slot successors\n"); ret = false; break; } @@ -1003,6 +1071,9 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, * HW access ringbuffer */ if (dep_atom->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + dev_dbg(kbdev->dev, + "Blocker already in ringbuffer (state:%d)\n", + dep_atom->gpu_rb_state); ret = false; break; } @@ -1010,6 +1081,9 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, * completed */ if (dep_atom->status != KBASE_JD_ATOM_STATE_IN_JS) { + dev_dbg(kbdev->dev, + "Blocker already completed (status:%d)\n", + dep_atom->status); ret = false; break; } @@ -1030,6 +1104,11 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, if (dep_atom) { int dep_js = kbase_js_get_slot(kbdev, dep_atom); + dev_dbg(kbdev->dev, + "Clearing dep %d of atom %p (s:%d) on %p (s:%d)\n", + i, (void *)katom, js, (void *)dep_atom, + dep_js); + if ((js != dep_js) && (dep_atom->status != KBASE_JD_ATOM_STATE_COMPLETED) @@ -1040,6 +1119,10 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, katom->atom_flags |= KBASE_KATOM_FLAG_X_DEP_BLOCKED; + + dev_dbg(kbdev->dev, "Set X_DEP flag on atom %p\n", + (void *)katom); + katom->x_pre_dep = dep_atom; dep_atom->x_post_dep = katom; if (kbase_jd_katom_dep_type( @@ -1059,6 +1142,10 @@ static bool kbase_js_dep_validate(struct kbase_context 
*kctx, kbase_jd_katom_dep_clear(&katom->dep[i]); } } + } else { + dev_dbg(kbdev->dev, + "Deps of atom %p (s:%d) could not be represented\n", + (void *)katom, js); } return ret; @@ -1110,6 +1197,101 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx) kbase_js_set_ctx_priority(kctx, new_priority); } +/** + * js_add_start_rp() - Add an atom that starts a renderpass to the job scheduler + * @start_katom: Pointer to the atom to be added. + * Return: 0 if successful or a negative value on failure. + */ +static int js_add_start_rp(struct kbase_jd_atom *const start_katom) +{ + struct kbase_context *const kctx = start_katom->kctx; + struct kbase_jd_renderpass *rp; + struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return -EINVAL; + + if (start_katom->core_req & BASE_JD_REQ_END_RENDERPASS) + return -EINVAL; + + if (start_katom->renderpass_id >= ARRAY_SIZE(kctx->jctx.renderpasses)) + return -EINVAL; + + rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + + if (rp->state != KBASE_JD_RP_COMPLETE) + return -EINVAL; + + dev_dbg(kctx->kbdev->dev, "JS add start atom %p of RP %d\n", + (void *)start_katom, start_katom->renderpass_id); + + /* The following members are read when updating the job slot + * ringbuffer/fifo therefore they require additional locking. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + rp->state = KBASE_JD_RP_START; + rp->start_katom = start_katom; + rp->end_katom = NULL; + INIT_LIST_HEAD(&rp->oom_reg_list); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return 0; +} + +/** + * js_add_end_rp() - Add an atom that ends a renderpass to the job scheduler + * @end_katom: Pointer to the atom to be added. + * Return: 0 if successful or a negative value on failure. + */ +static int js_add_end_rp(struct kbase_jd_atom *const end_katom) +{ + struct kbase_context *const kctx = end_katom->kctx; + struct kbase_jd_renderpass *rp; + struct kbase_device *const kbdev = kctx->kbdev; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return -EINVAL; + + if (end_katom->core_req & BASE_JD_REQ_START_RENDERPASS) + return -EINVAL; + + if (end_katom->renderpass_id >= ARRAY_SIZE(kctx->jctx.renderpasses)) + return -EINVAL; + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + dev_dbg(kbdev->dev, "JS add end atom %p in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (rp->state == KBASE_JD_RP_COMPLETE) + return -EINVAL; + + if (rp->end_katom == NULL) { + /* We can't be in a retry state until the fragment job chain + * has completed. 
+ */ + unsigned long flags; + + WARN_ON(rp->state == KBASE_JD_RP_RETRY); + WARN_ON(rp->state == KBASE_JD_RP_RETRY_PEND_OOM); + WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + rp->end_katom = end_katom; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else + WARN_ON(rp->end_katom != end_katom); + + return 0; +} + bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom) { @@ -1117,6 +1299,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, struct kbasep_js_kctx_info *js_kctx_info; struct kbase_device *kbdev; struct kbasep_js_device_data *js_devdata; + int err = 0; bool enqueue_required = false; bool timer_sync = false; @@ -1132,6 +1315,17 @@ bool kbasep_js_add_job(struct kbase_context *kctx, mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + if (atom->core_req & BASE_JD_REQ_START_RENDERPASS) + err = js_add_start_rp(atom); + else if (atom->core_req & BASE_JD_REQ_END_RENDERPASS) + err = js_add_end_rp(atom); + + if (err < 0) { + atom->event_code = BASE_JD_EVENT_JOB_INVALID; + atom->status = KBASE_JD_ATOM_STATE_COMPLETED; + goto out_unlock; + } + /* * Begin Runpool transaction */ @@ -1140,6 +1334,8 @@ bool kbasep_js_add_job(struct kbase_context *kctx, /* Refcount ctx.nr_jobs */ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); ++(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, "Add atom %p to kctx %p; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); /* Lock for state available during IRQ */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1150,10 +1346,14 @@ bool kbasep_js_add_job(struct kbase_context *kctx, if (!kbase_js_dep_validate(kctx, atom)) { /* Dependencies could not be represented */ --(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, + "Remove atom %p from kctx %p; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); /* Setting atom status back to queued as it still has unresolved * dependencies */ atom->status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)atom); /* Undo the count, as the atom will get added again later but * leave the context priority adjusted or boosted, in case if @@ -1221,6 +1421,9 @@ bool kbasep_js_add_job(struct kbase_context *kctx, } } out_unlock: + dev_dbg(kbdev->dev, "Enqueue of kctx %p is %srequired\n", + kctx, enqueue_required ? 
"" : "not "); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); @@ -1246,6 +1449,9 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, /* De-refcount ctx.nr_jobs */ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); --(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, + "Remove atom %p from kctx %p; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (--kctx->atoms_count[atom->sched_priority] == 0) @@ -1282,44 +1488,6 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, return attr_state_changed; } -bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - unsigned long flags; - bool result; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - mutex_lock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); - - return result; -} - -struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, - int as_nr) -{ - unsigned long flags; - struct kbase_context *found_kctx = NULL; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - found_kctx = kbdev->as_to_kctx[as_nr]; - - if (found_kctx != NULL) - kbase_ctx_sched_retain_ctx_refcount(found_kctx); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return found_kctx; -} - /** * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after * releasing a context and/or atom @@ -1476,8 +1644,11 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( kbase_backend_release_ctx_irq(kbdev, kctx); for (slot = 0; slot < num_slots; slot++) { - if (kbdev->hwaccess.active_kctx[slot] == kctx) + if (kbdev->hwaccess.active_kctx[slot] == kctx) { + dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + (void *)kctx, slot); kbdev->hwaccess.active_kctx[slot] = NULL; + } } /* Ctx Attribute handling @@ -1679,6 +1850,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, bool kctx_suspended = false; int as_nr; + dev_dbg(kbdev->dev, "Scheduling kctx %p (s:%d)\n", kctx, js); + js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; @@ -1812,7 +1985,11 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && kbase_backend_use_ctx_sched(kbdev, kctx, js)) { - /* Context already has ASID - mark as active */ + + dev_dbg(kbdev->dev, + "kctx %p already has ASID - mark as active (s:%d)\n", + (void *)kctx, js); + if (kbdev->hwaccess.active_kctx[js] != kctx) { kbdev->hwaccess.active_kctx[js] = kctx; kbase_ctx_flag_clear(kctx, @@ -2043,7 +2220,7 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - bool enqueue_required; + bool enqueue_required, add_required = true; katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); @@ -2057,6 +2234,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, } else { enqueue_required = false; } + if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || (katom->pre_dep && (katom->pre_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { @@ -2064,10 +2242,21 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, int js = katom->slot_nr; struct 
jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + dev_dbg(kctx->kbdev->dev, "Add atom %p to X_DEP list (s:%d)\n", + (void *)katom, js); + list_add_tail(&katom->queue, &queue->x_dep_head); katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; - enqueue_required = false; + if (kbase_js_atom_blocked_on_x_dep(katom)) { + enqueue_required = false; + add_required = false; + } } else { + dev_dbg(kctx->kbdev->dev, "Atom %p not added to X_DEP list\n", + (void *)katom); + } + + if (add_required) { /* Check if there are lower priority jobs to soft stop */ kbase_job_slot_ctx_priority_check_locked(kctx, katom); @@ -2076,6 +2265,10 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; } + dev_dbg(kctx->kbdev->dev, + "Enqueue of kctx %p is %srequired to submit atom %p\n", + kctx, enqueue_required ? "" : "not ", katom); + return enqueue_required; } @@ -2090,19 +2283,36 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, */ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) { - lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock); + struct kbase_context *const kctx = katom->kctx; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); while (katom) { WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); - if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + if (!kbase_js_atom_blocked_on_x_dep(katom)) { + dev_dbg(kctx->kbdev->dev, + "Del atom %p from X_DEP list in js_move_to_tree\n", + (void *)katom); + list_del(&katom->queue); katom->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; - jsctx_tree_add(katom->kctx, katom); - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; + /* For incremental rendering, an end-of-renderpass atom + * may have had its dependency on start-of-renderpass + * ignored and may therefore already be in the tree. + */ + if (!(katom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { + jsctx_tree_add(kctx, katom); + katom->atom_flags |= + KBASE_KATOM_FLAG_JSCTX_IN_TREE; + } } else { + dev_dbg(kctx->kbdev->dev, + "Atom %p blocked on x-dep in js_move_to_tree\n", + (void *)katom); break; } @@ -2145,6 +2355,9 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, /* Remove dependency.*/ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %p\n", + (void *)x_dep); + /* Fail if it had a data dependency. 
*/ if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) { x_dep->will_fail_event_code = katom->event_code; @@ -2164,22 +2377,37 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_DEBUG_ASSERT(kctx); kbdev = kctx->kbdev; + dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %p (s:%d)\n", + (void *)kctx, js); js_devdata = &kbdev->js_data; lockdep_assert_held(&kbdev->hwaccess_lock); - if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + (void *)kctx); return NULL; + } if (kbase_pm_is_suspending(kbdev)) return NULL; katom = jsctx_rb_peek(kctx, js); - if (!katom) + if (!katom) { + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + (void *)kctx, js); return NULL; - if (kctx->blocked_js[js][katom->sched_priority]) + } + if (kctx->blocked_js[js][katom->sched_priority]) { + dev_dbg(kbdev->dev, + "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + (void *)kctx, katom->sched_priority, js); return NULL; - if (atomic_read(&katom->blocked)) + } + if (atomic_read(&katom->blocked)) { + dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_pull\n", + (void *)katom); return NULL; + } /* Due to ordering restrictions when unpulling atoms on failure, we do * not allow multiple runs of fail-dep atoms from the same context to be @@ -2192,14 +2420,22 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) return NULL; } - if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { + if (kbase_js_atom_blocked_on_x_dep(katom)) { if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || - katom->x_pre_dep->will_fail_event_code) + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) { + dev_dbg(kbdev->dev, + "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + (void *)katom->x_pre_dep); return NULL; + } if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && - kbase_backend_nr_atoms_on_slot(kbdev, js)) + kbase_backend_nr_atoms_on_slot(kbdev, js)) { + dev_dbg(kbdev->dev, + "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + (void *)katom, js); return NULL; + } } kbase_ctx_flag_set(kctx, KCTX_PULLED); @@ -2221,9 +2457,214 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom->ticks = 0; + dev_dbg(kbdev->dev, "JS: successfully pulled atom %p from kctx %p (s:%d)\n", + (void *)katom, (void *)kctx, js); + return katom; } +/** + * js_return_of_start_rp() - Handle soft-stop of an atom that starts a + * renderpass + * @start_katom: Pointer to the start-of-renderpass atom that was soft-stopped + * + * This function is called to switch to incremental rendering if the tiler job + * chain at the start of a renderpass has used too much memory. It prevents the + * tiler job being pulled for execution in the job scheduler again until the + * next phase of incremental rendering is complete. + * + * If the end-of-renderpass atom is already in the job scheduler (because a + * previous attempt at tiling used too much memory during the same renderpass) + * then it is unblocked; otherwise, it is run by handing it to the scheduler. 
+ */ +static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) +{ + struct kbase_context *const kctx = start_katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + struct kbase_jd_atom *end_katom; + unsigned long flags; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return; + + if (WARN_ON(start_katom->renderpass_id >= + ARRAY_SIZE(kctx->jctx.renderpasses))) + return; + + rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + + if (WARN_ON(rp->start_katom != start_katom)) + return; + + dev_dbg(kctx->kbdev->dev, + "JS return start atom %p in state %d of RP %d\n", + (void *)start_katom, (int)rp->state, + start_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) + return; + + /* The tiler job might have been soft-stopped for some reason other + * than running out of memory. + */ + if (rp->state == KBASE_JD_RP_START || rp->state == KBASE_JD_RP_RETRY) { + dev_dbg(kctx->kbdev->dev, + "JS return isn't OOM in state %d of RP %d\n", + (int)rp->state, start_katom->renderpass_id); + return; + } + + dev_dbg(kctx->kbdev->dev, + "JS return confirm OOM in state %d of RP %d\n", + (int)rp->state, start_katom->renderpass_id); + + if (WARN_ON(rp->state != KBASE_JD_RP_PEND_OOM && + rp->state != KBASE_JD_RP_RETRY_PEND_OOM)) + return; + + /* Prevent the tiler job being pulled for execution in the + * job scheduler again. + */ + dev_dbg(kbdev->dev, "Blocking start atom %p\n", + (void *)start_katom); + atomic_inc(&start_katom->blocked); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + rp->state = (rp->state == KBASE_JD_RP_PEND_OOM) ? + KBASE_JD_RP_OOM : KBASE_JD_RP_RETRY_OOM; + + /* Was the fragment job chain submitted to kbase yet? */ + end_katom = rp->end_katom; + if (end_katom) { + dev_dbg(kctx->kbdev->dev, "JS return add end atom %p\n", + (void *)end_katom); + + if (rp->state == KBASE_JD_RP_RETRY_OOM) { + /* Allow the end of the renderpass to be pulled for + * execution again to continue incremental rendering. + */ + dev_dbg(kbdev->dev, "Unblocking end atom %p\n", + (void *)end_katom); + atomic_dec(&end_katom->blocked); + WARN_ON(!(end_katom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_TREE)); + WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); + + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, + end_katom->slot_nr); + + /* Expect the fragment job chain to be scheduled without + * further action because this function is called when + * returning an atom to the job scheduler ringbuffer. + */ + end_katom = NULL; + } else { + WARN_ON(end_katom->status != + KBASE_JD_ATOM_STATE_QUEUED && + end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (end_katom) + kbase_jd_dep_clear_locked(end_katom); +} + +/** + * js_return_of_end_rp() - Handle completion of an atom that ends a renderpass + * @end_katom: Pointer to the end-of-renderpass atom that was completed + * + * This function is called to continue incremental rendering if the tiler job + * chain at the start of a renderpass used too much memory. It resets the + * mechanism for detecting excessive memory usage then allows the soft-stopped + * tiler job chain to be pulled for execution again. + * + * The start-of-renderpass atom must already been submitted to kbase. 
+ */ +static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) +{ + struct kbase_context *const kctx = end_katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + struct kbase_jd_atom *start_katom; + unsigned long flags; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return; + + if (WARN_ON(end_katom->renderpass_id >= + ARRAY_SIZE(kctx->jctx.renderpasses))) + return; + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + if (WARN_ON(rp->end_katom != end_katom)) + return; + + dev_dbg(kctx->kbdev->dev, + "JS return end atom %p in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (WARN_ON(rp->state != KBASE_JD_RP_OOM && + rp->state != KBASE_JD_RP_RETRY_OOM)) + return; + + /* Reduce the number of mapped pages in the memory regions that + * triggered out-of-memory last time so that we can detect excessive + * memory usage again. + */ + kbase_gpu_vm_lock(kctx); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + while (!list_empty(&rp->oom_reg_list)) { + struct kbase_va_region *reg = + list_first_entry(&rp->oom_reg_list, + struct kbase_va_region, link); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, + "Reset backing to %zu pages for region %p\n", + reg->threshold_pages, (void *)reg); + + if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED)) + kbase_mem_shrink(kctx, reg, reg->threshold_pages); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + dev_dbg(kbdev->dev, "Deleting region %p from list\n", + (void *)reg); + list_del_init(®->link); + kbase_va_region_alloc_put(kctx, reg); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_gpu_vm_unlock(kctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + rp->state = KBASE_JD_RP_RETRY; + dev_dbg(kbdev->dev, "Changed state to %d for retry\n", rp->state); + + /* Allow the start of the renderpass to be pulled for execution again + * to begin/continue incremental rendering. 
+ */ + start_katom = rp->start_katom; + if (!WARN_ON(!start_katom)) { + dev_dbg(kbdev->dev, "Unblocking start atom %p\n", + (void *)start_katom); + atomic_dec(&start_katom->blocked); + (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, + start_katom->slot_nr); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} static void js_return_worker(struct work_struct *data) { @@ -2241,7 +2682,11 @@ static void js_return_worker(struct work_struct *data) unsigned long flags; base_jd_core_req core_req = katom->core_req; - KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom); + dev_dbg(kbdev->dev, "%s for atom %p with event code 0x%x\n", + __func__, (void *)katom, katom->event_code); + + if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom); kbase_backend_complete_wq(kbdev, katom); @@ -2253,7 +2698,8 @@ static void js_return_worker(struct work_struct *data) atomic_dec(&kctx->atoms_pulled); atomic_dec(&kctx->atoms_pulled_slot[js]); - atomic_dec(&katom->blocked); + if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) + atomic_dec(&katom->blocked); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -2278,7 +2724,17 @@ static void js_return_worker(struct work_struct *data) } if (!atomic_read(&kctx->atoms_pulled)) { + dev_dbg(kbdev->dev, + "No atoms currently pulled from context %p\n", + (void *)kctx); + if (!kctx->slots_pullable) { + dev_dbg(kbdev->dev, + "Context %p %s counted as runnable\n", + (void *)kctx, + kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ? + "is" : "isn't"); + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); atomic_dec(&kbdev->js_data.nr_contexts_runnable); @@ -2309,6 +2765,11 @@ static void js_return_worker(struct work_struct *data) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (context_idle) { + dev_dbg(kbdev->dev, + "Context %p %s counted as active\n", + (void *)kctx, + kbase_ctx_flag(kctx, KCTX_ACTIVE) ? + "is" : "isn't"); WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); kbase_pm_context_idle(kbdev); @@ -2320,7 +2781,21 @@ static void js_return_worker(struct work_struct *data) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); + if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) { + mutex_lock(&kctx->jctx.lock); + js_return_of_start_rp(katom); + mutex_unlock(&kctx->jctx.lock); + } else if (katom->event_code == BASE_JD_EVENT_END_RP_DONE) { + mutex_lock(&kctx->jctx.lock); + js_return_of_end_rp(katom); + mutex_unlock(&kctx->jctx.lock); + } + katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; + dev_dbg(kbdev->dev, "JS: retained state %s finished", + kbasep_js_has_atom_finished(&retained_state) ? 
+ "has" : "hasn't"); + WARN_ON(kbasep_js_has_atom_finished(&retained_state)); kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, @@ -2329,10 +2804,16 @@ static void js_return_worker(struct work_struct *data) kbase_js_sched_all(kbdev); kbase_backend_complete_wq_post_sched(kbdev, core_req); + + dev_dbg(kbdev->dev, "Leaving %s for atom %p\n", + __func__, (void *)katom); } void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { + dev_dbg(kctx->kbdev->dev, "Unpulling atom %p in kctx %p\n", + (void *)katom, (void *)kctx); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); jsctx_rb_unpull(kctx, katom); @@ -2348,6 +2829,157 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) queue_work(kctx->jctx.job_done_wq, &katom->work); } +/** + * js_complete_start_rp() - Handle completion of atom that starts a renderpass + * @kctx: Context pointer + * @start_katom: Pointer to the atom that completed + * + * Put any references to virtual memory regions that might have been added by + * kbase_job_slot_softstop_start_rp() because the tiler job chain completed + * despite any pending soft-stop request. + * + * If the atom that just completed was soft-stopped during a previous attempt to + * run it then there should be a blocked end-of-renderpass atom waiting for it, + * which we must unblock to process the output of the tiler job chain. + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool js_complete_start_rp(struct kbase_context *kctx, + struct kbase_jd_atom *const start_katom) +{ + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + bool timer_sync = false; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return false; + + if (WARN_ON(start_katom->renderpass_id >= + ARRAY_SIZE(kctx->jctx.renderpasses))) + return false; + + rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + + if (WARN_ON(rp->start_katom != start_katom)) + return false; + + dev_dbg(kctx->kbdev->dev, + "Start atom %p is done in state %d of RP %d\n", + (void *)start_katom, (int)rp->state, + start_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) + return false; + + if (rp->state == KBASE_JD_RP_PEND_OOM || + rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { + unsigned long flags; + + dev_dbg(kctx->kbdev->dev, + "Start atom %p completed before soft-stop\n", + (void *)start_katom); + + kbase_gpu_vm_lock(kctx); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + while (!list_empty(&rp->oom_reg_list)) { + struct kbase_va_region *reg = + list_first_entry(&rp->oom_reg_list, + struct kbase_va_region, link); + + WARN_ON(reg->flags & KBASE_REG_VA_FREED); + dev_dbg(kctx->kbdev->dev, "Deleting region %p from list\n", + (void *)reg); + list_del_init(®->link); + kbase_va_region_alloc_put(kctx, reg); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_gpu_vm_unlock(kctx); + } else { + dev_dbg(kctx->kbdev->dev, + "Start atom %p did not exceed memory threshold\n", + (void *)start_katom); + + WARN_ON(rp->state != KBASE_JD_RP_START && + rp->state != KBASE_JD_RP_RETRY); + } + + if (rp->state == KBASE_JD_RP_RETRY || + rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { + struct kbase_jd_atom *const end_katom = rp->end_katom; + + if (!WARN_ON(!end_katom)) { + unsigned long flags; + + /* Allow the end of the renderpass to be pulled for + * execution again to continue incremental rendering. 
+ */ + dev_dbg(kbdev->dev, "Unblocking end atom %p!\n", + (void *)end_katom); + atomic_dec(&end_katom->blocked); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + timer_sync = kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, end_katom->slot_nr); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + } + + return timer_sync; +} + +/** + * js_complete_end_rp() - Handle final completion of atom that ends a renderpass + * @kctx: Context pointer + * @end_katom: Pointer to the atom that completed for the last time + * + * This function must only be called if the renderpass actually completed + * without the tiler job chain at the start using too much memory; otherwise + * completion of the end-of-renderpass atom is handled similarly to a soft-stop. + */ +static void js_complete_end_rp(struct kbase_context *kctx, + struct kbase_jd_atom *const end_katom) +{ + struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return; + + if (WARN_ON(end_katom->renderpass_id >= + ARRAY_SIZE(kctx->jctx.renderpasses))) + return; + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + if (WARN_ON(rp->end_katom != end_katom)) + return; + + dev_dbg(kbdev->dev, "End atom %p is done in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || + WARN_ON(rp->state == KBASE_JD_RP_OOM) || + WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM)) + return; + + /* Rendering completed without running out of memory. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + WARN_ON(!list_empty(&rp->oom_reg_list)); + rp->state = KBASE_JD_RP_COMPLETE; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, "Renderpass %d is complete\n", + end_katom->renderpass_id); +} + bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom *katom) { @@ -2363,6 +2995,16 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, kbdev = kctx->kbdev; atom_slot = katom->slot_nr; + dev_dbg(kbdev->dev, "%s for atom %p (s:%d)\n", + __func__, (void *)katom, atom_slot); + + /* Update the incremental rendering state machine. 
+ */ + if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) + timer_sync |= js_complete_start_rp(kctx, katom); + else if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) + js_complete_end_rp(kctx, katom); + js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; @@ -2372,6 +3014,9 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { + dev_dbg(kbdev->dev, "Atom %p is in runnable_tree\n", + (void *)katom); + context_idle = !atomic_dec_return(&kctx->atoms_pulled); atomic_dec(&kctx->atoms_pulled_slot[atom_slot]); kctx->atoms_pulled_slot_pri[atom_slot][prio]--; @@ -2388,6 +3033,10 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, * all atoms have now been processed, then unblock the slot */ if (!kctx->atoms_pulled_slot_pri[atom_slot][prio] && kctx->blocked_js[atom_slot][prio]) { + dev_dbg(kbdev->dev, + "kctx %p is no longer blocked from submitting on slot %d at priority %d\n", + (void *)kctx, atom_slot, prio); + kctx->blocked_js[atom_slot][prio] = false; if (kbase_js_ctx_pullable(kctx, atom_slot, true)) timer_sync |= @@ -2438,17 +3087,79 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, /* Mark context as inactive. The pm reference will be dropped later in * jd_done_worker(). */ - if (context_idle) + if (context_idle) { + dev_dbg(kbdev->dev, "kctx %p is no longer active\n", + (void *)kctx); kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (timer_sync) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&js_devdata->runpool_mutex); + dev_dbg(kbdev->dev, "Leaving %s\n", __func__); return context_idle; } +/** + * js_end_rp_is_complete() - Check whether an atom that ends a renderpass has + * completed for the last time. + * + * @end_katom: Pointer to the atom that completed on the hardware. + * + * An atom that ends a renderpass may be run on the hardware several times + * before notifying userspace or allowing dependent atoms to be executed. + * + * This function is used to decide whether or not to allow end-of-renderpass + * atom completion. It only returns false if the atom at the start of the + * renderpass was soft-stopped because it used too much memory during the most + * recent attempt at tiling. + * + * Return: True if the atom completed for the last time. + */ +static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) +{ + struct kbase_context *const kctx = end_katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return true; + + if (WARN_ON(end_katom->renderpass_id >= + ARRAY_SIZE(kctx->jctx.renderpasses))) + return true; + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + if (WARN_ON(rp->end_katom != end_katom)) + return true; + + dev_dbg(kbdev->dev, + "JS complete end atom %p in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, + end_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) + return true; + + /* Failure of end-of-renderpass atoms must not return to the + * start of the renderpass. 
+ */ + if (end_katom->event_code != BASE_JD_EVENT_DONE) + return true; + + if (rp->state != KBASE_JD_RP_OOM && + rp->state != KBASE_JD_RP_RETRY_OOM) + return true; + + dev_dbg(kbdev->dev, "Suppressing end atom completion\n"); + return false; +} + struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) { @@ -2457,14 +3168,23 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, struct kbase_jd_atom *x_dep = katom->x_post_dep; kbdev = kctx->kbdev; - + dev_dbg(kbdev->dev, "Atom %p complete in kctx %p (post-dep %p)\n", + (void *)katom, (void *)kctx, (void *)x_dep); lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + if ((katom->core_req & BASE_JD_REQ_END_RENDERPASS) && + !js_end_rp_is_complete(katom)) { + katom->event_code = BASE_JD_EVENT_END_RP_DONE; + kbase_js_unpull(kctx, katom); + return NULL; + } + if (katom->will_fail_event_code) katom->event_code = katom->will_fail_event_code; katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; + dev_dbg(kbdev->dev, "Atom %p status to HW completed\n", (void *)katom); if (katom->event_code != BASE_JD_EVENT_DONE) { kbase_js_evict_deps(kctx, katom, katom->slot_nr, @@ -2478,24 +3198,103 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, /* Unblock cross dependency if present */ if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || - !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && - (x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && + (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)) { bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, false); x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + + dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %p\n", + (void *)x_dep); + kbase_js_move_to_tree(x_dep); + if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, false)) kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, x_dep->slot_nr); - if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) + if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { + dev_dbg(kbdev->dev, "Atom %p is in runnable tree\n", + (void *)x_dep); return x_dep; + } + } else { + dev_dbg(kbdev->dev, + "No cross-slot dep to unblock for atom %p\n", + (void *)katom); } return NULL; } +/** + * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot + * dependency + * @katom: Pointer to an atom in the slot ringbuffer + * + * A cross-slot dependency is ignored if necessary to unblock incremental + * rendering. If the atom at the start of a renderpass used too much memory + * and was soft-stopped then the atom at the end of a renderpass is submitted + * to hardware regardless of its dependency on the start-of-renderpass atom. + * This can happen multiple times for the same pair of atoms. 
+ * + * Return: true to block the atom or false to allow it to be submitted to + * hardware + */ +bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) +{ + struct kbase_context *const kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!(katom->atom_flags & + KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + dev_dbg(kbdev->dev, "Atom %p is not blocked on a cross-slot dependency", + (void *)katom); + return false; + } + + if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) { + dev_dbg(kbdev->dev, "Atom %p is blocked on a cross-slot dependency", + (void *)katom); + return true; + } + + if (WARN_ON(katom->renderpass_id >= + ARRAY_SIZE(kctx->jctx.renderpasses))) + return true; + + rp = &kctx->jctx.renderpasses[katom->renderpass_id]; + /* We can read a subset of renderpass state without holding + * higher-level locks (but not end_katom, for example). + */ + + WARN_ON(rp->state == KBASE_JD_RP_COMPLETE); + + dev_dbg(kbdev->dev, "End atom has cross-slot dep in state %d\n", + (int)rp->state); + + if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM) + return true; + + /* Tiler ran out of memory so allow the fragment job chain to run + * if it only depends on the tiler job chain. + */ + if (katom->x_pre_dep != rp->start_katom) { + dev_dbg(kbdev->dev, "Dependency is on %p not start atom %p\n", + (void *)katom->x_pre_dep, (void *)rp->start_katom); + return true; + } + + dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %p\n", + (void *)katom->x_pre_dep); + + return false; +} + void kbase_js_sched(struct kbase_device *kbdev, int js_mask) { struct kbasep_js_device_data *js_devdata; @@ -2504,6 +3303,9 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; int js; + dev_dbg(kbdev->dev, "%s kbdev %p mask 0x%x\n", + __func__, (void *)kbdev, (unsigned int)js_mask); + js_devdata = &kbdev->js_data; down(&js_devdata->schedule_sem); @@ -2526,15 +3328,24 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) if (!kctx) { js_mask &= ~(1 << js); - break; /* No contexts on pullable list */ + dev_dbg(kbdev->dev, + "No kctx on pullable list (s:%d)\n", + js); + break; } if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { context_idle = true; + dev_dbg(kbdev->dev, + "kctx %p is not active (s:%d)\n", + (void *)kctx, js); + if (kbase_pm_context_active_handle_suspend( kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { + dev_dbg(kbdev->dev, + "Suspend pending (s:%d)\n", js); /* Suspend pending - return context to * queue and stop scheduling */ mutex_lock( @@ -2554,7 +3365,11 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) if (!kbase_js_use_ctx(kbdev, kctx, js)) { mutex_lock( &kctx->jctx.sched_info.ctx.jsctx_mutex); - /* Context can not be used at this time */ + + dev_dbg(kbdev->dev, + "kctx %p cannot be used at this time\n", + kctx); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbase_js_ctx_pullable(kctx, js, false) || kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) @@ -2585,10 +3400,18 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) kbase_ctx_flag_clear(kctx, KCTX_PULLED); if (!kbase_jm_kick(kbdev, 1 << js)) - /* No more jobs can be submitted on this slot */ + dev_dbg(kbdev->dev, + "No more jobs can be submitted (s:%d)\n", + js); js_mask &= ~(1 << js); if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { - bool pullable = kbase_js_ctx_pullable(kctx, js, + bool pullable; + + dev_dbg(kbdev->dev, + "No atoms pulled from kctx %p 
(s:%d)\n", + (void *)kctx, js); + + pullable = kbase_js_ctx_pullable(kctx, js, true); /* Failed to pull jobs - push to head of list. @@ -2645,7 +3468,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) break; /* Could not run atoms on this slot */ } - /* Push to back of list */ + dev_dbg(kbdev->dev, "Push kctx %p to back of list\n", + (void *)kctx); if (kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= kbase_js_ctx_list_add_pullable_nolock( @@ -2665,8 +3489,11 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == last_active[js] && - ctx_waiting[js]) + ctx_waiting[js]) { + dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + (void *)last_active[js], js); kbdev->hwaccess.active_kctx[js] = NULL; + } } mutex_unlock(&js_devdata->queue_mutex); @@ -2867,3 +3694,81 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } + + +/* Hold the mmu_hw_mutex and hwaccess_lock for this */ +bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + bool result = false; + int as_nr; + + if (WARN_ON(kbdev == NULL)) + return result; + + if (WARN_ON(kctx == NULL)) + return result; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + as_nr = kctx->as_nr; + if (atomic_read(&kctx->refcount) > 0) { + KBASE_DEBUG_ASSERT(as_nr >= 0); + + kbase_ctx_sched_retain_ctx_refcount(kctx); + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx, + NULL, 0u, atomic_read(&kctx->refcount)); + result = true; + } + + return result; +} + + +bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + unsigned long flags; + bool result = false; + + if (WARN_ON(kbdev == NULL)) + return result; + + if (WARN_ON(kctx == NULL)) + return result; + + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + return result; +} + +struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, + int as_nr) +{ + unsigned long flags; + struct kbase_context *found_kctx = NULL; + + if (WARN_ON(kbdev == NULL)) + return NULL; + + if (WARN_ON(as_nr < 0)) + return NULL; + + if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) + return NULL; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + found_kctx = kbdev->as_to_kctx[as_nr]; + + if (found_kctx != NULL) + kbase_ctx_sched_retain_ctx_refcount(found_kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return found_kctx; +} diff --git a/mali_kbase/mali_kbase_js.h b/mali_kbase/mali_kbase_js.h index e4bd4a2..51ab023 100644 --- a/mali_kbase/mali_kbase_js.h +++ b/mali_kbase/mali_kbase_js.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,737 +34,85 @@ #include "context/mali_kbase_context.h" #include "mali_kbase_defs.h" #include "mali_kbase_debug.h" +#include <mali_kbase_ctx_sched.h> -#include "mali_kbase_js_ctx_attr.h" +#include <jm/mali_kbase_jm_js.h> /** - * @addtogroup base_api - * @{ - */ - -/** - * @addtogroup base_kbase_api - * @{ - */ - -/** - * @addtogroup kbase_js Job Scheduler Internal APIs - * @{ - * - * These APIs are Internal to KBase. - */ - -/** - * @brief Initialize the Job Scheduler - * - * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero - * initialized before passing to the kbasep_js_devdata_init() function. This is - * to give efficient error path code. - */ -int kbasep_js_devdata_init(struct kbase_device * const kbdev); - -/** - * @brief Halt the Job Scheduler. - * - * It is safe to call this on \a kbdev even if it the kbasep_js_device_data - * sub-structure was never initialized/failed initialization, to give efficient - * error-path code. - * - * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must - * be zero initialized before passing to the kbasep_js_devdata_init() - * function. This is to give efficient error path code. - * - * It is a Programming Error to call this whilst there are still kbase_context - * structures registered with this scheduler. - * - */ -void kbasep_js_devdata_halt(struct kbase_device *kbdev); - -/** - * @brief Terminate the Job Scheduler - * - * It is safe to call this on \a kbdev even if it the kbasep_js_device_data - * sub-structure was never initialized/failed initialization, to give efficient - * error-path code. - * - * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must - * be zero initialized before passing to the kbasep_js_devdata_init() - * function. This is to give efficient error path code. - * - * It is a Programming Error to call this whilst there are still kbase_context - * structures registered with this scheduler. - */ -void kbasep_js_devdata_term(struct kbase_device *kbdev); - -/** - * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler. - * - * This effectively registers a struct kbase_context with a Job Scheduler. - * - * It does not register any jobs owned by the struct kbase_context with the scheduler. - * Those must be separately registered by kbasep_js_add_job(). - * - * The struct kbase_context must be zero intitialized before passing to the - * kbase_js_init() function. This is to give efficient error path code. - */ -int kbasep_js_kctx_init(struct kbase_context * const kctx); - -/** - * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler - * - * This effectively de-registers a struct kbase_context from its Job Scheduler - * - * It is safe to call this on a struct kbase_context that has never had or failed - * initialization of its jctx.sched_info member, to give efficient error-path - * code. - * - * For this to work, the struct kbase_context must be zero intitialized before passing - * to the kbase_js_init() function. - * - * It is a Programming Error to call this whilst there are still jobs - * registered with this context. - */ -void kbasep_js_kctx_term(struct kbase_context *kctx); - -/** - * @brief Add a job chain to the Job Scheduler, and take necessary actions to - * schedule the context/run the job. 
- * - * This atomically does the following: - * - Update the numbers of jobs information - * - Add the job to the run pool if necessary (part of init_job) - * - * Once this is done, then an appropriate action is taken: - * - If the ctx is scheduled, it attempts to start the next job (which might be - * this added job) - * - Otherwise, and if this is the first job on the context, it enqueues it on - * the Policy Queue - * - * The Policy's Queue can be updated by this in the following ways: - * - In the above case that this is the first job on the context - * - If the context is high priority and the context is not scheduled, then it - * could cause the Policy to schedule out a low-priority context, allowing - * this context to be scheduled in. - * - * If the context is already scheduled on the RunPool, then adding a job to it - * is guarenteed not to update the Policy Queue. And so, the caller is - * guarenteed to not need to try scheduling a context from the Run Pool - it - * can safely assert that the result is false. - * - * It is a programming error to have more than U32_MAX jobs in flight at a time. - * - * The following locking conditions are made on the caller: - * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold hwaccess_lock (as this will be obtained internally) - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be - * obtained internally) - * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). - * - * @return true indicates that the Policy Queue was updated, and so the - * caller will need to try scheduling a context onto the Run Pool. - * @return false indicates that no updates were made to the Policy Queue, - * so no further action is required from the caller. This is \b always returned - * when the context is currently scheduled. - */ -bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); - -/** - * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'. - * - * Completely removing a job requires several calls: - * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of - * the atom - * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler - * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the - * remaining state held as part of the job having been run. - * - * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions. - * - * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions. - * - * It is a programming error to call this when: - * - \a atom is not a job belonging to kctx. - * - \a atom has already been removed from the Job Scheduler. - * - \a atom is still in the runpool - * - * Do not use this for removing jobs being killed by kbase_jd_cancel() - use - * kbasep_js_remove_cancelled_job() instead. - * - * The following locking conditions are made on the caller: - * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - */ -void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom); - -/** - * @brief Completely remove a job chain from the Job Scheduler, in the case - * where the job chain was cancelled. 
+ * kbasep_js_runpool_retain_ctx_nolock - Refcount a context as being busy, + * preventing it from being scheduled + * out. * - * This is a variant of kbasep_js_remove_job() that takes care of removing all - * of the retained state too. This is generally useful for cancelled atoms, - * which need not be handled in an optimal way. + * This function can safely be called from IRQ context. * - * It is a programming error to call this when: - * - \a atom is not a job belonging to kctx. - * - \a atom has already been removed from the Job Scheduler. - * - \a atom is still in the runpool: - * - it is not being killed with kbasep_jd_cancel() - * - * The following locking conditions are made on the caller: - * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold the hwaccess_lock, (as this will be obtained - * internally) - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be - * obtained internally) + * The following locks must be held by the caller: + * * mmu_hw_mutex, hwaccess_lock * - * @return true indicates that ctx attributes have changed and the caller - * should call kbase_js_sched_all() to try to run more jobs - * @return false otherwise + * Return: value true if the retain succeeded, and the context will not be + * scheduled out, otherwise false if the retain failed (because the context + * is being/has been scheduled out). */ -bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_jd_atom *katom); +bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx); /** - * @brief Refcount a context as being busy, preventing it from being scheduled - * out. + * kbasep_js_runpool_retain_ctx - Refcount a context as being busy, preventing + * it from being scheduled out. * - * @note This function can safely be called from IRQ context. + * This function can safely be called from IRQ context. * * The following locking conditions are made on the caller: - * - it must \em not hold mmu_hw_mutex and hwaccess_lock, because they will be + * * it must not hold mmu_hw_mutex and hwaccess_lock, because they will be * used internally. * - * @return value != false if the retain succeeded, and the context will not be scheduled out. - * @return false if the retain failed (because the context is being/has been scheduled out). + * Return: value true if the retain succeeded, and the context will not be + * scheduled out, otherwise false if the retain failed (because the context + * is being/has been scheduled out). */ -bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); +bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx); /** - * @brief Refcount a context as being busy, preventing it from being scheduled - * out. - * - * @note This function can safely be called from IRQ context. - * - * The following locks must be held by the caller: - * - mmu_hw_mutex, hwaccess_lock - * - * @return value != false if the retain succeeded, and the context will not be scheduled out. - * @return false if the retain failed (because the context is being/has been scheduled out). - */ -bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Lookup a context in the Run Pool based upon its current address space - * and ensure that is stays scheduled in. 
+ * kbasep_js_runpool_lookup_ctx - Lookup a context in the Run Pool based upon + * its current address space and ensure that + * is stays scheduled in. * * The context is refcounted as being busy to prevent it from scheduling * out. It must be released with kbasep_js_runpool_release_ctx() when it is no * longer required to stay scheduled in. * - * @note This function can safely be called from IRQ context. + * This function can safely be called from IRQ context. * * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. + * * it must not hold the hwaccess_lock, because it will be used internally. * If the hwaccess_lock is already held, then the caller should use * kbasep_js_runpool_lookup_ctx_nolock() instead. * - * @return a valid struct kbase_context on success, which has been refcounted as being busy. - * @return NULL on failure, indicating that no context was found in \a as_nr - */ -struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr); - -/** - * @brief Handling the requeuing/killing of a context that was evicted from the - * policy queue or runpool. - * - * This should be used whenever handing off a context that has been evicted - * from the policy queue or the runpool: - * - If the context is not dying and has jobs, it gets re-added to the policy - * queue - * - Otherwise, it is not added - * - * In addition, if the context is dying the jobs are killed asynchronously. - * - * In all cases, the Power Manager active reference is released - * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. \a - * has_pm_ref must be set to false whenever the context was not previously in - * the runpool and does not hold a Power Manager active refcount. Note that - * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an - * active refcount even though they weren't in the runpool. - * - * The following locking conditions are made on the caller: - * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be - * obtained internally) - */ -void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref); - -/** - * @brief Release a refcount of a context being busy, allowing it to be - * scheduled out. - * - * When the refcount reaches zero and the context \em might be scheduled out - * (depending on whether the Scheudling Policy has deemed it so, or if it has run - * out of jobs). - * - * If the context does get scheduled out, then The following actions will be - * taken as part of deschduling a context: - * - For the context being descheduled: - * - If the context is in the processing of dying (all the jobs are being - * removed from it), then descheduling also kills off any jobs remaining in the - * context. - * - If the context is not dying, and any jobs remain after descheduling the - * context then it is re-enqueued to the Policy's Queue. - * - Otherwise, the context is still known to the scheduler, but remains absent - * from the Policy Queue until a job is next added to it. - * - In all descheduling cases, the Power Manager active reference (obtained - * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()). 
- * - * Whilst the context is being descheduled, this also handles actions that - * cause more atoms to be run: - * - Attempt submitting atoms when the Context Attributes on the Runpool have - * changed. This is because the context being scheduled out could mean that - * there are more opportunities to run atoms. - * - Attempt submitting to a slot that was previously blocked due to affinity - * restrictions. This is usually only necessary when releasing a context - * happens as part of completing a previous job, but is harmless nonetheless. - * - Attempt scheduling in a new context (if one is available), and if necessary, - * running a job from that new context. - * - * Unlike retaining a context in the runpool, this function \b cannot be called - * from IRQ context. - * - * It is a programming error to call this on a \a kctx that is not currently - * scheduled, or that already has a zero refcount. - * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be - * obtained internally) - * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be - * obtained internally) - * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be - * obtained internally) - * - */ -void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional - * actions from completing an atom. - * - * This is usually called as part of completing an atom and releasing the - * refcount on the context held by the atom. - * - * Therefore, the extra actions carried out are part of handling actions queued - * on a completed atom, namely: - * - Releasing the atom's context attributes - * - Retrying the submission on a particular slot, because we couldn't submit - * on that slot from an IRQ handler. - * - * The locking conditions of this function are the same as those for - * kbasep_js_runpool_release_ctx() - */ -void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); - -/** - * @brief Variant of kbase_js_runpool_release_ctx() that assumes that - * kbasep_js_device_data::runpool_mutex and - * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not - * attempt to schedule new contexts. - */ -void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx); - -/** - * @brief Schedule in a privileged context - * - * This schedules a context in regardless of the context priority. - * If the runpool is full, a context will be forced out of the runpool and the function will wait - * for the new context to be scheduled in. - * The context will be kept scheduled in (and the corresponding address space reserved) until - * kbasep_js_release_privileged_ctx is called). - * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. 
- * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be - * obtained internally) - * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be - * obtained internally) - * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). - * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will - * be used internally. - * - */ -void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Release a privileged context, allowing it to be scheduled out. - * - * See kbasep_js_runpool_release_ctx for potential side effects. - * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be - * obtained internally) - * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be - * obtained internally) - * + * Return: a valid struct kbase_context on success, which has been refcounted + * as being busy or return NULL on failure, indicating that no context was found + * in as_nr. */ -void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); +struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, + int as_nr); /** - * @brief Try to submit the next job on each slot - * - * The following locks may be used: - * - kbasep_js_device_data::runpool_mutex - * - hwaccess_lock - */ -void kbase_js_try_run_jobs(struct kbase_device *kbdev); - -/** - * @brief Suspend the job scheduler during a Power Management Suspend event. - * - * Causes all contexts to be removed from the runpool, and prevents any - * contexts from (re)entering the runpool. - * - * This does not handle suspending the one privileged context: the caller must - * instead do this by by suspending the GPU HW Counter Instrumentation. - * - * This will eventually cause all Power Management active references held by - * contexts on the runpool to be released, without running any more atoms. - * - * The caller must then wait for all Power Mangement active refcount to become - * zero before completing the suspend. - * - * The emptying mechanism may take some time to complete, since it can wait for - * jobs to complete naturally instead of forcing them to end quickly. However, - * this is bounded by the Job Scheduler's Job Timeouts. Hence, this - * function is guaranteed to complete in a finite time. - */ -void kbasep_js_suspend(struct kbase_device *kbdev); - -/** - * @brief Resume the Job Scheduler after a Power Management Resume event. - * - * This restores the actions from kbasep_js_suspend(): - * - Schedules contexts back into the runpool - * - Resumes running atoms on the GPU - */ -void kbasep_js_resume(struct kbase_device *kbdev); - -/** - * @brief Submit an atom to the job scheduler. - * - * The atom is enqueued on the context's ringbuffer. The caller must have - * ensured that all dependencies can be represented in the ringbuffer. - * - * Caller must hold jctx->lock - * - * @param[in] kctx Context pointer - * @param[in] atom Pointer to the atom to submit - * - * @return Whether the context requires to be enqueued. */ -bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom); - -/** - * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. 
- * @kctx: Context Pointer - * @prio: Priority (specifies the queue together with js). - * @js: Job slot (specifies the queue together with prio). - * - * Pushes all possible atoms from the linked list to the ringbuffer. - * Number of atoms are limited to free space in the ringbuffer and - * number of available atoms in the linked list. - * - */ -void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); -/** - * @brief Pull an atom from a context in the job scheduler for execution. - * - * The atom will not be removed from the ringbuffer at this stage. - * - * The HW access lock must be held when calling this function. - * - * @param[in] kctx Context to pull from - * @param[in] js Job slot to pull from - * @return Pointer to an atom, or NULL if there are no atoms for this - * slot that can be currently run. - */ -struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); - -/** - * @brief Return an atom to the job scheduler ringbuffer. - * - * An atom is 'unpulled' if execution is stopped but intended to be returned to - * later. The most common reason for this is that the atom has been - * soft-stopped. - * - * Note that if multiple atoms are to be 'unpulled', they must be returned in - * the reverse order to which they were originally pulled. It is a programming - * error to return atoms in any other order. - * - * The HW access lock must be held when calling this function. - * - * @param[in] kctx Context pointer - * @param[in] atom Pointer to the atom to unpull - */ -void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); - -/** - * @brief Complete an atom from jd_done_worker(), removing it from the job - * scheduler ringbuffer. - * - * If the atom failed then all dependee atoms marked for failure propagation - * will also fail. - * - * @param[in] kctx Context pointer - * @param[in] katom Pointer to the atom to complete - * @return true if the context is now idle (no jobs pulled) - * false otherwise - */ -bool kbase_js_complete_atom_wq(struct kbase_context *kctx, - struct kbase_jd_atom *katom); - -/** - * @brief Complete an atom. - * - * Most of the work required to complete an atom will be performed by - * jd_done_worker(). - * - * The HW access lock must be held when calling this function. - * - * @param[in] katom Pointer to the atom to complete - * @param[in] end_timestamp The time that the atom completed (may be NULL) - * - * Return: Atom that has now been unblocked and can now be run, or NULL if none - */ -struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, - ktime_t *end_timestamp); - - -/** - * @brief Submit atoms from all available contexts. - * - * This will attempt to submit as many jobs as possible to the provided job - * slots. It will exit when either all job slots are full, or all contexts have - * been used. - * - * @param[in] kbdev Device pointer - * @param[in] js_mask Mask of job slots to submit to - */ -void kbase_js_sched(struct kbase_device *kbdev, int js_mask); - -/** - * kbase_jd_zap_context - Attempt to deschedule a context that is being - * destroyed - * @kctx: Context pointer - * - * This will attempt to remove a context from any internal job scheduler queues - * and perform any other actions to ensure a context will not be submitted - * from. - * - * If the context is currently scheduled, then the caller must wait for all - * pending jobs to complete before taking any further action. 
- */ -void kbase_js_zap_context(struct kbase_context *kctx); - -/** - * @brief Validate an atom - * - * This will determine whether the atom can be scheduled onto the GPU. Atoms - * with invalid combinations of core requirements will be rejected. - * - * @param[in] kbdev Device pointer - * @param[in] katom Atom to validate - * @return true if atom is valid - * false otherwise - */ -bool kbase_js_is_atom_valid(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); - -/** - * kbase_js_set_timeouts - update all JS timeouts with user specified data - * @kbdev: Device pointer - * - * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is - * set to a positive number then that becomes the new value used, if a timeout - * is negative then the default is set. - */ -void kbase_js_set_timeouts(struct kbase_device *kbdev); - -/** - * kbase_js_set_ctx_priority - set the context priority - * @kctx: Context pointer - * @new_priority: New priority value for the Context - * - * The context priority is set to a new value and it is moved to the - * pullable/unpullable list as per the new priority. - */ -void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority); - - -/** - * kbase_js_update_ctx_priority - update the context priority - * @kctx: Context pointer - * - * The context priority gets updated as per the priority of atoms currently in - * use for that context, but only if system priority mode for context scheduling - * is being used. - */ -void kbase_js_update_ctx_priority(struct kbase_context *kctx); - -/* - * Helpers follow - */ - -/** - * @brief Check that a context is allowed to submit jobs on this policy - * - * The purpose of this abstraction is to hide the underlying data size, and wrap up - * the long repeated line of code. - * - * As with any bool, never test the return value with true. - * - * The caller must hold hwaccess_lock. - */ -static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) -{ - u16 test_bit; - - /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - test_bit = (u16) (1u << kctx->as_nr); - - return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); -} - -/** - * @brief Allow a context to submit jobs on this policy - * - * The purpose of this abstraction is to hide the underlying data size, and wrap up - * the long repeated line of code. - * - * The caller must hold hwaccess_lock. - */ -static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) -{ - u16 set_bit; - - /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - set_bit = (u16) (1u << kctx->as_nr); - - dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", - kctx, kctx->as_nr); - - js_devdata->runpool_irq.submit_allowed |= set_bit; -} - -/** - * @brief Prevent a context from submitting more jobs on this policy - * - * The purpose of this abstraction is to hide the underlying data size, and wrap up - * the long repeated line of code. - * - * The caller must hold hwaccess_lock. 
- */ -static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) -{ - u16 clear_bit; - u16 clear_mask; - - /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - clear_bit = (u16) (1u << kctx->as_nr); - clear_mask = ~clear_bit; - - dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", - kctx, kctx->as_nr); - - js_devdata->runpool_irq.submit_allowed &= clear_mask; -} - -/** - * Create an initial 'invalid' atom retained state, that requires no - * atom-related work to be done on releasing with - * kbasep_js_runpool_release_ctx_and_katom_retained_state() - */ -static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state) -{ - retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; - retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; -} - -/** - * Copy atom state that can be made available after jd_done_nolock() is called - * on that atom. - */ -static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom) -{ - retained_state->event_code = katom->event_code; - retained_state->core_req = katom->core_req; - retained_state->sched_priority = katom->sched_priority; - retained_state->device_nr = katom->device_nr; -} - -/** - * @brief Determine whether an atom has finished (given its retained state), - * and so should be given back to userspace/removed from the system. - * - * Reasons for an atom not finishing include: - * - Being soft-stopped (and so, the atom should be resubmitted sometime later) - * - * @param[in] katom_retained_state the retained state of the atom to check - * @return false if the atom has not finished - * @return !=false if the atom has finished - */ -static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state) -{ - return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT); -} - -/** - * @brief Determine whether a struct kbasep_js_atom_retained_state is valid - * - * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the - * code should just ignore it. - * - * @param[in] katom_retained_state the atom's retained state to check - * @return false if the retained state is invalid, and can be ignored - * @return !=false if the retained state is valid - */ -static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state) -{ - return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); -} - -/** - * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the + * kbasep_js_runpool_lookup_ctx_noretain - Variant of + * kbasep_js_runpool_lookup_ctx() that can be used when the * context is guaranteed to be already previously retained. * - * It is a programming error to supply the \a as_nr of a context that has not + * It is a programming error to supply the as_nr of a context that has not * been previously retained/has a busy refcount of zero. The only exception is - * when there is no ctx in \a as_nr (NULL returned). + * when there is no ctx in as_nr (NULL returned). 
* * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. + * * it must not hold the hwaccess_lock, because it will be used internally. * - * @return a valid struct kbase_context on success, with a refcount that is guaranteed - * to be non-zero and unmodified by this function. - * @return NULL on failure, indicating that no context was found in \a as_nr + * Return: a valid struct kbase_context on success, with a refcount that is + * guaranteed to be non-zero and unmodified by this function or + * return NULL on failure, indicating that no context was found in as_nr. */ -static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr) +static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain( + struct kbase_device *kbdev, int as_nr) { struct kbase_context *found_kctx; @@ -778,136 +126,4 @@ static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct return found_kctx; } -/* - * The following locking conditions are made on the caller: - * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. - * - The caller must hold the kbasep_js_device_data::runpool_mutex - */ -static inline void kbase_js_runpool_inc_context_count( - struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - js_devdata = &kbdev->js_data; - js_kctx_info = &kctx->jctx.sched_info; - - lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - lockdep_assert_held(&js_devdata->runpool_mutex); - - /* Track total contexts */ - KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); - ++(js_devdata->nr_all_contexts_running); - - if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { - /* Track contexts that can submit jobs */ - KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < - S8_MAX); - ++(js_devdata->nr_user_contexts_running); - } -} - -/* - * The following locking conditions are made on the caller: - * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. - * - The caller must hold the kbasep_js_device_data::runpool_mutex - */ -static inline void kbase_js_runpool_dec_context_count( - struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - js_devdata = &kbdev->js_data; - js_kctx_info = &kctx->jctx.sched_info; - - lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - lockdep_assert_held(&js_devdata->runpool_mutex); - - /* Track total contexts */ - --(js_devdata->nr_all_contexts_running); - KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); - - if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { - /* Track contexts that can submit jobs */ - --(js_devdata->nr_user_contexts_running); - KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); - } -} - - -/** - * @brief Submit atoms from all available contexts to all job slots. - * - * This will attempt to submit as many jobs as possible. It will exit when - * either all job slots are full, or all contexts have been used. 
- * - * @param[in] kbdev Device pointer - */ -static inline void kbase_js_sched_all(struct kbase_device *kbdev) -{ - kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); -} - -extern const int -kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; - -extern const base_jd_prio -kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - -/** - * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio) - * to relative ordering - * @atom_prio: Priority ID to translate. - * - * Atom priority values for @ref base_jd_prio cannot be compared directly to - * find out which are higher or lower. - * - * This function will convert base_jd_prio values for successively lower - * priorities into a monotonically increasing sequence. That is, the lower the - * base_jd_prio priority, the higher the value produced by this function. This - * is in accordance with how the rest of the kernel treates priority. - * - * The mapping is 1:1 and the size of the valid input range is the same as the - * size of the valid output range, i.e. - * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS - * - * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions - * - * Return: On success: a value in the inclusive range - * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure: - * KBASE_JS_ATOM_SCHED_PRIO_INVALID - */ -static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) -{ - if (atom_prio >= BASE_JD_NR_PRIO_LEVELS) - return KBASE_JS_ATOM_SCHED_PRIO_INVALID; - - return kbasep_js_atom_priority_to_relative[atom_prio]; -} - -static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio) -{ - unsigned int prio_idx; - - KBASE_DEBUG_ASSERT(0 <= sched_prio - && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT); - - prio_idx = (unsigned int)sched_prio; - - return kbasep_js_relative_priority_to_atom[prio_idx]; -} - - /** @} *//* end group kbase_js */ - /** @} *//* end group base_kbase_api */ - /** @} *//* end group base_api */ - -#endif /* _KBASE_JS_H_ */ +#endif /* _KBASE_JS_H_ */ diff --git a/mali_kbase/mali_kbase_js_defs.h b/mali_kbase/mali_kbase_js_defs.h index 052a0b3..f858687 100644 --- a/mali_kbase/mali_kbase_js_defs.h +++ b/mali_kbase/mali_kbase_js_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,12 +51,6 @@ struct kbase_jd_atom; typedef u32 kbase_context_flags; -struct kbasep_atom_req { - base_jd_core_req core_req; - kbase_context_flags ctx_req; - u32 device_nr; -}; - /** Callback function run on all of a context's jobs registered with the Job * Scheduler */ typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom); @@ -246,24 +240,6 @@ struct kbasep_js_device_data { } runpool_irq; /** - * Run Pool mutex, for managing contexts within the runpool. - * Unless otherwise specified, you must hold this lock whilst accessing any - * members that follow - * - * In addition, this is used to access: - * - the kbasep_js_kctx_info::runpool substructure - */ - struct mutex runpool_mutex; - - /** - * Queue Lock, used to access the Policy's queue of contexts independently - * of the Run Pool. - * - * Of course, you don't need the Run Pool lock to access this. - */ - struct mutex queue_mutex; - - /** * Scheduling semaphore. 
This must be held when calling * kbase_jm_kick() */ @@ -299,9 +275,6 @@ struct kbasep_js_device_data { u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */ u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */ - /**< Value for JS_SOFT_JOB_TIMEOUT */ - atomic_t soft_job_timeout_ms; - /** List of suspended soft jobs */ struct list_head suspended_soft_jobs_list; @@ -321,6 +294,27 @@ struct kbasep_js_device_data { /* Number of contexts that can either be pulled from or are currently * running */ atomic_t nr_contexts_runnable; + + /** Value for JS_SOFT_JOB_TIMEOUT */ + atomic_t soft_job_timeout_ms; + + /** + * Queue Lock, used to access the Policy's queue of contexts + * independently of the Run Pool. + * + * Of course, you don't need the Run Pool lock to access this. + */ + struct mutex queue_mutex; + + /** + * Run Pool mutex, for managing contexts within the runpool. + * Unless otherwise specified, you must hold this lock whilst accessing + * any members that follow + * + * In addition, this is used to access: + * * the kbasep_js_kctx_info::runpool substructure + */ + struct mutex runpool_mutex; }; /** diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index de57024..2362e22 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,8 +20,6 @@ * */ - - /** * Base kernel memory APIs */ @@ -44,6 +42,30 @@ #include <mali_kbase_native_mgm.h> #include <mali_kbase_mem_pool_group.h> #include <mmu/mali_kbase_mmu.h> +#include <mali_kbase_config_defaults.h> + +/* + * Alignment of objects allocated by the GPU inside a just-in-time memory + * region whose size is given by an end address + * + * This is the alignment of objects allocated by the GPU, but possibly not + * fully written to. When taken into account with + * KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES it gives the maximum number of bytes + * that the JIT memory report size can exceed the actual backed memory size. + */ +#define KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES (128u) + +/* + * Maximum size of objects allocated by the GPU inside a just-in-time memory + * region whose size is given by an end address + * + * This is the maximum size of objects allocated by the GPU, but possibly not + * fully written to. When taken into account with + * KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES it gives the maximum number of bytes + * that the JIT memory report size can exceed the actual backed memory size. 
+ */ +#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u) + /* Forward declarations */ static void free_partial_locked(struct kbase_context *kctx, @@ -672,9 +694,11 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) void kbase_region_tracker_term(struct kbase_context *kctx) { + kbase_gpu_vm_lock(kctx); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); + kbase_gpu_vm_unlock(kctx); } void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) @@ -812,15 +836,22 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, #endif int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, - u8 max_allocations, u8 trim_level, int group_id) + int max_allocations, int trim_level, int group_id, + u64 phys_pages_limit) { int err = 0; - if (trim_level > 100) + if (trim_level < 0 || trim_level > BASE_JIT_MAX_TRIM_LEVEL) + return -EINVAL; + + if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) return -EINVAL; - if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || - WARN_ON(group_id < 0)) +#if MALI_JIT_PRESSURE_LIMIT + if (phys_pages_limit > jit_va_pages) +#else + if (phys_pages_limit != jit_va_pages) +#endif /* MALI_JIT_PRESSURE_LIMIT */ return -EINVAL; kbase_gpu_vm_lock(kctx); @@ -839,6 +870,11 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, kctx->trim_level = trim_level; kctx->jit_va = true; kctx->jit_group_id = group_id; +#if MALI_JIT_PRESSURE_LIMIT + kctx->jit_phys_pages_limit = phys_pages_limit; + dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", + phys_pages_limit); +#endif /* MALI_JIT_PRESSURE_LIMIT */ } kbase_gpu_vm_unlock(kctx); @@ -940,6 +976,12 @@ int kbase_mem_init(struct kbase_device *kbdev) /* Initialize memory usage */ atomic_set(&memdev->used_pages, 0); +#ifdef IR_THRESHOLD + atomic_set(&memdev->ir_threshold, IR_THRESHOLD); +#else + atomic_set(&memdev->ir_threshold, DEFAULT_IR_THRESHOLD); +#endif + kbdev->mgm_dev = &kbase_native_mgm_dev; #ifdef CONFIG_OF @@ -1055,6 +1097,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, new_reg->nr_pages = nr_pages; INIT_LIST_HEAD(&new_reg->jit_node); + INIT_LIST_HEAD(&new_reg->link); return new_reg; } @@ -1109,6 +1152,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) if (WARN_ON(kbase_is_region_invalid(reg))) return; + dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n", + (void *)reg); mutex_lock(&kctx->jit_evict_lock); @@ -1633,6 +1678,8 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != reg); + dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n", + __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); if (reg->flags & KBASE_REG_NO_USER_FREE) { @@ -1688,6 +1735,8 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) struct kbase_va_region *reg; KBASE_DEBUG_ASSERT(kctx != NULL); + dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n", + __func__, gpu_addr, (void *)kctx); if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid"); @@ -3049,6 +3098,153 @@ static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kct return meet_reqs; } +#if MALI_JIT_PRESSURE_LIMIT +/* Function will guarantee *@freed will not exceed @pages_needed + */ +static int 
kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, + struct kbase_va_region *reg, size_t pages_needed, + size_t *freed) +{ + int err = 0; + size_t available_pages = 0u; + const size_t old_pages = kbase_reg_current_backed_size(reg); + size_t new_pages = old_pages; + size_t to_free = 0u; + size_t max_allowed_pages = old_pages; + + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + + /* Is this a JIT allocation that has been reported on? */ + if (reg->used_pages == reg->nr_pages) + goto out; + + if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) { + /* For address based memory usage calculation, the GPU + * allocates objects of up to size 's', but aligns every object + * to alignment 'a', with a < s. + * + * It also doesn't have to write to all bytes in an object of + * size 's'. + * + * Hence, we can observe the GPU's address for the end of used + * memory being up to (s - a) bytes into the first unallocated + * page. + * + * We allow for this and only warn when it exceeds this bound + * (rounded up to page sized units). Note, this is allowed to + * exceed reg->nr_pages. + */ + max_allowed_pages += PFN_UP( + KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES - + KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES); + } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + /* The GPU could report being ready to write to the next + * 'extent' sized chunk, but didn't actually write to it, so we + * can report up to 'extent' size pages more than the backed + * size. + * + * Note, this is allowed to exceed reg->nr_pages. + */ + max_allowed_pages += reg->extent; + + /* Also note that in these GPUs, the GPU may make a large (>1 + * page) initial allocation but not actually write out to all + * of it. Hence it might report that a much higher amount of + * memory was used than actually was written to. This does not + * result in a real warning because on growing this memory we + * round up the size of the allocation up to an 'extent' sized + * chunk, hence automatically bringing the backed size up to + * the reported size. + */ + } + + if (old_pages < reg->used_pages) { + /* Prevent overflow on available_pages, but only report the + * problem if it's in a scenario where used_pages should have + * been consistent with the backed size + * + * Note: In case of a size-based report, this legitimately + * happens in common use-cases: we allow for up to this size of + * memory being used, but depending on the content it doesn't + * have to use all of it. 
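An illustrative aside, not part of the patch: with the two constants defined at the top of this file and a typical 4 KiB page size, the slack tolerated for an address-based JIT report works out to a single extra page.

/* Sketch only: how much an address-based JIT report may legitimately
 * overshoot the backed size, using the constants added above and assuming
 * a 4 KiB page (PFN_UP rounds a byte count up to whole pages).
 */
#define SKETCH_PAGE_SIZE 4096u
#define SKETCH_PFN_UP(x) (((x) + SKETCH_PAGE_SIZE - 1u) / SKETCH_PAGE_SIZE)

/* 512 - 128 = 384 bytes of possible overshoot past the last backed page,
 * i.e. SKETCH_PFN_UP(384) == 1 extra page added to max_allowed_pages.
 */
static const size_t sketch_trim_slack_pages =
	SKETCH_PFN_UP(KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES -
		      KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES);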
+ * + * Hence, we're much more quiet about that in the size-based + * report case - it's not indicating a real problem, it's just + * for information + */ + if (max_allowed_pages < reg->used_pages) { + if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) + dev_warn(kctx->kbdev->dev, + "%s: current backed pages %zu < reported used pages %zu (allowed to be up to %zu) on JIT 0x%llx vapages %zu\n", + __func__, + old_pages, reg->used_pages, + max_allowed_pages, + reg->start_pfn << PAGE_SHIFT, + reg->nr_pages); + else + dev_dbg(kctx->kbdev->dev, + "%s: no need to trim, current backed pages %zu < reported used pages %zu on size-report for JIT 0x%llx vapages %zu\n", + __func__, + old_pages, reg->used_pages, + reg->start_pfn << PAGE_SHIFT, + reg->nr_pages); + } + /* In any case, no error condition to report here, caller can + * try other regions + */ + + goto out; + } + available_pages = old_pages - reg->used_pages; + to_free = min(available_pages, pages_needed); + + new_pages -= to_free; + + err = kbase_mem_shrink(kctx, reg, new_pages); + +out: + trace_mali_jit_trim_from_region(reg, to_free, old_pages, + available_pages, new_pages); + *freed = to_free; + return err; +} + +size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, + size_t pages_needed) +{ + struct kbase_va_region *reg, *tmp; + size_t total_freed = 0; + + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) { + int err; + size_t freed = 0u; + + err = kbase_mem_jit_trim_pages_from_region(kctx, reg, + pages_needed, &freed); + + if (err) { + /* Failed to trim, try the next region */ + continue; + } + + total_freed += freed; + WARN_ON(freed > pages_needed); + pages_needed -= freed; + if (!pages_needed) + break; + } + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + + trace_mali_jit_trim(total_freed); + + return total_freed; +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + static int kbase_jit_grow(struct kbase_context *kctx, struct base_jit_alloc_info *info, struct kbase_va_region *reg) { @@ -3208,6 +3404,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, { struct kbase_va_region *reg = NULL; +#if MALI_JIT_PRESSURE_LIMIT + if (info->va_pages > (kctx->jit_phys_pages_limit - + kctx->jit_current_phys_pressure) && + kctx->jit_current_phys_pressure > 0) { + dev_dbg(kctx->kbdev->dev, + "Max JIT page allocations limit reached: active pages %llu, max pages %llu\n", + kctx->jit_current_phys_pressure + info->va_pages, + kctx->jit_phys_pages_limit); + return NULL; + } +#endif /* MALI_JIT_PRESSURE_LIMIT */ if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { /* Too many current allocations */ dev_dbg(kctx->kbdev->dev, @@ -3228,6 +3435,33 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, return NULL; } +#if MALI_JIT_PRESSURE_LIMIT + /* Before allocating a new just-in-time memory region or reusing a + * previous one, ensure that the total JIT physical page usage also will + * not exceed the pressure limit. + * + * If there are no reported-on allocations, then we already guarantee + * this will be the case - because our current pressure then only comes + * from the va_pages of each JIT region, hence JIT physical page usage + * is guaranteed to be bounded by this. + * + * However as soon as JIT allocations become "reported on", the + * pressure is lowered to allow new JIT regions to be allocated. 
It is + * after such a point that the total JIT physical page usage could + * (either now or in the future on a grow-on-GPU-page-fault) exceed the + * pressure limit, but only on newly allocated JIT regions. Hence, trim + * any "reported on" regions. + * + * Any pages freed will go into the pool and be allocated from there in + * kbase_mem_alloc(). + * + * In future, GPUCORE-21217: Only do this when physical page usage + * could exceed the pressure limit, and only trim as much as is + * necessary. + */ + kbase_mem_jit_trim_pages(kctx, SIZE_MAX); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + mutex_lock(&kctx->jit_evict_lock); /* @@ -3372,6 +3606,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); } + trace_mali_jit_alloc(reg, info->id); + kctx->jit_current_allocations++; kctx->jit_current_allocations_per_bin[info->bin_id]++; @@ -3379,6 +3615,13 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, reg->jit_usage_id = info->usage_id; reg->jit_bin_id = info->bin_id; +#if MALI_JIT_PRESSURE_LIMIT + if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) + reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE; + reg->heap_info_gpu_addr = info->heap_info_gpu_addr; + kbase_jit_report_update_pressure(kctx, reg, info->va_pages, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif /* MALI_JIT_PRESSURE_LIMIT */ return reg; @@ -3394,6 +3637,9 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) { u64 old_pages; + /* JIT id not immediately available here, so use 0u */ + trace_mali_jit_free(reg, 0u); + /* Get current size of JIT region */ old_pages = kbase_reg_current_backed_size(reg); if (reg->initial_commit < old_pages) { @@ -3404,19 +3650,16 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) div_u64(old_pages * (100 - kctx->trim_level), 100)); u64 delta = old_pages - new_size; - if (delta) { - kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta, - old_pages); - kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta, - old_pages); - - kbase_free_phy_pages_helper(reg->cpu_alloc, delta); - if (reg->cpu_alloc != reg->gpu_alloc) - kbase_free_phy_pages_helper(reg->gpu_alloc, - delta); - } + if (delta) + kbase_mem_shrink(kctx, reg, old_pages - delta); } +#if MALI_JIT_PRESSURE_LIMIT + reg->heap_info_gpu_addr = 0; + kbase_jit_report_update_pressure(kctx, reg, 0, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + kctx->jit_current_allocations--; kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; @@ -3535,6 +3778,118 @@ void kbase_jit_term(struct kbase_context *kctx) cancel_work_sync(&kctx->jit_work); } +#if MALI_JIT_PRESSURE_LIMIT +void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned int flags) +{ + /* Offset to the location used for a JIT report within the GPU memory + * + * This constants only used for this debugging function - not useful + * anywhere else in kbase + */ + const u64 jit_report_gpu_mem_offset = sizeof(u64)*2; + + u64 addr_start; + struct kbase_vmap_struct mapping; + u64 *ptr; + + if (reg->heap_info_gpu_addr == 0ull) + goto out; + + /* Nothing else to trace in the case the memory just contains the + * size. Other tracepoints already record the relevant area of memory. 
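Tying the pieces above together (the pressure fields, the SIZE_MAX trim before a new allocation, and the GPUCORE-21217 note), a hedged sketch of how a caller could trim only the projected overshoot instead of everything. This helper is illustrative and is not added by the patch.

/* Sketch only: trim just enough reported-on JIT pages to keep the
 * projected physical pressure for a new allocation under the limit.
 */
static void sketch_trim_to_pressure_limit(struct kbase_context *kctx,
		u64 new_va_pages)
{
	u64 projected = kctx->jit_current_phys_pressure + new_va_pages;

	if (projected > kctx->jit_phys_pages_limit)
		kbase_mem_jit_trim_pages(kctx,
				projected - kctx->jit_phys_pages_limit);
}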
+ */ + if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) + goto out; + + addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset; + + ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, + &mapping); + if (!ptr) { + dev_warn(kctx->kbdev->dev, + "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n", + __func__, reg->start_pfn << PAGE_SHIFT, + addr_start); + goto out; + } + + trace_mali_jit_report_gpu_mem(addr_start, reg->start_pfn << PAGE_SHIFT, + ptr, flags); + + kbase_vunmap(kctx, &mapping); +out: + return; +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +#if MALI_JIT_PRESSURE_LIMIT +void kbase_jit_report_update_pressure(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_used_pages, + unsigned int flags) +{ + u64 diff; + + lockdep_assert_held(&kctx->jctx.lock); + + trace_mali_jit_report_pressure(reg, new_used_pages, + kctx->jit_current_phys_pressure + new_used_pages - + reg->used_pages, + flags); + + if (WARN_ON(new_used_pages > reg->nr_pages)) + return; + + if (reg->used_pages > new_used_pages) { + /* We reduced the number of used pages */ + diff = reg->used_pages - new_used_pages; + + if (!WARN_ON(diff > kctx->jit_current_phys_pressure)) + kctx->jit_current_phys_pressure -= diff; + + reg->used_pages = new_used_pages; + + /* In the case of pressure reduced on a free, don't attempt to + * trim the region: it will soon be placed on the evict_list + * so that if we really were close to running out of memory then + * the shrinker can reclaim the memory. + */ + if ((flags & KBASE_JIT_REPORT_ON_ALLOC_OR_FREE) == 0u) { + size_t freed; + int err; + + kbase_gpu_vm_lock(kctx); + /* If this was from an allocation that a single + * BASE_JD_REQ_SOFT_JIT_ALLOC atom that is allowed to + * breach the pressure limit, then check whether we can + * bring the total JIT physical page below (or at least + * nearer) the pressure limit. + * + * In future, GPUCORE-21217: Only do this when physical + * page usage currently exceeds the pressure limit, and + * only trim as much as is necessary. + */ + err = kbase_mem_jit_trim_pages_from_region(kctx, reg, + SIZE_MAX, &freed); + kbase_gpu_vm_unlock(kctx); + + CSTD_UNUSED(freed); + /* Nothing to do if trimming failed */ + CSTD_UNUSED(err); + } + } else { + /* We increased the number of used pages */ + diff = new_used_pages - reg->used_pages; + + if (!WARN_ON(diff > U64_MAX - kctx->jit_current_phys_pressure)) + kctx->jit_current_phys_pressure += diff; + + reg->used_pages = new_used_pages; + } + +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + bool kbase_has_exec_va_zone(struct kbase_context *kctx) { bool has_exec_va_zone; diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index 0ce3037..3f74492 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -184,6 +184,19 @@ struct kbase_mem_phy_alloc { */ #define PINNED_ON_IMPORT (1<<31) +/** + * enum kbase_jit_report_flags - Flags for just-in-time memory allocation + * pressure limit functions + * @KBASE_JIT_REPORT_ON_ALLOC_OR_FREE: Notifying about an update happening due + * to a just-in-time memory allocation or free + * + * Used to control flow within pressure limit related functions, or to provide + * extra debugging information + */ +enum kbase_jit_report_flags { + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) +}; + static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) { KBASE_DEBUG_ASSERT(alloc); @@ -236,18 +249,35 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m /** * A GPU memory region, and attributes for CPU mappings. + * + * @rblink: Node in a red-black tree of memory regions within the same zone of + * the GPU's virtual address space. + * @link: Links to neighboring items in a list of growable memory regions + * that triggered incremental rendering by growing too much. + * @rbtree: Backlink to the red-black tree of memory regions. + * @start_pfn: The Page Frame Number in GPU virtual address space. + * @nr_pages: The size of the region in pages. + * @initial_commit: Initial commit, for aligning the start address and + * correctly growing KBASE_REG_TILER_ALIGN_TOP regions. + * @threshold_pages: If non-zero and the amount of memory committed to a region + * that can grow on page fault exceeds this number of pages + * then the driver switches to incremental rendering. + * @extent: Number of pages allocated on page fault. + * @cpu_alloc: The physical memory we mmap to the CPU when mapping this region. + * @gpu_alloc: The physical memory we mmap to the GPU when mapping this region. + * @jit_node: Links to neighboring regions in the just-in-time memory pool. + * @jit_usage_id: The last just-in-time memory usage ID for this region. + * @jit_bin_id: The just-in-time memory bin this region came from. + * @va_refcnt: Number of users of this region. Protected by reg_lock. */ struct kbase_va_region { struct rb_node rblink; struct list_head link; - - struct rb_root *rbtree; /* Backlink to rb tree */ - - u64 start_pfn; /* The PFN in GPU space */ + struct rb_root *rbtree; + u64 start_pfn; size_t nr_pages; - /* Initial commit, for aligning the start address and correctly growing - * KBASE_REG_TILER_ALIGN_TOP regions */ size_t initial_commit; + size_t threshold_pages; /* Free region */ #define KBASE_REG_FREE (1ul << 0) @@ -332,6 +362,11 @@ struct kbase_va_region { */ #define KBASE_REG_VA_FREED (1ul << 26) +/* If set, the heap info address points to a u32 holding the used size in bytes; + * otherwise it points to a u64 holding the lowest address of unused memory. 
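A hedged sketch of how the two heap-info encodings just described could be turned into a page count; the sketch_ name and the conversion step are invented for illustration, while the real driver does this inside its trim and pressure-update paths.

/* Sketch only: interpret a heap-info value read from GPU memory for a
 * JIT region, per the KBASE_REG_HEAP_INFO_IS_SIZE flag described above.
 */
static size_t sketch_report_to_used_pages(struct kbase_va_region *reg,
		u64 report)
{
	u64 used_bytes;

	if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)
		used_bytes = (u32)report;	/* u32: used size in bytes */
	else
		used_bytes = report -		/* u64: first unused address */
			(reg->start_pfn << PAGE_SHIFT);

	return PFN_UP(used_bytes);
}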
+ */ +#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) + #define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) /* only used with 32-bit clients */ @@ -357,22 +392,47 @@ struct kbase_va_region { unsigned long flags; - - size_t extent; /* nr of pages alloc'd on PF */ - - struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */ - struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */ - - /* List head used to store the region in the JIT allocation pool */ + size_t extent; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; struct list_head jit_node; - /* The last JIT usage ID for this region */ u16 jit_usage_id; - /* The JIT bin this allocation came from */ u8 jit_bin_id; +#if MALI_JIT_PRESSURE_LIMIT + /* Pointer to an object in GPU memory defining an end of an allocated + * region + * + * The object can be one of: + * - u32 value defining the size of the region + * - u64 pointer first unused byte in the region + * + * The interpretation of the object depends on + * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE flag in jit_info_flags - if it is + * set, the heap info object should be interpreted as size. + */ + u64 heap_info_gpu_addr; + + /* The current estimate of the number of pages used, which in normal + * use is either: + * - the initial estimate == va_pages + * - the actual pages used, as found by a JIT usage report + * + * Note that since the value is calculated from GPU memory after a JIT + * usage report, at any point in time it is allowed to take a random + * value that is no greater than va_pages (e.g. it may be greater than + * gpu_alloc->nents) + */ + size_t used_pages; +#endif /* MALI_JIT_PRESSURE_LIMIT */ - int va_refcnt; /* number of users of this va */ + int va_refcnt; }; +/* Special marker for failed JIT allocations that still must be marked as + * in-use + */ +#define KBASE_RESERVED_REG_JIT_ALLOC ((struct kbase_va_region *)-1) + static inline bool kbase_is_region_free(struct kbase_va_region *reg) { return (!reg || reg->flags & KBASE_REG_FREE); @@ -411,6 +471,8 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get( WARN_ON(!region->va_refcnt); /* non-atomic as kctx->reg_lock is held */ + dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %p\n", + region->va_refcnt, (void *)region); region->va_refcnt++; return region; @@ -426,6 +488,8 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put( /* non-atomic as kctx->reg_lock is held */ region->va_refcnt--; + dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %p\n", + region->va_refcnt, (void *)region); if (!region->va_refcnt) kbase_region_refcnt_free(region); @@ -905,21 +969,27 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); int kbase_region_tracker_init(struct kbase_context *kctx); /** - * kbase_region_tracker_init_jit - Initialize the JIT region - * @kctx: kbase context - * @jit_va_pages: Size of the JIT region in pages - * @max_allocations: Maximum number of allocations allowed for the JIT region - * @trim_level: Trim level for the JIT region - * @group_id: The physical group ID from which to allocate JIT memory. - * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * kbase_region_tracker_init_jit - Initialize the just-in-time memory + * allocation region + * @kctx: Kbase context. + * @jit_va_pages: Size of the JIT region in pages. + * @max_allocations: Maximum number of allocations allowed for the JIT region. + * Valid range is 0..%BASE_JIT_ALLOC_COUNT. 
+ * @trim_level: Trim level for the JIT region. + * Valid range is 0..%BASE_JIT_MAX_TRIM_LEVEL. + * @group_id: The physical group ID from which to allocate JIT memory. + * Valid range is 0..(%MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @phys_pages_limit: Maximum number of physical pages to use to back the JIT + * region. Must not exceed @jit_va_pages. * * Return: 0 if success, negative error code otherwise. */ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, - u8 max_allocations, u8 trim_level, int group_id); + int max_allocations, int trim_level, int group_id, + u64 phys_pages_limit); /** - * kbase_region_tracker_init_exec - Initialize the EXEC_VA region + * kbase_region_tracker_init_exec - Initialize the GPU-executable memory region * @kctx: kbase context * @exec_va_pages: Size of the JIT region in pages. * It must not be greater than 4 GB. @@ -1096,8 +1166,6 @@ int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset); void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa, struct tagged_addr gpu_pa, off_t offset, size_t size, enum kbase_sync_type sync_fn); -void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); -void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); /* OS specific functions */ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr); @@ -1427,6 +1495,93 @@ bool kbase_jit_evict(struct kbase_context *kctx); */ void kbase_jit_term(struct kbase_context *kctx); +#if MALI_JIT_PRESSURE_LIMIT +/** + * kbase_trace_jit_report_gpu_mem_trace_enabled - variant of + * kbase_trace_jit_report_gpu_mem() that should only be called once the + * corresponding tracepoint is verified to be enabled + * @kctx: kbase context + * @reg: Just-in-time memory region to trace + * @flags: combination of values from enum kbase_jit_report_flags + */ +void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned int flags); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +/** + * kbase_trace_jit_report_gpu_mem - Trace information about the GPU memory used + * to make a JIT report + * @kctx: kbase context + * @reg: Just-in-time memory region to trace + * @flags: combination of values from enum kbase_jit_report_flags + * + * Information is traced using the trace_mali_jit_report_gpu_mem() tracepoint. + * + * In case that tracepoint is not enabled, this function should have the same + * low overheads as a tracepoint itself (i.e. use of 'jump labels' to avoid + * conditional branches) + * + * This can take the reg_lock on @kctx, do not use in places where this lock is + * already held. + * + * Note: this has to be a macro because at this stage the tracepoints have not + * been included. Also gives no opportunity for the compiler to mess up + * inlining it. 
+ */ +#if MALI_JIT_PRESSURE_LIMIT +#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ + do { \ + if (trace_mali_jit_report_gpu_mem_enabled()) \ + kbase_trace_jit_report_gpu_mem_trace_enabled( \ + (kctx), (reg), (flags)); \ + } while (0) +#else +#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ + CSTD_NOP(kctx, reg, flags) +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +#if MALI_JIT_PRESSURE_LIMIT +/** + * kbase_jit_report_update_pressure - safely update the JIT physical page + * pressure and JIT region's estimate of used_pages + * @kctx: kbase context, to update the current physical pressure + * @reg: Just-in-time memory region to update with @new_used_pages + * @new_used_pages: new value of number of pages used in the JIT region + * @flags: combination of values from enum kbase_jit_report_flags + * + * Takes care of: + * - correctly updating the pressure given the current reg->used_pages and + * new_used_pages + * - then updating the %kbase_va_region used_pages member + * + * Precondition: + * - new_used_pages <= reg->nr_pages + */ +void kbase_jit_report_update_pressure(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_used_pages, + unsigned int flags); + +/** + * kbase_mem_jit_trim_pages - Trim JIT regions until sufficient pages have been + * freed + * @kctx: Pointer to the kbase context whose active JIT allocations will be + * checked. + * @pages_needed: The maximum number of pages to trim. + * + * This functions checks all active JIT allocations in @kctx for unused pages + * at the end, and trim the backed memory regions of those allocations down to + * the used portion and free the unused pages into the page pool. + * + * Specifying @pages_needed allows us to stop early when there's enough + * physical memory freed to sufficiently bring down the total JIT physical page + * usage (e.g. to below the pressure limit) + * + * Return: Total number of successfully freed pages + */ +size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, + size_t pages_needed); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + /** * kbase_has_exec_va_zone - EXEC_VA zone predicate * diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index 57667be..219e0af 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -85,6 +85,8 @@ #define KBASE_MEM_ION_SYNC_WORKAROUND #endif +#define IR_THRESHOLD_STEPS (256u) + static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, u64 offset_bytes, size_t size, @@ -94,6 +96,10 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); +static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + /* Retrieve the associated region pointer if the GPU address corresponds to * one of the event memory pages. The enclosing region, if found, shouldn't * have been marked as free. 
@@ -282,7 +288,9 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, if (!(*flags & BASE_MEM_FLAG_MAP_FIXED)) *gpu_va = 0; /* return 0 on failure */ else - dev_err(dev, "Keeping requested GPU VA of 0x%llx\n", (unsigned long long)*gpu_va); + dev_err(dev, + "Keeping requested GPU VA of 0x%llx\n", + (unsigned long long)*gpu_va); if (!kbase_check_alloc_flags(*flags)) { dev_warn(dev, @@ -355,6 +363,15 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, goto prepare_failed; } + if (*flags & BASE_MEM_GROW_ON_GPF) { + unsigned int const ir_threshold = atomic_read( + &kctx->kbdev->memdev.ir_threshold); + + reg->threshold_pages = ((va_pages * ir_threshold) + + (IR_THRESHOLD_STEPS / 2)) / IR_THRESHOLD_STEPS; + } else + reg->threshold_pages = 0; + if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) { /* kbase_check_alloc_sizes() already checks extent is valid for * assigning to reg->extent */ @@ -1978,9 +1995,22 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, (old_pages - new_pages)<<PAGE_SHIFT, 1); } -int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages) +/** + * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region or NULL if there isn't one + * @new_pages: The number of pages after the shrink + * @old_pages: The number of pages before the shrink + * + * Return: 0 on success, negative -errno on error + * + * Unmap the shrunk pages from the GPU mapping. Note that the size of the region + * itself is unmodified as we still need to reserve the VA, only the page tables + * will be modified by this function. + */ +static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, + struct kbase_va_region *const reg, + u64 const new_pages, u64 const old_pages) { u64 delta = old_pages - new_pages; int ret = 0; @@ -2089,23 +2119,9 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) goto out_unlock; } } else { - delta = old_pages - new_pages; - - /* Update all CPU mapping(s) */ - kbase_mem_shrink_cpu_mapping(kctx, reg, - new_pages, old_pages); - - /* Update the GPU mapping */ - res = kbase_mem_shrink_gpu_mapping(kctx, reg, - new_pages, old_pages); - if (res) { + res = kbase_mem_shrink(kctx, reg, new_pages); + if (res) res = -ENOMEM; - goto out_unlock; - } - - kbase_free_phy_pages_helper(reg->cpu_alloc, delta); - if (reg->cpu_alloc != reg->gpu_alloc) - kbase_free_phy_pages_helper(reg->gpu_alloc, delta); } out_unlock: @@ -2118,6 +2134,43 @@ out_unlock: return res; } +int kbase_mem_shrink(struct kbase_context *const kctx, + struct kbase_va_region *const reg, u64 const new_pages) +{ + u64 delta, old_pages; + int err; + + lockdep_assert_held(&kctx->reg_lock); + + if (WARN_ON(!kctx)) + return -EINVAL; + + if (WARN_ON(!reg)) + return -EINVAL; + + old_pages = kbase_reg_current_backed_size(reg); + if (WARN_ON(old_pages < new_pages)) + return -EINVAL; + + delta = old_pages - new_pages; + + /* Update the GPU mapping */ + err = kbase_mem_shrink_gpu_mapping(kctx, reg, + new_pages, old_pages); + if (err >= 0) { + /* Update all CPU mapping(s) */ + kbase_mem_shrink_cpu_mapping(kctx, reg, + new_pages, old_pages); + + kbase_free_phy_pages_helper(reg->cpu_alloc, delta); + if (reg->cpu_alloc != reg->gpu_alloc) + kbase_free_phy_pages_helper(reg->gpu_alloc, delta); + } + + return err; +} + + static void kbase_cpu_vm_open(struct vm_area_struct *vma) { struct kbase_cpu_mapping *map 
= vma->vm_private_data; @@ -2880,6 +2933,20 @@ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) } KBASE_EXPORT_TEST_API(kbase_vunmap); +static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)) + /* To avoid the build breakage due to an unexported kernel symbol + * 'mm_trace_rss_stat' from later kernels, i.e. from V5.5.0 onwards, + * we inline here the equivalent of 'add_mm_counter()' from linux + * kernel V5.4.0~8. + */ + atomic_long_add(value, &mm->rss_stat.count[member]); +#else + add_mm_counter(mm, member, value); +#endif +} + void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) { struct mm_struct *mm; @@ -2889,10 +2956,10 @@ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) if (mm) { atomic_add(pages, &kctx->nonmapped_pages); #ifdef SPLIT_RSS_COUNTING - add_mm_counter(mm, MM_FILEPAGES, pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); #else spin_lock(&mm->page_table_lock); - add_mm_counter(mm, MM_FILEPAGES, pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); spin_unlock(&mm->page_table_lock); #endif } @@ -2917,10 +2984,10 @@ static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) pages = atomic_xchg(&kctx->nonmapped_pages, 0); #ifdef SPLIT_RSS_COUNTING - add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); #else spin_lock(&mm->page_table_lock); - add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); spin_unlock(&mm->page_table_lock); #endif } diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h index 02f1c3b..cd094b3 100644 --- a/mali_kbase/mali_kbase_mem_linux.h +++ b/mali_kbase/mali_kbase_mem_linux.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010, 2012-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010, 2012-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -129,6 +129,18 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages); /** + * kbase_mem_shrink - Shrink the physical backing size of a region + * + * @kctx: The kernel context + * @reg: The GPU region + * @new_pages: Number of physical pages to back the region with + * + * Return: 0 on success or error code + */ +int kbase_mem_shrink(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_pages); + +/** * kbase_context_mmap - Memory map method, gets invoked when mmap system call is * issued on device file /dev/malixx. * @kctx: The kernel context @@ -334,23 +346,6 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, u64 new_pages, u64 old_pages); /** - * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation - * @kctx: Context the region belongs to - * @reg: The GPU region or NULL if there isn't one - * @new_pages: The number of pages after the shrink - * @old_pages: The number of pages before the shrink - * - * Return: 0 on success, negative -errno on error - * - * Unmap the shrunk pages from the GPU mapping. Note that the size of the region - * itself is unmodified as we still need to reserve the VA, only the page tables - * will be modified by this function. 
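Looking back at the incremental-rendering threshold added to kbase_mem_alloc() above: the expression is a round-to-nearest fraction of the region size in 1/256 steps. A small worked sketch follows; the ir_threshold value of 64 is purely hypothetical and is not the driver default.

/* Sketch only: the threshold_pages rounding used in kbase_mem_alloc().
 * Example: va_pages = 1000, ir_threshold = 64 (hypothetical)
 *   (1000 * 64 + 128) / 256 = 64128 / 256 = 250 pages
 */
#define SKETCH_IR_THRESHOLD_STEPS 256u

static u64 sketch_ir_threshold_pages(u64 va_pages, unsigned int ir_threshold)
{
	return ((va_pages * ir_threshold) + (SKETCH_IR_THRESHOLD_STEPS / 2)) /
			SKETCH_IR_THRESHOLD_STEPS;
}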
- */ -int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); - -/** * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a * physical allocation * @kctx: The kernel base context associated with the mapping diff --git a/mali_kbase/mali_kbase_mipe_gen_header.h b/mali_kbase/mali_kbase_mipe_gen_header.h index 99475b6..ec52122 100644 --- a/mali_kbase/mali_kbase_mipe_gen_header.h +++ b/mali_kbase/mali_kbase_mipe_gen_header.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,101 +20,198 @@ * */ +/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. + */ + +/* clang-format off */ + #include "mali_kbase_mipe_proto.h" /** * This header generates MIPE tracepoint declaration BLOB at * compile time. * - * Before including this header, the following parameters - * must be defined: + * It is intentional that there is no header guard. + * The header could be included multiple times for + * different blobs compilation. + * + * Before including this header MIPE_HEADER_* parameters must be + * defined. See documentation below: + */ + +/** + * The name of the variable where the result BLOB will be stored. + */ +#if !defined(MIPE_HEADER_BLOB_VAR_NAME) +#error "MIPE_HEADER_BLOB_VAR_NAME must be defined!" +#endif + +/** + * A compiler attribute for the BLOB variable. + * + * e.g. __attribute__((section("my_section"))) * - * MIPE_HEADER_BLOB_VAR_NAME: the name of the variable - * where the result BLOB will be stored. + * Default value is no attribute. + */ +#if !defined(MIPE_HEADER_BLOB_VAR_ATTRIBUTE) +#define MIPE_HEADER_BLOB_VAR_ATTRIBUTE +#endif + +/** + * MIPE stream id. + * + * See enum tl_stream_id. + */ +#if !defined(MIPE_HEADER_STREAM_ID) +#error "MIPE_HEADER_STREAM_ID must be defined!" +#endif + +/** + * MIPE packet class. + * + * See enum tl_packet_class. + */ +#if !defined(MIPE_HEADER_PKT_CLASS) +#error "MIPE_HEADER_PKT_CLASS must be defined!" +#endif + +/** + * The list of tracepoints to process. * - * MIPE_HEADER_TP_LIST: the list of tracepoints to process. * It should be defined as follows: - * #define MIPE_HEADER_TP_LIST \ - * TP_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ - * TP_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ + * #define MIPE_HEADER_TRACEPOINT_LIST \ + * TRACEPOINT_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ + * TRACEPOINT_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ * etc. + * * Where the first argument is tracepoints name, the second * argument is a short tracepoint description, the third argument * argument types (see MIPE documentation), and the fourth argument * is comma separated argument names. - * - * MIPE_HEADER_TP_LIST_COUNT: number of entries in MIPE_HEADER_TP_LIST. - * - * MIPE_HEADER_PKT_CLASS: MIPE packet class. */ - -#if !defined(MIPE_HEADER_BLOB_VAR_NAME) -#error "MIPE_HEADER_BLOB_VAR_NAME must be defined!" +#if !defined(MIPE_HEADER_TRACEPOINT_LIST) +#error "MIPE_HEADER_TRACEPOINT_LIST must be defined!" #endif -#if !defined(MIPE_HEADER_TP_LIST) -#error "MIPE_HEADER_TP_LIST must be defined!" +/** + * The number of entries in MIPE_HEADER_TRACEPOINT_LIST. 
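Putting the MIPE_HEADER_* parameters documented above together, a hedged usage sketch of instantiating the BLOB. The blob name, tracepoint ID and strings are invented for illustration; TL_STREAM_ID_KERNEL comes from enum tl_stream_id in mali_kbase_mipe_proto.h, and TL_PACKET_CLASS_OBJ is assumed to be one of the enum tl_packet_class values.

/* Illustrative only: not code added by this patch. KBASE_TL_SKETCH_EVENT
 * is assumed to be a u32 tracepoint ID defined by the including code.
 */
#define MIPE_HEADER_BLOB_VAR_NAME	__sketch_tp_desc_header
#define MIPE_HEADER_STREAM_ID		TL_STREAM_ID_KERNEL
#define MIPE_HEADER_PKT_CLASS		TL_PACKET_CLASS_OBJ
#define MIPE_HEADER_TRACEPOINT_LIST \
	TRACEPOINT_DESC(KBASE_TL_SKETCH_EVENT, \
		"Some description", "@II", "first_arg,second_arg")
#define MIPE_HEADER_TRACEPOINT_LIST_SIZE 1

#include "mali_kbase_mipe_gen_header.h"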
+ */ +#if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE) +#error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!" #endif -#if !defined(MIPE_HEADER_TP_LIST_COUNT) -#error "MIPE_HEADER_TP_LIST_COUNT must be defined!" +/** + * The list of enums to process. + * + * It should be defined as follows: + * #define MIPE_HEADER_ENUM_LIST \ + * ENUM_DESC(enum_arg_name, enum_value) \ + * ENUM_DESC(enum_arg_name, enum_value) \ + * etc. + * + * Where enum_arg_name is the name of a tracepoint argument being used with + * this enum. enum_value is a valid C enum value. + * + * Default value is an empty list. + */ +#if defined(MIPE_HEADER_ENUM_LIST) + +/** + * Tracepoint message ID used for enums declaration. + */ +#if !defined(MIPE_HEADER_ENUM_MSG_ID) +#error "MIPE_HEADER_ENUM_MSG_ID must be defined!" #endif -#if !defined(MIPE_HEADER_PKT_CLASS) -#error "MIPE_HEADER_PKT_CLASS must be defined!" +#else +#define MIPE_HEADER_ENUM_LIST #endif -static const struct { +/** + * The MIPE tracepoint declaration BLOB. + */ +const struct +{ u32 _mipe_w0; u32 _mipe_w1; u8 _protocol_version; u8 _pointer_size; u32 _tp_count; -#define TP_DESC(name, desc, arg_types, arg_names) \ - struct { \ - u32 _name; \ - u32 _size_string_name; \ - char _string_name[sizeof(#name)]; \ - u32 _size_desc; \ - char _desc[sizeof(desc)]; \ - u32 _size_arg_types; \ - char _arg_types[sizeof(arg_types)]; \ - u32 _size_arg_names; \ - char _arg_names[sizeof(arg_names)]; \ +#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \ + struct { \ + u32 _name; \ + u32 _size_string_name; \ + char _string_name[sizeof(#name)]; \ + u32 _size_desc; \ + char _desc[sizeof(desc)]; \ + u32 _size_arg_types; \ + char _arg_types[sizeof(arg_types)]; \ + u32 _size_arg_names; \ + char _arg_names[sizeof(arg_names)]; \ } __attribute__ ((__packed__)) __ ## name; - MIPE_HEADER_TP_LIST -#undef TP_DESC +#define ENUM_DESC(arg_name, value) \ + struct { \ + u32 _msg_id; \ + u32 _arg_name_len; \ + char _arg_name[sizeof(#arg_name)]; \ + u32 _value; \ + u32 _value_str_len; \ + char _value_str[sizeof(#value)]; \ + } __attribute__ ((__packed__)) __ ## arg_name ## _ ## value; -} __attribute__ ((__packed__)) MIPE_HEADER_BLOB_VAR_NAME = { + MIPE_HEADER_TRACEPOINT_LIST + MIPE_HEADER_ENUM_LIST +#undef TRACEPOINT_DESC +#undef ENUM_DESC +} __attribute__((packed)) MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = { ._mipe_w0 = MIPE_PACKET_HEADER_W0( TL_PACKET_FAMILY_TL, MIPE_HEADER_PKT_CLASS, TL_PACKET_TYPE_HEADER, - 1), + MIPE_HEADER_STREAM_ID), ._mipe_w1 = MIPE_PACKET_HEADER_W1( sizeof(MIPE_HEADER_BLOB_VAR_NAME) - PACKET_HEADER_SIZE, 0), ._protocol_version = SWTRACE_VERSION, ._pointer_size = sizeof(void *), - ._tp_count = MIPE_HEADER_TP_LIST_COUNT, -#define TP_DESC(name, desc, arg_types, arg_names) \ - .__ ## name = { \ - ._name = name, \ - ._size_string_name = sizeof(#name), \ - ._string_name = #name, \ - ._size_desc = sizeof(desc), \ - ._desc = desc, \ - ._size_arg_types = sizeof(arg_types), \ - ._arg_types = arg_types, \ - ._size_arg_names = sizeof(arg_names), \ - ._arg_names = arg_names \ + ._tp_count = MIPE_HEADER_TRACEPOINT_LIST_SIZE, +#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \ + .__ ## name = { \ + ._name = name, \ + ._size_string_name = sizeof(#name), \ + ._string_name = #name, \ + ._size_desc = sizeof(desc), \ + ._desc = desc, \ + ._size_arg_types = sizeof(arg_types), \ + ._arg_types = arg_types, \ + ._size_arg_names = sizeof(arg_names), \ + ._arg_names = arg_names \ }, - MIPE_HEADER_TP_LIST -#undef TP_DESC +#define ENUM_DESC(arg_name, value) \ + .__ ## 
arg_name ## _ ## value = { \ + ._msg_id = MIPE_HEADER_ENUM_MSG_ID, \ + ._arg_name_len = sizeof(#arg_name), \ + ._arg_name = #arg_name, \ + ._value = value, \ + ._value_str_len = sizeof(#value), \ + ._value_str = #value \ + }, + + MIPE_HEADER_TRACEPOINT_LIST + MIPE_HEADER_ENUM_LIST +#undef TRACEPOINT_DESC +#undef ENUM_DESC }; #undef MIPE_HEADER_BLOB_VAR_NAME -#undef MIPE_HEADER_TP_LIST -#undef MIPE_HEADER_TP_LIST_COUNT +#undef MIPE_HEADER_BLOB_VAR_ATTRIBUTE +#undef MIPE_HEADER_STREAM_ID #undef MIPE_HEADER_PKT_CLASS +#undef MIPE_HEADER_TRACEPOINT_LIST +#undef MIPE_HEADER_TRACEPOINT_LIST_SIZE +#undef MIPE_HEADER_ENUM_LIST +#undef MIPE_HEADER_ENUM_MSG_ID + +/* clang-format on */ diff --git a/mali_kbase/mali_kbase_mipe_proto.h b/mali_kbase/mali_kbase_mipe_proto.h index 1a0b8b4..54667cf 100644 --- a/mali_kbase/mali_kbase_mipe_proto.h +++ b/mali_kbase/mali_kbase_mipe_proto.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,6 +20,12 @@ * */ +/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. + */ + +/* clang-format off */ + #if !defined(_KBASE_MIPE_PROTO_H) #define _KBASE_MIPE_PROTO_H @@ -109,5 +115,13 @@ enum tl_packet_type { TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */ }; +/* Stream ID types (timeline family). */ +enum tl_stream_id { + TL_STREAM_ID_USER = 0, /* User-space driver Timeline stream. */ + TL_STREAM_ID_KERNEL = 1, /* Kernel-space driver Timeline stream. */ + TL_STREAM_ID_CSFFW = 2, /* CSF firmware driver Timeline stream. */ +}; + #endif /* _KBASE_MIPE_PROTO_H */ +/* clang-format on */ diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c index 2251031..2adbb21 100644 --- a/mali_kbase/mali_kbase_pm.c +++ b/mali_kbase/mali_kbase_pm.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/mali_kbase_smc.c b/mali_kbase/mali_kbase_smc.c index 3470f58..b5c7b12 100644 --- a/mali_kbase/mali_kbase_smc.c +++ b/mali_kbase/mali_kbase_smc.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015, 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015, 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c index c264d0b..45ce8ad 100644 --- a/mali_kbase/mali_kbase_softjobs.c +++ b/mali_kbase/mali_kbase_softjobs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -719,6 +719,36 @@ out_cleanup: return ret; } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, + unsigned long page_num, struct page **page) +{ + struct sg_table *sgt = gpu_alloc->imported.umm.sgt; + struct sg_page_iter sg_iter; + unsigned long page_index = 0; + + if (WARN_ON(gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) + return NULL; + + if (!sgt) + return NULL; + + if (WARN_ON(page_num >= gpu_alloc->nents)) + return NULL; + + for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0) { + if (page_index == page_num) { + *page = sg_page_iter_page(&sg_iter); + + return kmap(*page); + } + page_index++; + } + + return NULL; +} +#endif + int kbase_mem_copy_from_extres(struct kbase_context *kctx, struct kbase_debug_copy_buffer *buf_data) { @@ -779,16 +809,23 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx, for (i = 0; i < dma_to_copy/PAGE_SIZE && target_page_nr < buf_data->nr_pages; i++) { - +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) + struct page *pg; + void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg); +#else void *extres_page = dma_buf_kmap(dma_buf, i); - +#endif if (extres_page) { ret = kbase_mem_copy_to_pinned_user_pages( pages, extres_page, &to_copy, buf_data->nr_pages, &target_page_nr, offset); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) + kunmap(pg); +#else dma_buf_kunmap(dma_buf, i, extres_page); +#endif if (ret) goto out_unlock; } @@ -831,6 +868,7 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom) int kbasep_jit_alloc_validate(struct kbase_context *kctx, struct base_jit_alloc_info *info) { + int j; /* If the ID is zero, then fail the job */ if (info->id == 0) return -EINVAL; @@ -843,46 +881,82 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx, if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0) return -EINVAL; - if (kctx->jit_version == 1) { - /* Old JIT didn't have usage_id, max_allocations, bin_id - * or padding, so force them to zero - */ - info->usage_id = 0; - info->max_allocations = 0; - info->bin_id = 0; - info->flags = 0; - memset(info->padding, 0, sizeof(info->padding)); - } else { - int j; - - /* Check padding is all zeroed */ - for (j = 0; j < sizeof(info->padding); j++) { - if (info->padding[j] != 0) { - return -EINVAL; - } - } + /* Interface version 2 (introduced with kernel driver version 11.5) + * onward has padding and a flags member to validate. + * + * Note: To support earlier versions the extra bytes will have been set + * to 0 by the caller. 
+ */ - /* No bit other than TILER_ALIGN_TOP shall be set */ - if (info->flags & ~BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { + /* Check padding is all zeroed */ + for (j = 0; j < sizeof(info->padding); j++) { + if (info->padding[j] != 0) return -EINVAL; - } } + /* Only valid flags shall be set */ + if (info->flags & ~(BASE_JIT_ALLOC_VALID_FLAGS)) + return -EINVAL; + +#if !MALI_JIT_PRESSURE_LIMIT + /* If just-in-time memory allocation pressure limit feature is disabled, + * heap_info_gpu_addr must be zeroed-out + */ + if (info->heap_info_gpu_addr) + return -EINVAL; +#endif + + /* If BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE is set, heap_info_gpu_addr + * cannot be 0 + */ + if ((info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) && + !info->heap_info_gpu_addr) + return -EINVAL; + return 0; } +/* + * Sizes of user data to copy for each just-in-time memory interface version + * + * In interface version 2 onwards this is the same as the struct size, allowing + * copying of arrays of structures from userspace. + * + * In interface version 1 the structure size was variable, and hence arrays of + * structures cannot be supported easily, and were not a feature present in + * version 1 anyway. + */ +static const size_t jit_info_copy_size_for_jit_version[] = { + /* in jit_version 1, the structure did not have any end padding, hence + * it could be a different size on 32 and 64-bit clients. We therefore + * do not copy past the last member + */ + [1] = offsetofend(struct base_jit_alloc_info_10_2, id), + [2] = sizeof(struct base_jit_alloc_info_11_5), + [3] = sizeof(struct base_jit_alloc_info) +}; + static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) { - __user void *data = (__user void *)(uintptr_t) katom->jc; + __user u8 *data = (__user u8 *)(uintptr_t) katom->jc; struct base_jit_alloc_info *info; struct kbase_context *kctx = katom->kctx; struct kbase_device *kbdev = kctx->kbdev; u32 count; int ret; u32 i; + size_t jit_info_user_copy_size; - /* For backwards compatibility */ - if (katom->nr_extres == 0) + WARN_ON(kctx->jit_version >= + ARRAY_SIZE(jit_info_copy_size_for_jit_version)); + jit_info_user_copy_size = + jit_info_copy_size_for_jit_version[kctx->jit_version]; + WARN_ON(jit_info_user_copy_size > sizeof(*info)); + + /* For backwards compatibility, and to prevent reading more than 1 jit + * info struct on jit version 1 + */ + if (katom->nr_extres == 0 || kctx->jit_version == 1) katom->nr_extres = 1; count = katom->nr_extres; @@ -899,13 +973,21 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) ret = -ENOMEM; goto fail; } - if (copy_from_user(info, data, sizeof(*info)*count) != 0) { - ret = -EINVAL; - goto free_info; - } + katom->softjob_data = info; - for (i = 0; i < count; i++, info++) { + for (i = 0; i < count; i++, info++, data += jit_info_user_copy_size) { + if (copy_from_user(info, data, jit_info_user_copy_size) != 0) { + ret = -EINVAL; + goto free_info; + } + /* Clear any remaining bytes when user struct is smaller than + * kernel struct. 
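For readers unfamiliar with offsetofend(), used in the jit_info_copy_size_for_jit_version table above: it yields the size of a structure up to and including the named member, which is what lets the version-1 copy stop after the last field the old ABI carried. A minimal sketch of the equivalent expression:

/* Sketch of what offsetofend() expands to (the kernel provides the real
 * macro); shown only to clarify the copy-size table above.
 */
#define sketch_offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))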
For jit version 1, this also clears the + * padding bytes + */ + memset(((u8 *)info) + jit_info_user_copy_size, 0, + sizeof(*info) - jit_info_user_copy_size); + ret = kbasep_jit_alloc_validate(kctx, info); if (ret) goto free_info; @@ -1009,7 +1091,7 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) for (j = 0; j < i; j++, info++) { kbase_jit_free(kctx, kctx->jit_alloc[info->id]); kctx->jit_alloc[info->id] = - (struct kbase_va_region *) -1; + KBASE_RESERVED_REG_JIT_ALLOC; } katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; @@ -1054,7 +1136,7 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) */ for (; i < count; i++, info++) { kctx->jit_alloc[info->id] = - (struct kbase_va_region *) -1; + KBASE_RESERVED_REG_JIT_ALLOC; } katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; @@ -1121,6 +1203,9 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) entry_mmu_flags, info->id, info->commit_pages, info->extent, info->va_pages); kbase_vunmap(kctx, &mapping); + + kbase_trace_jit_report_gpu_mem(kctx, reg, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); } katom->event_code = BASE_JD_EVENT_DONE; @@ -1229,7 +1314,7 @@ static void kbase_jit_free_process(struct kbase_jd_atom *katom) } } -static void kbasep_jit_free_finish_worker(struct work_struct *work) +static void kbasep_jit_finish_worker(struct work_struct *work) { struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, work); @@ -1245,11 +1330,29 @@ static void kbasep_jit_free_finish_worker(struct work_struct *work) kbase_js_sched_all(kctx->kbdev); } -static void kbase_jit_free_finish(struct kbase_jd_atom *katom) +void kbase_jit_retry_pending_alloc(struct kbase_context *kctx) { + LIST_HEAD(jit_pending_alloc_list); struct list_head *i, *tmp; + + list_splice_tail_init(&kctx->jit_pending_alloc, + &jit_pending_alloc_list); + + list_for_each_safe(i, tmp, &jit_pending_alloc_list) { + struct kbase_jd_atom *pending_atom = list_entry(i, + struct kbase_jd_atom, queue); + if (kbase_jit_allocate_process(pending_atom) == 0) { + /* Atom has completed */ + INIT_WORK(&pending_atom->work, + kbasep_jit_finish_worker); + queue_work(kctx->jctx.job_done_wq, &pending_atom->work); + } + } +} + +static void kbase_jit_free_finish(struct kbase_jd_atom *katom) +{ struct kbase_context *kctx = katom->kctx; - LIST_HEAD(jit_pending_alloc_list); u8 *ids; size_t j; @@ -1270,7 +1373,8 @@ static void kbase_jit_free_finish(struct kbase_jd_atom *katom) * still succeed this soft job but don't try and free * the allocation. 
*/ - if (kctx->jit_alloc[ids[j]] != (struct kbase_va_region *) -1) { + if (kctx->jit_alloc[ids[j]] != + KBASE_RESERVED_REG_JIT_ALLOC) { KBASE_TLSTREAM_TL_JIT_USEDPAGES(kctx->kbdev, kctx->jit_alloc[ids[j]]-> gpu_alloc->nents, ids[j]); @@ -1282,18 +1386,7 @@ static void kbase_jit_free_finish(struct kbase_jd_atom *katom) /* Free the list of ids */ kfree(ids); - list_splice_tail_init(&kctx->jit_pending_alloc, &jit_pending_alloc_list); - - list_for_each_safe(i, tmp, &jit_pending_alloc_list) { - struct kbase_jd_atom *pending_atom = list_entry(i, - struct kbase_jd_atom, queue); - if (kbase_jit_allocate_process(pending_atom) == 0) { - /* Atom has completed */ - INIT_WORK(&pending_atom->work, - kbasep_jit_free_finish_worker); - queue_work(kctx->jctx.job_done_wq, &pending_atom->work); - } - } + kbase_jit_retry_pending_alloc(kctx); } static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c index 5e3b74d..f01291a 100644 --- a/mali_kbase/mali_kbase_vinstr.c +++ b/mali_kbase/mali_kbase_vinstr.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -574,25 +574,34 @@ int kbase_vinstr_hwcnt_reader_setup( if (errcode) goto error; + /* Add the new client. No need to reschedule worker, as not periodic */ + mutex_lock(&vctx->lock); + + vctx->client_count++; + list_add(&vcli->node, &vctx->clients); + + mutex_unlock(&vctx->lock); + + /* Expose to user-space only once the client is fully initialized */ errcode = anon_inode_getfd( "[mali_vinstr_desc]", &vinstr_client_fops, vcli, O_RDONLY | O_CLOEXEC); if (errcode < 0) - goto error; + goto client_installed_error; fd = errcode; - /* Add the new client. No need to reschedule worker, as not periodic */ + return fd; + +client_installed_error: mutex_lock(&vctx->lock); - vctx->client_count++; - list_add(&vcli->node, &vctx->clients); + vctx->client_count--; + list_del(&vcli->node); mutex_unlock(&vctx->lock); - - return fd; error: kbasep_vinstr_client_destroy(vcli); return errcode; diff --git a/mali_kbase/mali_linux_trace.h b/mali_kbase/mali_linux_trace.h index 96296ac..b639764 100644 --- a/mali_kbase/mali_linux_trace.h +++ b/mali_kbase/mali_linux_trace.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,15 +20,15 @@ * */ -#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_MALI_H - #undef TRACE_SYSTEM #define TRACE_SYSTEM mali -#define TRACE_INCLUDE_FILE mali_linux_trace + +#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MALI_H #include <linux/tracepoint.h> +#if defined(CONFIG_MALI_GATOR_SUPPORT) #define MALI_JOB_SLOTS_EVENT_CHANGED /** @@ -127,12 +127,335 @@ TRACE_EVENT(mali_total_alloc_pages_change, ), TP_printk("gpu=%u event=%lld", __entry->gpu_id, __entry->event_id) ); +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + +/* + * MMU subsystem tracepoints + */ + +/* Fault status and exception code helpers + * + * Must be macros to allow use by user-side tracepoint tools + * + * bits 0:1 masked off code, and used for the level + * + * Tracepoint files get included more than once - protect against multiple + * definition + */ +#ifndef __TRACE_MALI_MMU_HELPERS +#define __TRACE_MALI_MMU_HELPERS +/* Complex macros should be enclosed in parenthesis. + * + * We need to have those parentheses removed for our arrays of symbolic look-ups + * for __print_symbolic() whilst also being able to use them outside trace code + */ +#define _ENSURE_PARENTHESIS(args...) args + +#define KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT(code) \ + (!KBASE_MMU_FAULT_CODE_VALID(code) ? "UNKNOWN,level=" : \ + __print_symbolic(((code) & ~3u), \ + KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS)) +#define KBASE_MMU_FAULT_CODE_LEVEL(code) \ + (((((code) & ~0x3u) == 0xC4) ? 4 : 0) + ((code) & 0x3u)) + +#define KBASE_MMU_FAULT_STATUS_CODE(status) \ + ((status) & 0xFFu) +#define KBASE_MMU_FAULT_STATUS_DECODED_STRING(status) \ + (((status) & (1u << 10)) ? 
"DECODER_FAULT" : "SLAVE_FAULT") + +#define KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(status) \ + KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT( \ + KBASE_MMU_FAULT_STATUS_CODE(status)) + +#define KBASE_MMU_FAULT_STATUS_LEVEL(status) \ + KBASE_MMU_FAULT_CODE_LEVEL(KBASE_MMU_FAULT_STATUS_CODE(status)) + +#define KBASE_MMU_FAULT_STATUS_ACCESS(status) \ + ((status) & AS_FAULTSTATUS_ACCESS_TYPE_MASK) +#define KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ + {AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC, "ATOMIC" }, \ + {AS_FAULTSTATUS_ACCESS_TYPE_EX, "EXECUTE"}, \ + {AS_FAULTSTATUS_ACCESS_TYPE_READ, "READ" }, \ + {AS_FAULTSTATUS_ACCESS_TYPE_WRITE, "WRITE" }) +#define KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(status) \ + __print_symbolic(KBASE_MMU_FAULT_STATUS_ACCESS(status), \ + KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS) + +#define KBASE_MMU_FAULT_CODE_VALID(code) \ + ((code >= 0xC0 && code <= 0xEF) && \ + (!(code >= 0xC5 && code <= 0xC6)) && \ + (!(code >= 0xCC && code <= 0xCF)) && \ + (!(code >= 0xD4 && code <= 0xD7)) && \ + (!(code >= 0xDC && code <= 0xDF))) +#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ + {0xC0, "TRANSLATION_FAULT_" }, \ + {0xC4, "TRANSLATION_FAULT(_7==_IDENTITY)_" }, \ + {0xC8, "PERMISSION_FAULT_" }, \ + {0xD0, "TRANSTAB_BUS_FAULT_" }, \ + {0xD8, "ACCESS_FLAG_" }, \ + {0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ + {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ + {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }, \ + {0xEC, "MEMORY_ATTRIBUTES_NONCACHEABLE_" }) +#endif /* __TRACE_MALI_MMU_HELPERS */ + +/* trace_mali_mmu_page_fault_grow + * + * Tracepoint about a successful grow of a region due to a GPU page fault + */ +TRACE_EVENT(mali_mmu_page_fault_grow, + TP_PROTO(struct kbase_va_region *reg, struct kbase_fault *fault, + size_t new_pages), + TP_ARGS(reg, fault, new_pages), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(u64, fault_addr) + __field(u64, fault_extra_addr) + __field(size_t, new_pages) + __field(u32, status) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->fault_addr = fault->addr; + __entry->fault_extra_addr = fault->extra_addr; + __entry->new_pages = new_pages; + __entry->status = fault->status; + ), + TP_printk("start=0x%llx fault_addr=0x%llx fault_extra_addr=0x%llx new_pages=%zu raw_fault_status=0x%x decoded_faultstatus=%s exception_type=0x%x,%s%u access_type=0x%x,%s source_id=0x%x", + __entry->start_addr, __entry->fault_addr, + __entry->fault_extra_addr, __entry->new_pages, + __entry->status, + KBASE_MMU_FAULT_STATUS_DECODED_STRING(__entry->status), + KBASE_MMU_FAULT_STATUS_CODE(__entry->status), + KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(__entry->status), + KBASE_MMU_FAULT_STATUS_LEVEL(__entry->status), + KBASE_MMU_FAULT_STATUS_ACCESS(__entry->status) >> 8, + KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(__entry->status), + __entry->status >> 16) +); + + + + +/* + * Just-in-time memory allocation subsystem tracepoints + */ + +/* Just-in-time memory allocation soft-job template. Override the TP_printk + * further if need be. jit_id can be 0. 
+ */ +DECLARE_EVENT_CLASS(mali_jit_softjob_template, + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(size_t, nr_pages) + __field(size_t, backed_pages) + __field(u8, jit_id) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->nr_pages = reg->nr_pages; + __entry->backed_pages = kbase_reg_current_backed_size(reg); + __entry->jit_id = jit_id; + ), + TP_printk("jit_id=%u start=0x%llx va_pages=0x%zx backed_size=0x%zx", + __entry->jit_id, __entry->start_addr, __entry->nr_pages, + __entry->backed_pages) +); + +/* trace_mali_jit_alloc() + * + * Tracepoint about a just-in-time memory allocation soft-job successfully + * allocating memory + */ +DEFINE_EVENT(mali_jit_softjob_template, mali_jit_alloc, + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id)); + +/* trace_mali_jit_free() + * + * Tracepoint about memory that was allocated just-in-time being freed + * (which may happen either on free soft-job, or during rollback error + * paths of an allocation soft-job, etc) + * + * Free doesn't immediately have the just-in-time memory allocation ID so + * it's currently suppressed from the output - set jit_id to 0 + */ +DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free, + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id), + TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx", + __entry->start_addr, __entry->nr_pages, __entry->backed_pages)); + +#if MALI_JIT_PRESSURE_LIMIT && !MALI_USE_CSF +/* trace_mali_jit_report + * + * Tracepoint about the GPU data structure read to form a just-in-time memory + * allocation report, and its calculated physical page usage + */ +TRACE_EVENT(mali_jit_report, + TP_PROTO(struct kbase_jd_atom *katom, struct kbase_va_region *reg, + unsigned int id_idx, u64 read_val, u64 used_pages), + TP_ARGS(katom, reg, id_idx, read_val, used_pages), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(u64, read_val) + __field(u64, used_pages) + __field(unsigned long, flags) + __field(u8, id_idx) + __field(u8, jit_id) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->read_val = read_val; + __entry->used_pages = used_pages; + __entry->flags = reg->flags; + __entry->id_idx = id_idx; + __entry->jit_id = katom->jit_ids[id_idx]; + ), + TP_printk("start=0x%llx jit_ids[%u]=%u read_type='%s' read_val=0x%llx used_pages=%llu", + __entry->start_addr, __entry->id_idx, __entry->jit_id, + __print_symbolic(__entry->flags, + { 0, "address"}, + { KBASE_REG_TILER_ALIGN_TOP, "address with align" }, + { KBASE_REG_HEAP_INFO_IS_SIZE, "size" }, + { KBASE_REG_HEAP_INFO_IS_SIZE | + KBASE_REG_TILER_ALIGN_TOP, + "size with align (invalid)" } + ), + __entry->read_val, __entry->used_pages) +); +#endif /* MALI_JIT_PRESSURE_LIMIT && !MALI_USE_CSF */ + +TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + +#if MALI_JIT_PRESSURE_LIMIT +/* trace_mali_jit_report_pressure + * + * Tracepoint about change in physical memory pressure, due to the information + * about a region changing. 
Examples include: + * - a report on a region that was allocated just-in-time + * - just-in-time allocation of a region + * - free of a region that was allocated just-in-time + */ +TRACE_EVENT(mali_jit_report_pressure, + TP_PROTO(struct kbase_va_region *reg, u64 new_used_pages, + u64 new_pressure, unsigned int flags), + TP_ARGS(reg, new_used_pages, new_pressure, flags), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(u64, used_pages) + __field(u64, new_used_pages) + __field(u64, new_pressure) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->used_pages = reg->used_pages; + __entry->new_used_pages = new_used_pages; + __entry->new_pressure = new_pressure; + __entry->flags = flags; + ), + TP_printk("start=0x%llx old_used_pages=%llu new_used_pages=%llu new_pressure=%llu report_flags=%s", + __entry->start_addr, __entry->used_pages, + __entry->new_used_pages, __entry->new_pressure, + __print_flags(__entry->flags, "|", + { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, + "HAPPENED_ON_ALLOC_OR_FREE" })) +); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +/* Tracepoint files get included more than once - protect against multiple + * definition + */ +#undef KBASE_JIT_REPORT_GPU_MEM_SIZE + +/* Size in bytes of the memory surrounding the location used for a just-in-time + * memory allocation report + */ +#define KBASE_JIT_REPORT_GPU_MEM_SIZE (4 * sizeof(u64)) + +/* trace_mali_jit_report_gpu_mem + * + * Tracepoint about the GPU memory nearby the location used for a just-in-time + * memory allocation report + */ +TRACE_EVENT(mali_jit_report_gpu_mem, + TP_PROTO(u64 base_addr, u64 reg_addr, u64 *gpu_mem, unsigned int flags), + TP_ARGS(base_addr, reg_addr, gpu_mem, flags), + TP_STRUCT__entry( + __field(u64, base_addr) + __field(u64, reg_addr) + __array(u64, mem_values, + KBASE_JIT_REPORT_GPU_MEM_SIZE / sizeof(u64)) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->base_addr = base_addr; + __entry->reg_addr = reg_addr; + memcpy(__entry->mem_values, gpu_mem, + sizeof(__entry->mem_values)); + __entry->flags = flags; + ), + TP_printk("start=0x%llx read GPU memory base=0x%llx values=%s report_flags=%s", + __entry->reg_addr, __entry->base_addr, + __print_array(__entry->mem_values, + ARRAY_SIZE(__entry->mem_values), sizeof(u64)), + __print_flags(__entry->flags, "|", + { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, + "HAPPENED_ON_ALLOC_OR_FREE" })) +); + +/* trace_mali_jit_trim_from_region + * + * Tracepoint about trimming physical pages from a region + */ +TRACE_EVENT(mali_jit_trim_from_region, + TP_PROTO(struct kbase_va_region *reg, size_t freed_pages, + size_t old_pages, size_t available_pages, size_t new_pages), + TP_ARGS(reg, freed_pages, old_pages, available_pages, new_pages), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(size_t, freed_pages) + __field(size_t, old_pages) + __field(size_t, available_pages) + __field(size_t, new_pages) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->freed_pages = freed_pages; + __entry->old_pages = old_pages; + __entry->available_pages = available_pages; + __entry->new_pages = new_pages; + ), + TP_printk("start=0x%llx freed_pages=%zu old_pages=%zu available_pages=%zu new_pages=%zu", + __entry->start_addr, __entry->freed_pages, __entry->old_pages, + __entry->available_pages, __entry->new_pages) +); + +/* trace_mali_jit_trim + * + * Tracepoint about total trimmed physical pages + */ +TRACE_EVENT(mali_jit_trim, + TP_PROTO(size_t freed_pages), + 
TP_ARGS(freed_pages), + TP_STRUCT__entry( + __field(size_t, freed_pages) + ), + TP_fast_assign( + __entry->freed_pages = freed_pages; + ), + TP_printk("freed_pages=%zu", __entry->freed_pages) +); #endif /* _TRACE_MALI_H */ #undef TRACE_INCLUDE_PATH -#undef linux #define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mali_linux_trace /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c index fd60e35..46800fe 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,7 @@ #include <mali_kbase_hwaccess_jm.h> #include <backend/gpu/mali_kbase_device_internal.h> #include <mali_kbase_as_fault_debugfs.h> +#include "../mali_kbase_mmu_internal.h" void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup * const setup) @@ -191,6 +192,10 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, { lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, + "Entering %s kctx %p, as %p\n", + __func__, (void *)kctx, (void *)as); + if (!kctx) { dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", kbase_as_has_bus_fault(as, fault) ? @@ -254,6 +259,10 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault)); atomic_inc(&kbdev->faults_pending); } + + dev_dbg(kbdev->dev, + "Leaving %s kctx %p, as %p\n", + __func__, (void *)kctx, (void *)as); } static void validate_protected_page_fault(struct kbase_device *kbdev) @@ -285,12 +294,14 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) const unsigned long as_bit_mask = (1UL << num_as) - 1; unsigned long flags; u32 new_mask; - u32 tmp; + u32 tmp, bf_bits, pf_bits; + dev_dbg(kbdev->dev, "Entering %s irq_stat %u\n", + __func__, irq_stat); /* bus faults */ - u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; + bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; /* page faults (note: Ignore ASes with both pf and bf) */ - u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; + pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; if (WARN_ON(kbdev == NULL)) return; @@ -388,4 +399,16 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) new_mask |= tmp; kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + + dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n", + __func__, irq_stat); +} + +int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, + struct kbase_va_region *const reg) +{ + dev_dbg(kctx->kbdev->dev, + "Switching to incremental rendering for region %p\n", + (void *)reg); + return kbase_job_slot_softstop_start_rp(kctx, reg); } diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index 5392305..c4bea39 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,6 +42,7 @@ #include <mali_kbase_reset_gpu.h> #include <mmu/mali_kbase_mmu.h> #include <mmu/mali_kbase_mmu_internal.h> +#include <mali_kbase_cs_experimental.h> #define KBASE_MMU_PAGE_ENTRIES 512 @@ -534,6 +535,8 @@ void page_fault_worker(struct work_struct *data) bool grow_2mb_pool; struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; int i; + size_t current_backed_size; + faulting_as = container_of(data, struct kbase_as, work_pagefault); fault = &faulting_as->pf_data; @@ -541,6 +544,9 @@ void page_fault_worker(struct work_struct *data) as_no = faulting_as->number; kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + dev_dbg(kbdev->dev, + "Entering %s %p, fault_pfn %lld, as_no %d\n", + __func__, (void *)data, fault_pfn, as_no); /* Grab the context that was already refcounted in kbase_mmu_interrupt() * Therefore, it cannot be scheduled out of this AS until we explicitly @@ -684,11 +690,14 @@ page_fault_retry: */ fault_rel_pfn = fault_pfn - region->start_pfn; - if (fault_rel_pfn < kbase_reg_current_backed_size(region)) { - dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", + current_backed_size = kbase_reg_current_backed_size(region); + + if (fault_rel_pfn < current_backed_size) { + dev_dbg(kbdev->dev, + "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", fault->addr, region->start_pfn, region->start_pfn + - kbase_reg_current_backed_size(region)); + current_backed_size); mutex_lock(&kbdev->mmu_hw_mutex); @@ -717,8 +726,9 @@ page_fault_retry: new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn); /* cap to max vsize */ - new_pages = min(new_pages, region->nr_pages - - kbase_reg_current_backed_size(region)); + new_pages = min(new_pages, region->nr_pages - current_backed_size); + dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n", + new_pages); if (new_pages == 0) { mutex_lock(&kbdev->mmu_hw_mutex); @@ -750,8 +760,8 @@ page_fault_retry: u32 op; /* alloc success */ - KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) - <= region->nr_pages); + WARN_ON(kbase_reg_current_backed_size(region) > + region->nr_pages); /* set up the new pages */ pfn_offset = kbase_reg_current_backed_size(region) - new_pages; @@ -783,6 +793,29 @@ page_fault_retry: } KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, (u64)new_pages); + trace_mali_mmu_page_fault_grow(region, fault, new_pages); + +#if MALI_INCREMENTAL_RENDERING + /* Switch to incremental rendering if we have nearly run out of + * memory in a JIT memory allocation. 
+ */ + if (region->threshold_pages && + kbase_reg_current_backed_size(region) > + region->threshold_pages) { + + dev_dbg(kctx->kbdev->dev, + "%zu pages exceeded IR threshold %zu\n", + new_pages + current_backed_size, + region->threshold_pages); + + if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { + dev_dbg(kctx->kbdev->dev, + "Get region %p for IR\n", + (void *)region); + kbase_va_region_alloc_get(kctx, region); + } + } +#endif /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); @@ -871,6 +904,7 @@ page_fault_retry: kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Page allocation failure", fault); } else { + dev_dbg(kbdev->dev, "Try again after pool_grow\n"); goto page_fault_retry; } } @@ -886,6 +920,7 @@ fault_done: kbasep_js_runpool_release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); + dev_dbg(kbdev->dev, "Leaving page_fault_worker %p\n", (void *)data); } static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, diff --git a/mali_kbase/mmu/mali_kbase_mmu_internal.h b/mali_kbase/mmu/mali_kbase_mmu_internal.h index 54b0c35..28bd341 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_internal.h +++ b/mali_kbase/mmu/mali_kbase_mmu_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,4 +46,18 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as, struct kbase_fault *fault); +/** + * kbase_mmu_switch_to_ir() - Switch to incremental rendering if possible + * @kctx The kbase_context for the faulting address space. + * @reg Reference of a growable GPU memory region in the same context. + * Takes ownership of the reference if successful. + * + * Used to switch to incremental rendering if we have nearly run out of + * virtual address space in a growable memory region. + * + * Return 0 if successful, otherwise a negative error code. + */ +int kbase_mmu_switch_to_ir(struct kbase_context *kctx, + struct kbase_va_region *reg); + #endif /* _KBASE_MMU_INTERNAL_H_ */ diff --git a/mali_kbase/tests/kutf/build.bp b/mali_kbase/tests/kutf/build.bp index f0c7a0c..32eab14 100644 --- a/mali_kbase/tests/kutf/build.bp +++ b/mali_kbase/tests/kutf/build.bp @@ -1,13 +1,16 @@ /* - * Copyright: - * ---------------------------------------------------------------------------- - * This confidential and proprietary software may be used only as authorized - * by a licensing agreement from ARM Limited. - * (C) COPYRIGHT 2018-2019 ARM Limited, ALL RIGHTS RESERVED - * The entire notice above must be reproduced on all authorized copies and - * copies may only be made to the extent permitted by a licensing agreement - * from ARM Limited. - * ---------------------------------------------------------------------------- + * + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * */ bob_kernel_module { diff --git a/mali_kbase/tests/kutf/kutf_suite.c b/mali_kbase/tests/kutf/kutf_suite.c index 3307c0e..3f15669 100644 --- a/mali_kbase/tests/kutf/kutf_suite.c +++ b/mali_kbase/tests/kutf/kutf_suite.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014, 2017-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2017-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -634,6 +634,17 @@ static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix) kfree(test_fix); } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) +/* Adapting to the upstream debugfs_create_x32() change */ +static int ktufp_u32_get(void *data, u64 *val) +{ + *val = *(u32 *)data; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(kutfp_fops_x32_ro, ktufp_u32_get, NULL, "0x%08llx\n"); +#endif + void kutf_add_test_with_filters_and_data( struct kutf_suite *suite, unsigned int id, @@ -668,8 +679,13 @@ void kutf_add_test_with_filters_and_data( } test_func->filters = filters; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) + tmp = debugfs_create_file_unsafe("filters", S_IROTH, test_func->dir, + &test_func->filters, &kutfp_fops_x32_ro); +#else tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir, &test_func->filters); +#endif if (!tmp) { pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name); goto fail_file; diff --git a/mali_kbase/tests/mali_kutf_irq_test/build.bp b/mali_kbase/tests/mali_kutf_irq_test/build.bp index 971f092..90efdcf 100644 --- a/mali_kbase/tests/mali_kutf_irq_test/build.bp +++ b/mali_kbase/tests/mali_kutf_irq_test/build.bp @@ -1,13 +1,16 @@ /* - * Copyright: - * ---------------------------------------------------------------------------- - * This confidential and proprietary software may be used only as authorized - * by a licensing agreement from ARM Limited. - * (C) COPYRIGHT 2018-2019 ARM Limited, ALL RIGHTS RESERVED - * The entire notice above must be reproduced on all authorized copies and - * copies may only be made to the extent permitted by a licensing agreement - * from ARM Limited. - * ---------------------------------------------------------------------------- + * + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * */ bob_kernel_module { diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c index 201b30e..5d073be 100644 --- a/mali_kbase/tl/mali_kbase_timeline.c +++ b/mali_kbase/tl/mali_kbase_timeline.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -131,6 +131,7 @@ int kbase_timeline_init(struct kbase_timeline **timeline, kbasep_timeline_autoflush_timer_callback); result->is_enabled = timeline_is_enabled; + *timeline = result; return 0; } @@ -142,6 +143,7 @@ void kbase_timeline_term(struct kbase_timeline *timeline) if (!timeline) return; + for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; i++) kbase_tlstream_term(&timeline->streams[i]); diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c index 9a899f2..6e09a17 100644 --- a/mali_kbase/tl/mali_kbase_timeline_io.c +++ b/mali_kbase/tl/mali_kbase_timeline_io.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -85,6 +85,43 @@ static int kbasep_timeline_io_packet_pending( } /** + * copy_stream_header() - copy timeline stream header. + * + * @buffer: Pointer to the buffer provided by user. + * @size: Maximum amount of data that can be stored in the buffer. + * @copy_len: Pointer to amount of bytes that has been copied already + * within the read system call. + * @hdr: Pointer to the stream header. + * @hdr_size: Header size. + * @hdr_btc: Pointer to the remaining number of bytes to copy. + * + * Returns: 0 if success, -1 otherwise. + */ +static inline int copy_stream_header( + char __user *buffer, size_t size, ssize_t *copy_len, + const char *hdr, + size_t hdr_size, + size_t *hdr_btc) +{ + const size_t offset = hdr_size - *hdr_btc; + const size_t copy_size = MIN(size - *copy_len, *hdr_btc); + + if (!*hdr_btc) + return 0; + + if (WARN_ON(*hdr_btc > hdr_size)) + return -1; + + if (copy_to_user(&buffer[*copy_len], &hdr[offset], copy_size)) + return -1; + + *hdr_btc -= copy_size; + *copy_len += copy_size; + + return 0; +} + +/** * kbasep_timeline_copy_header - copy timeline headers to the user * @timeline: Timeline instance * @buffer: Pointer to the buffer provided by user @@ -93,51 +130,28 @@ static int kbasep_timeline_io_packet_pending( * within the read system call. * * This helper function checks if timeline headers have not been sent - * to the user, and if so, sends them. @ref copy_len is respectively + * to the user, and if so, sends them. copy_len is respectively * updated. * * Returns: 0 if success, -1 if copy_to_user has failed. 
*/ -static inline int kbasep_timeline_copy_header( +static inline int kbasep_timeline_copy_headers( struct kbase_timeline *timeline, char __user *buffer, size_t size, ssize_t *copy_len) { - if (timeline->obj_header_btc) { - size_t offset = obj_desc_header_size - - timeline->obj_header_btc; - - size_t header_cp_size = MIN( - size - *copy_len, - timeline->obj_header_btc); - - if (copy_to_user( - &buffer[*copy_len], - &obj_desc_header[offset], - header_cp_size)) - return -1; - - timeline->obj_header_btc -= header_cp_size; - *copy_len += header_cp_size; - } - - if (timeline->aux_header_btc) { - size_t offset = aux_desc_header_size - - timeline->aux_header_btc; - size_t header_cp_size = MIN( - size - *copy_len, - timeline->aux_header_btc); - - if (copy_to_user( - &buffer[*copy_len], - &aux_desc_header[offset], - header_cp_size)) - return -1; - - timeline->aux_header_btc -= header_cp_size; - *copy_len += header_cp_size; - } + if (copy_stream_header(buffer, size, copy_len, + obj_desc_header, + obj_desc_header_size, + &timeline->obj_header_btc)) + return -1; + + if (copy_stream_header(buffer, size, copy_len, + aux_desc_header, + aux_desc_header_size, + &timeline->aux_header_btc)) + return -1; return 0; } @@ -183,7 +197,7 @@ static ssize_t kbasep_timeline_io_read( unsigned int rb_idx; size_t rb_size; - if (kbasep_timeline_copy_header( + if (kbasep_timeline_copy_headers( timeline, buffer, size, ©_len)) { copy_len = -EFAULT; break; @@ -305,6 +319,7 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) timeline = (struct kbase_timeline *) filp->private_data; + /* Stop autoflush timer before releasing access to streams. */ atomic_set(&timeline->autoflush_timer_active, 0); del_timer_sync(&timeline->autoflush_timer); diff --git a/mali_kbase/tl/mali_kbase_timeline_priv.h b/mali_kbase/tl/mali_kbase_timeline_priv.h index d4c4773..73499ce 100644 --- a/mali_kbase/tl/mali_kbase_timeline_priv.h +++ b/mali_kbase/tl/mali_kbase_timeline_priv.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,6 +26,7 @@ #include <mali_kbase.h> #include "mali_kbase_tlstream.h" + #include <linux/timer.h> #include <linux/atomic.h> #include <linux/mutex.h> diff --git a/mali_kbase/tl/mali_kbase_tlstream.c b/mali_kbase/tl/mali_kbase_tlstream.c index 2a76bc0..bec4be7 100644 --- a/mali_kbase/tl/mali_kbase_tlstream.c +++ b/mali_kbase/tl/mali_kbase_tlstream.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -106,20 +106,31 @@ void kbase_tlstream_reset(struct kbase_tlstream *stream) atomic_set(&stream->rbi, 0); } -/* Configuration of timeline streams generated by kernel. - * Kernel emit only streams containing either timeline object events or - * auxiliary events. All streams have stream id value of 1 (as opposed to user - * space streams that have value of 0). - */ +/* Configuration of timeline streams generated by kernel. 
*/ static const struct { enum tl_packet_family pkt_family; enum tl_packet_class pkt_class; enum tl_packet_type pkt_type; - unsigned int stream_id; + enum tl_stream_id stream_id; } tl_stream_cfg[TL_STREAM_TYPE_COUNT] = { - {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1}, - {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY, 1}, - {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY, 1} + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_OBJ, + TL_PACKET_TYPE_SUMMARY, + TL_STREAM_ID_KERNEL, + }, + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_OBJ, + TL_PACKET_TYPE_BODY, + TL_STREAM_ID_KERNEL, + }, + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_AUX, + TL_PACKET_TYPE_BODY, + TL_STREAM_ID_KERNEL, + }, }; void kbase_tlstream_init( diff --git a/mali_kbase/tl/mali_kbase_tlstream.h b/mali_kbase/tl/mali_kbase_tlstream.h index 5797738..427bb09 100644 --- a/mali_kbase/tl/mali_kbase_tlstream.h +++ b/mali_kbase/tl/mali_kbase_tlstream.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -97,7 +97,6 @@ enum tl_stream_type { TL_STREAM_TYPE_OBJ_SUMMARY = TL_STREAM_TYPE_FIRST, TL_STREAM_TYPE_OBJ, TL_STREAM_TYPE_AUX, - TL_STREAM_TYPE_COUNT }; diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c index bae95b4..b028ef8 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.c +++ b/mali_kbase/tl/mali_kbase_tracepoints.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -118,6 +118,7 @@ enum tl_msg_id_obj { KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, + KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, KBASE_OBJ_MSG_COUNT, }; @@ -136,404 +137,410 @@ enum tl_msg_id_aux { KBASE_AUX_MSG_COUNT, }; -#define OBJ_TL_LIST \ - TP_DESC(KBASE_TL_NEW_CTX, \ +#define OBJ_TP_LIST \ + TRACEPOINT_DESC(KBASE_TL_NEW_CTX, \ "object ctx is created", \ "@pII", \ "ctx,ctx_nr,tgid") \ - TP_DESC(KBASE_TL_NEW_GPU, \ + TRACEPOINT_DESC(KBASE_TL_NEW_GPU, \ "object gpu is created", \ "@pII", \ "gpu,gpu_id,core_count") \ - TP_DESC(KBASE_TL_NEW_LPU, \ + TRACEPOINT_DESC(KBASE_TL_NEW_LPU, \ "object lpu is created", \ "@pII", \ "lpu,lpu_nr,lpu_fn") \ - TP_DESC(KBASE_TL_NEW_ATOM, \ + TRACEPOINT_DESC(KBASE_TL_NEW_ATOM, \ "object atom is created", \ "@pI", \ "atom,atom_nr") \ - TP_DESC(KBASE_TL_NEW_AS, \ + TRACEPOINT_DESC(KBASE_TL_NEW_AS, \ "address space object is created", \ "@pI", \ "address_space,as_nr") \ - TP_DESC(KBASE_TL_DEL_CTX, \ + TRACEPOINT_DESC(KBASE_TL_DEL_CTX, \ "context is destroyed", \ "@p", \ "ctx") \ - TP_DESC(KBASE_TL_DEL_ATOM, \ + TRACEPOINT_DESC(KBASE_TL_DEL_ATOM, \ "atom is destroyed", \ "@p", \ "atom") \ - TP_DESC(KBASE_TL_LIFELINK_LPU_GPU, \ + TRACEPOINT_DESC(KBASE_TL_LIFELINK_LPU_GPU, \ "lpu is deleted with gpu", \ "@pp", \ "lpu,gpu") \ - TP_DESC(KBASE_TL_LIFELINK_AS_GPU, \ + TRACEPOINT_DESC(KBASE_TL_LIFELINK_AS_GPU, \ "address space is deleted with gpu", \ "@pp", \ "address_space,gpu") \ - TP_DESC(KBASE_TL_RET_CTX_LPU, \ + TRACEPOINT_DESC(KBASE_TL_RET_CTX_LPU, \ "context is retained by lpu", \ "@pp", \ "ctx,lpu") \ - TP_DESC(KBASE_TL_RET_ATOM_CTX, \ + TRACEPOINT_DESC(KBASE_TL_RET_ATOM_CTX, \ "atom is retained by context", \ "@pp", \ "atom,ctx") \ - TP_DESC(KBASE_TL_RET_ATOM_LPU, \ + TRACEPOINT_DESC(KBASE_TL_RET_ATOM_LPU, \ "atom is retained by lpu", \ "@pps", \ "atom,lpu,attrib_match_list") \ - TP_DESC(KBASE_TL_NRET_CTX_LPU, \ + TRACEPOINT_DESC(KBASE_TL_NRET_CTX_LPU, \ "context is released by lpu", \ "@pp", \ "ctx,lpu") \ - TP_DESC(KBASE_TL_NRET_ATOM_CTX, \ + TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_CTX, \ "atom is released by context", \ "@pp", \ "atom,ctx") \ - TP_DESC(KBASE_TL_NRET_ATOM_LPU, \ + TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_LPU, \ "atom is released by lpu", \ "@pp", \ "atom,lpu") \ - TP_DESC(KBASE_TL_RET_AS_CTX, \ + TRACEPOINT_DESC(KBASE_TL_RET_AS_CTX, \ "address space is retained by context", \ "@pp", \ "address_space,ctx") \ - TP_DESC(KBASE_TL_NRET_AS_CTX, \ + TRACEPOINT_DESC(KBASE_TL_NRET_AS_CTX, \ "address space is released by context", \ "@pp", \ "address_space,ctx") \ - TP_DESC(KBASE_TL_RET_ATOM_AS, \ + TRACEPOINT_DESC(KBASE_TL_RET_ATOM_AS, \ "atom is retained by address space", \ "@pp", \ "atom,address_space") \ - TP_DESC(KBASE_TL_NRET_ATOM_AS, \ + TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_AS, \ "atom is released by address space", \ "@pp", \ "atom,address_space") \ - TP_DESC(KBASE_TL_ATTRIB_ATOM_CONFIG, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_CONFIG, \ "atom job slot attributes", \ "@pLLI", \ "atom,descriptor,affinity,config") \ - TP_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \ "atom priority", \ "@pI", \ "atom,prio") \ - TP_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \ "atom state", \ "@pI", \ "atom,state") \ - 
TP_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \ "atom caused priority change", \ "@p", \ "atom") \ - TP_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \ "jit done for atom", \ "@pLLILILLL", \ "atom,edit_addr,new_addr,jit_flags,mem_flags,j_id,com_pgs,extent,va_pgs") \ - TP_DESC(KBASE_TL_JIT_USEDPAGES, \ + TRACEPOINT_DESC(KBASE_TL_JIT_USEDPAGES, \ "used pages for jit", \ "@LI", \ "used_pages,j_id") \ - TP_DESC(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, \ "Information about JIT allocations", \ "@pLLLIIIII", \ "atom,va_pgs,com_pgs,extent,j_id,bin_id,max_allocs,jit_flags,usg_id") \ - TP_DESC(KBASE_TL_ATTRIB_ATOM_JITFREEINFO, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITFREEINFO, \ "Information about JIT frees", \ "@pI", \ "atom,j_id") \ - TP_DESC(KBASE_TL_ATTRIB_AS_CONFIG, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_AS_CONFIG, \ "address space attributes", \ "@pLLL", \ "address_space,transtab,memattr,transcfg") \ - TP_DESC(KBASE_TL_EVENT_LPU_SOFTSTOP, \ + TRACEPOINT_DESC(KBASE_TL_EVENT_LPU_SOFTSTOP, \ "softstop event on given lpu", \ "@p", \ "lpu") \ - TP_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, \ "atom softstopped", \ "@p", \ "atom") \ - TP_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, \ "atom softstop issued", \ "@p", \ "atom") \ - TP_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_START, \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_START, \ "atom soft job has started", \ "@p", \ "atom") \ - TP_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_END, \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_END, \ "atom soft job has completed", \ "@p", \ "atom") \ - TP_DESC(KBASE_JD_GPU_SOFT_RESET, \ + TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \ "gpu soft reset", \ "@p", \ "gpu") \ - TP_DESC(KBASE_TL_KBASE_NEW_DEVICE, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_DEVICE, \ "New KBase Device", \ "@III", \ "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs") \ - TP_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \ "CSG is programmed to a slot", \ "@III", \ "kbase_device_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index") \ - TP_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \ "CSG is deprogrammed from a slot", \ "@II", \ "kbase_device_id,kbase_device_csg_slot_index") \ - TP_DESC(KBASE_TL_KBASE_NEW_CTX, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \ "New KBase Context", \ "@II", \ "kernel_ctx_id,kbase_device_id") \ - TP_DESC(KBASE_TL_KBASE_DEL_CTX, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_CTX, \ "Delete KBase Context", \ "@I", \ "kernel_ctx_id") \ - TP_DESC(KBASE_TL_KBASE_NEW_KCPUQUEUE, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_KCPUQUEUE, \ "New KCPU Queue", \ "@pII", \ "kcpu_queue,kernel_ctx_id,kcpuq_num_pending_cmds") \ - TP_DESC(KBASE_TL_KBASE_DEL_KCPUQUEUE, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_KCPUQUEUE, \ "Delete KCPU Queue", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, \ "KCPU Queue enqueues Signal on Fence", \ "@pp", \ "kcpu_queue,fence") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, \ "KCPU Queue enqueues Wait on Fence", \ "@pp", \ "kcpu_queue,fence") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + 
TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ "Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ "Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ "@pLI", \ "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ "End array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, \ "Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, \ "Array item of KCPU Queue enqueues Set on Cross Queue Sync Object", \ "@pL", \ "kcpu_queue,cqs_obj_gpu_addr") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, \ "End array of KCPU Queue enqueues Set on Cross Queue Sync Object", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ "Begin array of KCPU Queue enqueues Debug Copy", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ "Array item of KCPU Queue enqueues Debug Copy", \ "@pL", \ "kcpu_queue,debugcopy_dst_size") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ "End array of KCPU Queue enqueues Debug Copy", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \ "KCPU Queue enqueues Map Import", \ "@pL", \ "kcpu_queue,map_import_buf_gpu_addr") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, \ "KCPU Queue enqueues Unmap Import", \ "@pL", \ "kcpu_queue,map_import_buf_gpu_addr") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, \ "KCPU Queue enqueues Unmap Import ignoring reference count", \ "@pL", \ "kcpu_queue,map_import_buf_gpu_addr") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ "Begin array of KCPU Queue enqueues JIT Alloc", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ "Array item of KCPU Queue enqueues JIT Alloc", \ "@pLLLLIIIII", \ "kcpu_queue,jit_alloc_gpu_alloc_addr_dest,jit_alloc_va_pages,jit_alloc_commit_pages,jit_alloc_extent,jit_alloc_jit_id,jit_alloc_bin_id,jit_alloc_max_allocations,jit_alloc_flags,jit_alloc_usage_id") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ "End array of KCPU Queue enqueues JIT Alloc", \ "@p", \ 
"kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, \ "Begin array of KCPU Queue enqueues JIT Free", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, \ "Array item of KCPU Queue enqueues JIT Free", \ "@pI", \ "kcpu_queue,jit_alloc_jit_id") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, \ "End array of KCPU Queue enqueues JIT Free", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \ "KCPU Queue starts a Signal on Fence", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \ "KCPU Queue ends a Signal on Fence", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \ "KCPU Queue starts a Wait on Fence", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \ "KCPU Queue ends a Wait on Fence", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \ "KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \ "KCPU Queue ends a Wait on an array of Cross Queue Sync Objects", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \ "KCPU Queue executes a Set on an array of Cross Queue Sync Objects", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, \ "KCPU Queue starts an array of Debug Copys", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, \ "KCPU Queue ends an array of Debug Copys", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \ "KCPU Queue starts a Map Import", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \ "KCPU Queue ends a Map Import", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \ "KCPU Queue starts an Unmap Import", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \ "KCPU Queue ends an Unmap Import", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \ "KCPU Queue starts an Unmap Import ignoring reference count", \ "@p", \ "kcpu_queue") \ - 
TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \ "KCPU Queue ends an Unmap Import ignoring reference count", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \ "KCPU Queue starts an array of JIT Allocs", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ "Begin array of KCPU Queue ends an array of JIT Allocs", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ "Array item of KCPU Queue ends an array of JIT Allocs", \ "@pLL", \ "kcpu_queue,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ "End array of KCPU Queue ends an array of JIT Allocs", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, \ "KCPU Queue starts an array of JIT Frees", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ "Begin array of KCPU Queue ends an array of JIT Frees", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ "Array item of KCPU Queue ends an array of JIT Frees", \ "@pL", \ "kcpu_queue,jit_free_pages_used") \ - TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ "End array of KCPU Queue ends an array of JIT Frees", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, \ "KCPU Queue executes an Error Barrier", \ "@p", \ "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \ + "An overflow has happened with the CSFFW Timeline stream", \ + "@LL", \ + "csffw_timestamp,csffw_cycle") \ -#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header -#define MIPE_HEADER_TP_LIST OBJ_TL_LIST -#define MIPE_HEADER_TP_LIST_COUNT KBASE_OBJ_MSG_COUNT -#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ +#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header +#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL +#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ +#define MIPE_HEADER_TRACEPOINT_LIST OBJ_TP_LIST +#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_OBJ_MSG_COUNT #include "mali_kbase_mipe_gen_header.h" const char *obj_desc_header = (const char *) &__obj_desc_header; const size_t obj_desc_header_size = sizeof(__obj_desc_header); -#define AUX_TL_LIST \ - TP_DESC(KBASE_AUX_PM_STATE, \ +#define AUX_TP_LIST \ + TRACEPOINT_DESC(KBASE_AUX_PM_STATE, \ "PM state", \ "@IL", \ "core_type,core_state_bitset") \ - TP_DESC(KBASE_AUX_PAGEFAULT, \ + TRACEPOINT_DESC(KBASE_AUX_PAGEFAULT, \ "Page fault", \ "@IIL", \ "ctx_nr,as_nr,page_cnt_change") \ - TP_DESC(KBASE_AUX_PAGESALLOC, \ + TRACEPOINT_DESC(KBASE_AUX_PAGESALLOC, \ "Total alloc pages change", \ "@IL", \ "ctx_nr,page_cnt") \ - 
TP_DESC(KBASE_AUX_DEVFREQ_TARGET, \ + TRACEPOINT_DESC(KBASE_AUX_DEVFREQ_TARGET, \ "New device frequency target", \ "@L", \ "target_freq") \ - TP_DESC(KBASE_AUX_PROTECTED_ENTER_START, \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_START, \ "enter protected mode start", \ "@p", \ "gpu") \ - TP_DESC(KBASE_AUX_PROTECTED_ENTER_END, \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_END, \ "enter protected mode end", \ "@p", \ "gpu") \ - TP_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \ "leave protected mode start", \ "@p", \ "gpu") \ - TP_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \ "leave protected mode end", \ "@p", \ "gpu") \ - TP_DESC(KBASE_AUX_JIT_STATS, \ + TRACEPOINT_DESC(KBASE_AUX_JIT_STATS, \ "per-bin JIT statistics", \ "@IIIIII", \ "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages") \ - TP_DESC(KBASE_AUX_EVENT_JOB_SLOT, \ + TRACEPOINT_DESC(KBASE_AUX_EVENT_JOB_SLOT, \ "event on a given job slot", \ "@pIII", \ "ctx,slot_nr,atom_nr,event") \ -#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header -#define MIPE_HEADER_TP_LIST AUX_TL_LIST -#define MIPE_HEADER_TP_LIST_COUNT KBASE_AUX_MSG_COUNT -#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_AUX +#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header +#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL +#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_AUX +#define MIPE_HEADER_TRACEPOINT_LIST AUX_TP_LIST +#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_AUX_MSG_COUNT #include "mali_kbase_mipe_gen_header.h" @@ -2988,4 +2995,30 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( + struct kbase_tlstream *stream, + u64 csffw_timestamp, + u64 csffw_cycle) +{ + const u32 msg_id = KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(csffw_timestamp) + + sizeof(csffw_cycle) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_timestamp, sizeof(csffw_timestamp)); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_cycle, sizeof(csffw_cycle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + /* clang-format on */ diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h index b2c20ae..fa2c399 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.h +++ b/mali_kbase/tl/mali_kbase_tracepoints.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -454,6 +454,10 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( struct kbase_tlstream *stream, const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( + struct kbase_tlstream *stream, + u64 csffw_timestamp, + u64 csffw_cycle); struct kbase_tlstream; @@ -2467,6 +2471,27 @@ struct kbase_tlstream; kcpu_queue); \ } while (0) +/** + * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW - + * An overflow has happened with the CSFFW Timeline stream + * + * @kbdev: Kbase device + * @csffw_timestamp: Timestamp of a CSFFW event + * @csffw_cycle: Cycle number of a CSFFW event + */ +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ + kbdev, \ + csffw_timestamp, \ + csffw_cycle \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_is_enabled); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + csffw_timestamp, csffw_cycle); \ + } while (0) + /* Gator tracepoints are hooked into TLSTREAM interface. * When the following tracepoints are called, corresponding |