-rw-r--r--  mali_kbase/Kbuild | 19
-rw-r--r--  mali_kbase/Kconfig | 17
-rw-r--r--  mali_kbase/backend/gpu/Kbuild | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_devfreq.c | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_device_internal.h | 4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_backend.c | 19
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_defs.h | 5
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c | 163
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_internal.h | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c | 31
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_defs.h | 17
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c | 32
-rw-r--r--  mali_kbase/build.bp | 24
-rw-r--r--  mali_kbase/context/backend/mali_kbase_context_jm.c | 12
-rw-r--r--  mali_kbase/context/mali_kbase_context.c | 6
-rw-r--r--  mali_kbase/context/mali_kbase_context.h | 2
-rw-r--r--  mali_kbase/device/mali_kbase_device.c | 9
-rw-r--r--  mali_kbase/device/mali_kbase_device.h | 9
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_coherency.h | 2
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_fault.h | 13
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_id.h | 4
-rw-r--r--  mali_kbase/jm/mali_base_jm_kernel.h | 1002
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_defs.h | 807
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_ioctl.h | 134
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_js.h | 892
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h | 75
-rw-r--r--  mali_kbase/mali_base_kernel.h | 1365
-rw-r--r--  mali_kbase/mali_base_mem_priv.h | 4
-rw-r--r--  mali_kbase/mali_kbase.h | 44
-rw-r--r--  mali_kbase/mali_kbase_10969_workaround.c | 209
-rw-r--r--  mali_kbase/mali_kbase_10969_workaround.h | 37
-rw-r--r--  mali_kbase/mali_kbase_config_defaults.h | 11
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c | 86
-rw-r--r--  mali_kbase/mali_kbase_cs_experimental.h | 36
-rw-r--r--  mali_kbase/mali_kbase_ctx_sched.c | 12
-rw-r--r--  mali_kbase/mali_kbase_defs.h | 797
-rw-r--r--  mali_kbase/mali_kbase_disjoint_events.c | 2
-rw-r--r--  mali_kbase/mali_kbase_dma_fence.c | 4
-rw-r--r--  mali_kbase/mali_kbase_dummy_job_wa.c | 1
-rw-r--r--  mali_kbase/mali_kbase_dummy_job_wa.h | 2
-rw-r--r--  mali_kbase/mali_kbase_event.c | 17
-rw-r--r--  mali_kbase/mali_kbase_fence.c | 4
-rw-r--r--  mali_kbase/mali_kbase_fence.h | 4
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.c | 28
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.h | 6
-rw-r--r--  mali_kbase/mali_kbase_gpuprops_types.h | 4
-rw-r--r--  mali_kbase/mali_kbase_gwt.c | 2
-rw-r--r--  mali_kbase/mali_kbase_hw.c | 5
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_instr.h | 11
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_jm.h | 2
-rw-r--r--  mali_kbase/mali_kbase_hwcnt.c | 13
-rw-r--r--  mali_kbase/mali_kbase_ioctl.h | 174
-rw-r--r--  mali_kbase/mali_kbase_jd.c | 415
-rw-r--r--  mali_kbase/mali_kbase_jm.c | 18
-rw-r--r--  mali_kbase/mali_kbase_js.c | 1115
-rw-r--r--  mali_kbase/mali_kbase_js.h | 872
-rw-r--r--  mali_kbase/mali_kbase_js_defs.h | 50
-rw-r--r--  mali_kbase/mali_kbase_mem.c | 391
-rw-r--r--  mali_kbase/mali_kbase_mem.h | 211
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c | 115
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.h | 31
-rw-r--r--  mali_kbase/mali_kbase_mipe_gen_header.h | 205
-rw-r--r--  mali_kbase/mali_kbase_mipe_proto.h | 16
-rw-r--r--  mali_kbase/mali_kbase_pm.c | 2
-rw-r--r--  mali_kbase/mali_kbase_smc.c | 2
-rw-r--r--  mali_kbase/mali_kbase_softjobs.c | 193
-rw-r--r--  mali_kbase/mali_kbase_vinstr.c | 23
-rw-r--r--  mali_kbase/mali_linux_trace.h | 335
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_jm.c | 31
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c | 51
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_internal.h | 16
-rw-r--r--  mali_kbase/tests/kutf/build.bp | 21
-rw-r--r--  mali_kbase/tests/kutf/kutf_suite.c | 18
-rw-r--r--  mali_kbase/tests/mali_kutf_irq_test/build.bp | 21
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline.c | 4
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline_io.c | 91
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline_priv.h | 3
-rw-r--r--  mali_kbase/tl/mali_kbase_tlstream.c | 31
-rw-r--r--  mali_kbase/tl/mali_kbase_tlstream.h | 3
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.c | 245
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.h | 27
81 files changed, 6856 insertions, 3884 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index 400ebe0..7abe8d3 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -21,7 +21,7 @@
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r23p0-01rel0"
+MALI_RELEASE_NAME ?= "r24p0-01rel0"
# Paths required for build
KBASE_PATH = $(src)
@@ -34,8 +34,15 @@ MALI_USE_CSF ?= 0
MALI_UNIT_TEST ?= 0
MALI_KERNEL_TEST_API ?= 0
MALI_COVERAGE ?= 0
-MALI_CS_EXPERIMENTAL ?= 0
CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
+# Experimental features (corresponding -D definition should be appended to
+# DEFINES below, e.g. for MALI_EXPERIMENTAL_FEATURE,
+# -DMALI_EXPERIMENTAL_FEATURE=$(MALI_EXPERIMENTAL_FEATURE) should be appended)
+#
+# Experimental features must default to disabled, e.g.:
+# MALI_EXPERIMENTAL_FEATURE ?= 0
+MALI_JIT_PRESSURE_LIMIT ?= 0
+MALI_INCREMENTAL_RENDERING ?= 0
# Set up our defines, which will be passed to gcc
DEFINES = \
@@ -45,7 +52,8 @@ DEFINES = \
-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
-DMALI_COVERAGE=$(MALI_COVERAGE) \
-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
- -DMALI_CS_EXPERIMENTAL=$(MALI_CS_EXPERIMENTAL)
+ -DMALI_JIT_PRESSURE_LIMIT=$(MALI_JIT_PRESSURE_LIMIT) \
+ -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING)
ifeq ($(KBUILD_EXTMOD),)
# in-tree
@@ -74,7 +82,6 @@ SRC := \
mali_kbase_jm.c \
mali_kbase_gpuprops.c \
mali_kbase_js.c \
- mali_kbase_event.c \
mali_kbase_pm.c \
mali_kbase_config.c \
mali_kbase_vinstr.c \
@@ -85,8 +92,6 @@ SRC := \
mali_kbase_hwcnt_types.c \
mali_kbase_hwcnt_virtualizer.c \
mali_kbase_softjobs.c \
- mali_kbase_10969_workaround.c \
- mali_kbase_dummy_job_wa.c \
mali_kbase_hw.c \
mali_kbase_debug.c \
mali_kbase_gpu_memory_debugfs.c \
@@ -122,7 +127,9 @@ ifeq ($(MALI_USE_CSF),1)
context/backend/mali_kbase_context_csf.c
else
SRC += \
+ mali_kbase_dummy_job_wa.c \
mali_kbase_debug_job_fault.c \
+ mali_kbase_event.c \
mali_kbase_jd.c \
mali_kbase_jd_debugfs.c \
mali_kbase_js_ctx_attr.c \
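[Note] The Kbuild change above replaces the single MALI_CS_EXPERIMENTAL switch with per-feature build options (MALI_JIT_PRESSURE_LIMIT, MALI_INCREMENTAL_RENDERING) that are forwarded to the compiler as -D macros. A minimal standalone sketch of how C code typically gates on such a 0/1 define; the function below is illustrative only and is not kbase code:

/* Illustrative only: consuming a 0/1 build define such as
 * MALI_JIT_PRESSURE_LIMIT passed via -DMALI_JIT_PRESSURE_LIMIT=$(...).
 */
#include <stdio.h>

#ifndef MALI_JIT_PRESSURE_LIMIT
#define MALI_JIT_PRESSURE_LIMIT 0	/* experimental features default to off */
#endif

static void report_build_config(void)
{
#if MALI_JIT_PRESSURE_LIMIT
	printf("JIT pressure limit support compiled in\n");
#else
	printf("JIT pressure limit support compiled out\n");
#endif
}

int main(void)
{
	report_build_config();
	return 0;
}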
diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index a739363..a46305d 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -1,5 +1,5 @@
#
-# (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -265,5 +265,20 @@ config MALI_PRFCNT_SET_SECONDARY
If unsure, say N.
+config MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+ bool "Use secondary set of performance counters"
+ depends on MALI_MIDGARD && MALI_EXPERT && !MALI_PRFCNT_SET_SECONDARY && DEBUG_FS
+ default n
+ help
+ Select this option to make the secondary set of performance counters
+ available at runtime via debugfs. Kernel features that depend on an
+ access to the primary set of counters may become unavailable.
+
+ This feature is unsupported and unstable, and may break at any time.
+ Enabling this option will prevent power management from working
+ optimally and may cause instrumentation tools to return bogus results.
+
+ If unsure, say N.
+
source "drivers/gpu/arm/midgard/platform/Kconfig"
source "drivers/gpu/arm/midgard/tests/Kconfig"
diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild
index 8fe7aba..2449e80 100644
--- a/mali_kbase/backend/gpu/Kbuild
+++ b/mali_kbase/backend/gpu/Kbuild
@@ -25,7 +25,6 @@ BACKEND += \
backend/gpu/mali_kbase_gpuprops_backend.c \
backend/gpu/mali_kbase_irq_linux.c \
backend/gpu/mali_kbase_instr_backend.c \
- backend/gpu/mali_kbase_jm_as.c \
backend/gpu/mali_kbase_js_backend.c \
backend/gpu/mali_kbase_pm_backend.c \
backend/gpu/mali_kbase_pm_driver.c \
@@ -41,6 +40,7 @@ ifeq ($(MALI_USE_CSF),1)
# empty
else
BACKEND += \
+ backend/gpu/mali_kbase_jm_as.c \
backend/gpu/mali_kbase_debug_job_fault_backend.c \
backend/gpu/mali_kbase_jm_hw.c \
backend/gpu/mali_kbase_jm_rb.c
diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
index e0c108c..2806f05 100644
--- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c
+++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/backend/gpu/mali_kbase_device_internal.h b/mali_kbase/backend/gpu/mali_kbase_device_internal.h
index c3e5c03..5ddc4a5 100644
--- a/mali_kbase/backend/gpu/mali_kbase_device_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_device_internal.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014,2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014,2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -90,7 +90,7 @@ void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev);
* Return: 0 if successful or a negative error code on failure.
*/
int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev,
- unsigned int wait_timeout_ms);
+ unsigned int wait_timeout_ms);
/**
* kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
index 724c664..cb3e1d3 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -71,7 +71,11 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
/* Configure */
prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+ if (kbdev->hwcnt.backend.use_secondary_override)
+#else
if (enable->use_secondary)
+#endif
prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
@@ -380,6 +384,10 @@ int kbase_instr_backend_init(struct kbase_device *kbdev)
kbdev->hwcnt.backend.triggered = 0;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+ kbdev->hwcnt.backend.use_secondary_override = false;
+#endif
+
kbdev->hwcnt.backend.cache_clean_wq =
alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
@@ -392,3 +400,12 @@ void kbase_instr_backend_term(struct kbase_device *kbdev)
{
destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
}
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev)
+{
+ debugfs_create_bool("hwcnt_use_secondary", S_IRUGO | S_IWUSR,
+ kbdev->mali_debugfs_directory,
+ &kbdev->hwcnt.backend.use_secondary_override);
+}
+#endif
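[Note] With the change above, the counter set is chosen either from the per-session flag (enable->use_secondary) or, when CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS is enabled, from a device-wide debugfs bool. A standalone sketch of the same selection precedence; the struct names and the PRFCNT_CONFIG_SETSELECT_SHIFT value are assumed stand-ins, not the kbase definitions, and the compile-time #ifdef is modelled here as a runtime flag:

/* Standalone illustration of the counter-set selection above. */
#include <stdbool.h>
#include <stdio.h>

#define PRFCNT_CONFIG_SETSELECT_SHIFT 8	/* assumed value for illustration */

struct hwcnt_enable { bool use_secondary; };
struct hwcnt_backend { bool use_secondary_override; };	/* debugfs-backed */

static unsigned int prfcnt_config_bits(const struct hwcnt_backend *backend,
				       const struct hwcnt_enable *enable,
				       bool override_via_debugfs)
{
	unsigned int cfg = 0;
	/* When the debugfs override is compiled in it replaces the
	 * per-enable flag, mirroring the #ifdef in the diff above.
	 */
	bool secondary = override_via_debugfs ?
		backend->use_secondary_override : enable->use_secondary;

	if (secondary)
		cfg |= 1u << PRFCNT_CONFIG_SETSELECT_SHIFT;
	return cfg;
}

int main(void)
{
	struct hwcnt_backend b = { .use_secondary_override = true };
	struct hwcnt_enable e = { .use_secondary = false };

	printf("cfg=0x%x\n", prfcnt_config_bits(&b, &e, true));
	return 0;
}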
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
index b7d9d31..9930968 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014, 2016, 2018, 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016, 2018, 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -47,6 +47,9 @@ enum kbase_instr_state {
struct kbase_instr_backend {
wait_queue_head_t wait;
int triggered;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+ bool use_secondary_override;
+#endif
enum kbase_instr_state state;
struct workqueue_struct *cache_clean_wq;
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index 2692f05..819edaf 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -84,6 +84,17 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
kbdev->pm.debug_core_mask[js];
}
+ if (unlikely(!affinity)) {
+#ifdef CONFIG_MALI_DEBUG
+ u64 shaders_ready =
+ kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+
+ WARN_ON(!(shaders_ready & kbdev->pm.backend.shaders_avail));
+#endif
+
+ affinity = kbdev->pm.backend.shaders_avail;
+ }
+
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
affinity & 0xFFFFFFFF);
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
@@ -92,13 +103,86 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
return affinity;
}
+/**
+ * select_job_chain() - Select which job chain to submit to the GPU
+ * @katom: Pointer to the atom about to be submitted to the GPU
+ *
+ * Selects one of the fragment job chains attached to the special atom at the
+ * end of a renderpass, or returns the address of the single job chain attached
+ * to any other type of atom.
+ *
+ * Which job chain is selected depends upon whether the tiling phase of the
+ * renderpass completed normally or was soft-stopped because it used too
+ * much memory. It also depends upon whether one of the fragment job chains
+ * has already been run as part of the same renderpass.
+ *
+ * Return: GPU virtual address of the selected job chain
+ */
+static u64 select_job_chain(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *const kctx = katom->kctx;
+ u64 jc = katom->jc;
+ struct kbase_jd_renderpass *rp;
+
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+ if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS))
+ return jc;
+
+ rp = &kctx->jctx.renderpasses[katom->renderpass_id];
+ /* We can read a subset of renderpass state without holding
+ * higher-level locks (but not end_katom, for example).
+ * If the end-of-renderpass atom is running with as-yet indeterminate
+ * OOM state then assume that the start atom was not soft-stopped.
+ */
+ switch (rp->state) {
+ case KBASE_JD_RP_OOM:
+ /* Tiling ran out of memory.
+ * Start of incremental rendering, used once.
+ */
+ jc = katom->jc_fragment.norm_read_forced_write;
+ break;
+ case KBASE_JD_RP_START:
+ case KBASE_JD_RP_PEND_OOM:
+ /* Tiling completed successfully first time.
+ * Single-iteration rendering, used once.
+ */
+ jc = katom->jc_fragment.norm_read_norm_write;
+ break;
+ case KBASE_JD_RP_RETRY_OOM:
+ /* Tiling ran out of memory again.
+ * Continuation of incremental rendering, used as
+ * many times as required.
+ */
+ jc = katom->jc_fragment.forced_read_forced_write;
+ break;
+ case KBASE_JD_RP_RETRY:
+ case KBASE_JD_RP_RETRY_PEND_OOM:
+ /* Tiling completed successfully this time.
+ * End of incremental rendering, used once.
+ */
+ jc = katom->jc_fragment.forced_read_norm_write;
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+
+ dev_dbg(kctx->kbdev->dev,
+ "Selected job chain 0x%llx for end atom %p in state %d\n",
+ jc, (void *)katom, (int)rp->state);
+
+ katom->jc = jc;
+ return jc;
+}
+
void kbase_job_hw_submit(struct kbase_device *kbdev,
struct kbase_jd_atom *katom,
int js)
{
struct kbase_context *kctx;
u32 cfg;
- u64 jc_head = katom->jc;
+ u64 const jc_head = select_job_chain(katom);
u64 affinity;
KBASE_DEBUG_ASSERT(kbdev);
@@ -109,6 +193,9 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
/* Command register must be available */
KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+ dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %p\n",
+ jc_head, (void *)katom);
+
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
jc_head & 0xFFFFFFFF);
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
@@ -139,7 +226,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
cfg |= JS_CONFIG_THREAD_PRI(8);
- if (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED)
+ if ((katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) ||
+ (katom->core_req & BASE_JD_REQ_END_RENDERPASS))
cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
if (kbase_hw_has_feature(kbdev,
@@ -492,7 +580,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
/* We are about to issue a soft stop, so mark the atom as having
* been soft stopped */
- target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED;
/* Mark the point where we issue the soft-stop command */
KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom);
@@ -656,6 +744,70 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
}
}
+static int softstop_start_rp_nolock(
+ struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ struct kbase_device *const kbdev = kctx->kbdev;
+ struct kbase_jd_atom *katom;
+ struct kbase_jd_renderpass *rp;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ katom = kbase_gpu_inspect(kbdev, 1, 0);
+
+ if (!katom) {
+ dev_dbg(kctx->kbdev->dev, "No atom on job slot\n");
+ return -ESRCH;
+ }
+
+ if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) {
+ dev_dbg(kctx->kbdev->dev,
+ "Atom %p on job slot is not start RP\n", (void *)katom);
+ return -EPERM;
+ }
+
+ if (WARN_ON(katom->renderpass_id >=
+ ARRAY_SIZE(kctx->jctx.renderpasses)))
+ return -EINVAL;
+
+ rp = &kctx->jctx.renderpasses[katom->renderpass_id];
+ if (WARN_ON(rp->state != KBASE_JD_RP_START &&
+ rp->state != KBASE_JD_RP_RETRY))
+ return -EINVAL;
+
+ dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %p\n",
+ (int)rp->state, (void *)reg);
+
+ if (WARN_ON(katom != rp->start_katom))
+ return -EINVAL;
+
+ dev_dbg(kctx->kbdev->dev, "Adding region %p to list %p\n",
+ (void *)reg, (void *)&rp->oom_reg_list);
+ list_move_tail(&reg->link, &rp->oom_reg_list);
+ dev_dbg(kctx->kbdev->dev, "Added region to list\n");
+
+ rp->state = (rp->state == KBASE_JD_RP_START ?
+ KBASE_JD_RP_PEND_OOM : KBASE_JD_RP_RETRY_PEND_OOM);
+
+ kbase_job_slot_softstop(kbdev, 1, katom);
+
+ return 0;
+}
+
+int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx,
+ struct kbase_va_region *const reg)
+{
+ struct kbase_device *const kbdev = kctx->kbdev;
+ int err;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ err = softstop_start_rp_nolock(kctx, reg);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return err;
+}
+
void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
{
struct kbase_device *kbdev = kctx->kbdev;
@@ -745,6 +897,9 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term);
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom, u32 sw_flags)
{
+ dev_dbg(kbdev->dev, "Soft-stop atom %p with flags 0x%x (s:%d)\n",
+ target_katom, sw_flags, js);
+
KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
JS_COMMAND_SOFT_STOP | sw_flags);
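[Note] select_job_chain() above picks one of four fragment job chains attached to an end-of-renderpass atom according to the renderpass state. The standalone sketch below restates that state-to-chain mapping; the enum and struct names only loosely mirror the driver's types and are illustrative, not kbase code:

/* Standalone restatement of the mapping documented in select_job_chain(). */
#include <stdint.h>
#include <stdio.h>

enum rp_state {
	RP_START,		/* tiling completed first time */
	RP_PEND_OOM,
	RP_OOM,			/* tiling ran out of memory */
	RP_RETRY,		/* tiling completed on a retry */
	RP_RETRY_PEND_OOM,
	RP_RETRY_OOM,		/* tiling ran out of memory again */
};

struct fragment_chains {
	uint64_t norm_read_norm_write;		/* single-iteration rendering */
	uint64_t norm_read_forced_write;	/* start of incremental rendering */
	uint64_t forced_read_forced_write;	/* continuation, may repeat */
	uint64_t forced_read_norm_write;	/* end of incremental rendering */
};

static uint64_t pick_fragment_chain(enum rp_state state,
				    const struct fragment_chains *jc)
{
	switch (state) {
	case RP_OOM:
		return jc->norm_read_forced_write;
	case RP_START:
	case RP_PEND_OOM:
		return jc->norm_read_norm_write;
	case RP_RETRY_OOM:
		return jc->forced_read_forced_write;
	case RP_RETRY:
	case RP_RETRY_PEND_OOM:
		return jc->forced_read_norm_write;
	default:
		return 0;
	}
}

int main(void)
{
	struct fragment_chains jc = { 0x1000, 0x2000, 0x3000, 0x4000 };

	printf("chain=0x%llx\n",
	       (unsigned long long)pick_fragment_chain(RP_OOM, &jc));
	return 0;
}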
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
index 880a89b..d1ed42d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2016, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index c860bde..6daea01 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,7 +32,6 @@
#include <mali_kbase_js.h>
#include <tl/mali_kbase_tracepoints.h>
#include <mali_kbase_hwcnt_context.h>
-#include <mali_kbase_10969_workaround.h>
#include <mali_kbase_reset_gpu.h>
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
#include <backend/gpu/mali_kbase_device_internal.h>
@@ -832,8 +831,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
break;
case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
- if (katom[idx]->atom_flags &
- KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+ if (kbase_js_atom_blocked_on_x_dep(katom[idx]))
break;
katom[idx]->gpu_rb_state =
@@ -1007,6 +1005,8 @@ void kbase_backend_run_atom(struct kbase_device *kbdev,
struct kbase_jd_atom *katom)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
+ dev_dbg(kbdev->dev, "Backend running atom %p\n", (void *)katom);
+
kbase_gpu_enqueue_atom(kbdev, katom);
kbase_backend_slot_update(kbdev);
}
@@ -1065,6 +1065,10 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
struct kbase_context *kctx = katom->kctx;
+ dev_dbg(kbdev->dev,
+ "Atom %p completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n",
+ (void *)katom, completion_code, job_tail, js);
+
lockdep_assert_held(&kbdev->hwaccess_lock);
/*
@@ -1179,19 +1183,19 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
js, completion_code);
if (job_tail != 0 && job_tail != katom->jc) {
- bool was_updated = (job_tail != katom->jc);
+ /* Some of the job has been executed */
+ dev_dbg(kbdev->dev,
+ "Update job chain address of atom %p to resume from 0x%llx\n",
+ (void *)katom, job_tail);
- /* Some of the job has been executed, so we update the job chain
- * address to where we should resume from */
katom->jc = job_tail;
- if (was_updated)
- KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
- katom, job_tail, js);
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+ katom, job_tail, js);
}
/* Only update the event code for jobs that weren't cancelled */
if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
- katom->event_code = (base_jd_event_code)completion_code;
+ katom->event_code = (enum base_jd_event_code)completion_code;
/* Complete the job, and start new ones
*
@@ -1241,8 +1245,9 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
katom = kbase_jm_complete(kbdev, katom, end_timestamp);
if (katom) {
- /* Cross-slot dependency has now become runnable. Try to submit
- * it. */
+ dev_dbg(kbdev->dev,
+ "Cross-slot dependency %p has become runnable.\n",
+ (void *)katom);
/* Check if there are lower priority jobs to soft stop */
kbase_job_slot_ctx_priority_check_locked(kctx, katom);
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
index 15b1f86..f4bcf3e 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -301,9 +301,17 @@ union kbase_pm_policy_data {
* @l2_always_on: If true, disable powering down of l2 cache.
* @shaders_state: The current state of the shader state machine.
* @shaders_avail: This is updated by the state machine when it is in a state
- * where it can handle changes to the core availability. This
- * is internal to the shader state machine and should *not* be
- * modified elsewhere.
+ * where it can write to the SHADER_PWRON or PWROFF registers
+ * to have the same set of available cores as specified by
+ * @shaders_desired_mask. So it would eventually have the same
+ * value as @shaders_desired_mask and would precisely indicate
+ * the cores that are currently available. This is internal to
+ * shader state machine and should *not* be modified elsewhere.
+ * @shaders_desired_mask: This is updated by the state machine when it is in
+ * a state where it can handle changes to the core
+ * availability (either by DVFS or sysfs). This is
+ * internal to the shader state machine and should
+ * *not* be modified elsewhere.
* @shaders_desired: True if the PM active count or power policy requires the
* shader cores to be on. This is used as an input to the
* shader power state machine. The current state of the
@@ -401,6 +409,7 @@ struct kbase_pm_backend_data {
enum kbase_l2_core_state l2_state;
enum kbase_shader_core_state shaders_state;
u64 shaders_avail;
+ u64 shaders_desired_mask;
bool l2_desired;
bool l2_always_on;
bool shaders_desired;
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index d53acb2..b04d705 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -319,7 +319,8 @@ static void kbase_pm_invoke(struct kbase_device *kbdev,
if (kbase_dummy_job_wa_enabled(kbdev) &&
action == ACTION_PWRON &&
core_type == KBASE_PM_CORE_SHADER &&
- !(kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) {
+ !(kbdev->dummy_job_wa.flags &
+ KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) {
kbase_dummy_job_wa_execute(kbdev, cores);
} else {
if (lo != 0)
@@ -938,7 +939,8 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev)
* except at certain points where we can handle it,
* i.e. off and SHADERS_ON_CORESTACK_ON.
*/
- backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev);
+ backend->shaders_desired_mask =
+ kbase_pm_ca_get_core_mask(kbdev);
backend->pm_shaders_core_mask = 0;
if (backend->shaders_desired &&
@@ -965,6 +967,8 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev)
case KBASE_SHADERS_OFF_CORESTACK_PEND_ON:
if (!stacks_trans && stacks_ready == stacks_avail) {
+ backend->shaders_avail =
+ backend->shaders_desired_mask;
kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
backend->shaders_avail, ACTION_PWRON);
@@ -990,11 +994,12 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev)
break;
case KBASE_SHADERS_ON_CORESTACK_ON:
- backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev);
+ backend->shaders_desired_mask =
+ kbase_pm_ca_get_core_mask(kbdev);
/* If shaders to change state, trigger a counter dump */
if (!backend->shaders_desired ||
- (backend->shaders_avail != shaders_ready)) {
+ (backend->shaders_desired_mask != shaders_ready)) {
backend->hwcnt_desired = false;
if (!backend->hwcnt_disabled)
kbase_pm_trigger_hwcnt_disable(kbdev);
@@ -1004,7 +1009,7 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev)
break;
case KBASE_SHADERS_ON_CORESTACK_ON_RECHECK:
- backend->shaders_avail =
+ backend->shaders_desired_mask =
kbase_pm_ca_get_core_mask(kbdev);
if (!backend->hwcnt_disabled) {
@@ -1038,19 +1043,20 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev)
backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON;
}
- } else if (backend->shaders_avail & ~shaders_ready) {
+ } else if (backend->shaders_desired_mask & ~shaders_ready) {
/* set cores ready but not available to
* meet KBASE_SHADERS_PEND_ON_CORESTACK_ON
* check pass
*/
- backend->shaders_avail |= shaders_ready;
+ backend->shaders_avail =
+ (backend->shaders_desired_mask | shaders_ready);
kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
backend->shaders_avail & ~shaders_ready,
ACTION_PWRON);
backend->shaders_state =
KBASE_SHADERS_PEND_ON_CORESTACK_ON;
- } else if (shaders_ready & ~backend->shaders_avail) {
+ } else if (shaders_ready & ~backend->shaders_desired_mask) {
backend->shaders_state =
KBASE_SHADERS_WAIT_GPU_IDLE;
} else {
@@ -1111,7 +1117,15 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev)
* meet KBASE_SHADERS_PEND_ON_CORESTACK_ON
* check pass
*/
- backend->shaders_avail &= shaders_ready;
+
+ /* shaders_desired_mask shall be a subset of
+ * shaders_ready
+ */
+ WARN_ON(backend->shaders_desired_mask & ~shaders_ready);
+ WARN_ON(!(backend->shaders_desired_mask & shaders_ready));
+
+ backend->shaders_avail =
+ backend->shaders_desired_mask;
kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
shaders_ready & ~backend->shaders_avail, ACTION_PWROFF);
backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON;
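[Note] The power-management change above splits the previously overloaded shaders_avail into shaders_desired_mask (what DVFS or sysfs currently request) and shaders_avail (latched only at points where SHADER_PWRON/PWROFF can actually be written). A toy model of that latch-on-transition idea; it is heavily simplified, standalone, and not kbase code:

/* Toy model of the desired-mask / avail split described above. */
#include <stdint.h>
#include <stdio.h>

struct pm_backend {
	uint64_t shaders_desired_mask;	/* updated whenever policy changes */
	uint64_t shaders_avail;		/* latched only at power transitions */
};

/* Called freely, e.g. from DVFS or sysfs: only the desired mask moves. */
static void pm_set_core_mask(struct pm_backend *b, uint64_t mask)
{
	b->shaders_desired_mask = mask;
}

/* Called only at a point where PWRON could actually be issued:
 * the available set is now allowed to catch up with the desired set.
 */
static uint64_t pm_power_on_transition(struct pm_backend *b)
{
	uint64_t to_power_on = b->shaders_desired_mask & ~b->shaders_avail;

	b->shaders_avail = b->shaders_desired_mask;
	return to_power_on;	/* cores that would be written to SHADER_PWRON */
}

int main(void)
{
	struct pm_backend b = { .shaders_desired_mask = 0, .shaders_avail = 0 };

	pm_set_core_mask(&b, 0xf);	/* request four cores */
	pm_set_core_mask(&b, 0x3);	/* request shrinks before the transition */
	printf("pwron=0x%llx avail=0x%llx\n",
	       (unsigned long long)pm_power_on_transition(&b),
	       (unsigned long long)b.shaders_avail);
	return 0;
}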
diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp
index d331dd2..94189b1 100644
--- a/mali_kbase/build.bp
+++ b/mali_kbase/build.bp
@@ -1,13 +1,16 @@
/*
- * Copyright:
- * ----------------------------------------------------------------------------
- * This confidential and proprietary software may be used only as authorized
- * by a licensing agreement from ARM Limited.
- * (C) COPYRIGHT 2017-2019 ARM Limited, ALL RIGHTS RESERVED
- * The entire notice above must be reproduced on all authorized copies and
- * copies may only be made to the extent permitted by a licensing agreement
- * from ARM Limited.
- * ----------------------------------------------------------------------------
+ *
+ * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
*/
/* Kernel-side tests may include mali_kbase's headers. Therefore any config
@@ -121,6 +124,9 @@ bob_kernel_module {
cinstr_secondary_hwc: {
kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"],
},
+ cinstr_secondary_hwc_via_debug_fs: {
+ kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS=y"],
+ },
mali_2mb_alloc: {
kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
},
diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c
index 0fe61c4..2cd2551 100644
--- a/mali_kbase/context/backend/mali_kbase_context_jm.c
+++ b/mali_kbase/context/backend/mali_kbase_context_jm.c
@@ -56,6 +56,18 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx)
kbase_debug_job_fault_context_term(kctx);
}
KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term);
+#else
+void kbase_context_debugfs_init(struct kbase_context *const kctx)
+{
+ CSTD_UNUSED(kctx);
+}
+KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init);
+
+void kbase_context_debugfs_term(struct kbase_context *const kctx)
+{
+ CSTD_UNUSED(kctx);
+}
+KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term);
#endif /* CONFIG_DEBUG_FS */
static int kbase_context_kbase_timer_setup(struct kbase_context *kctx)
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 1ae149d..a539edb 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -51,8 +51,6 @@ int kbase_context_common_init(struct kbase_context *kctx)
kctx->process_mm = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
atomic_set(&kctx->permanent_mapped_pages, 0);
- kctx->slots_pullable = 0;
-
kctx->tgid = current->tgid;
kctx->pid = current->pid;
@@ -67,6 +65,8 @@ int kbase_context_common_init(struct kbase_context *kctx)
INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
init_waitqueue_head(&kctx->event_queue);
+ atomic_set(&kctx->event_count, 0);
+ atomic_set(&kctx->event_closed, false);
bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG);
diff --git a/mali_kbase/context/mali_kbase_context.h b/mali_kbase/context/mali_kbase_context.h
index 12b8e4f..e4ed894 100644
--- a/mali_kbase/context/mali_kbase_context.h
+++ b/mali_kbase/context/mali_kbase_context.h
@@ -35,7 +35,6 @@
#include <linux/atomic.h>
-#ifdef CONFIG_DEBUG_FS
/**
* kbase_context_debugfs_init - Initialize the kctx platform
* specific debugfs
@@ -57,7 +56,6 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx);
* is compiled for.
*/
void kbase_context_debugfs_term(struct kbase_context *const kctx);
-#endif /* CONFIG_DEBUG_FS */
/**
* kbase_create_context() - Create a kernel base context.
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index 8eb3153..4c77929 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -270,7 +270,12 @@ void kbase_device_id_init(struct kbase_device *kbdev)
{
scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
kbase_dev_nr);
- kbdev->id = kbase_dev_nr++;
+ kbdev->id = kbase_dev_nr;
+}
+
+void kbase_increment_device_id(void)
+{
+ kbase_dev_nr++;
}
int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev)
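[Note] The device.c change above stops kbase_device_id_init() from bumping kbase_dev_nr itself; the counter is now advanced separately via kbase_increment_device_id(), presumably after a successful probe. A toy, standalone illustration of why deferring the increment matters (names and flow are hypothetical, not the kbase probe path):

/* Toy illustration of deferring the device-number increment until
 * initialisation succeeds.
 */
#include <stdbool.h>
#include <stdio.h>

static int dev_nr;	/* analogous to kbase_dev_nr */

struct toy_device { int id; char name[16]; };

static void toy_device_id_init(struct toy_device *dev)
{
	snprintf(dev->name, sizeof(dev->name), "mali%d", dev_nr);
	dev->id = dev_nr;	/* no increment here any more */
}

static bool toy_probe(struct toy_device *dev, bool init_ok)
{
	toy_device_id_init(dev);
	if (!init_ok)
		return false;	/* failed probe leaves dev_nr untouched */
	dev_nr++;		/* analogous to kbase_increment_device_id() */
	return true;
}

int main(void)
{
	struct toy_device a, b;

	toy_probe(&a, false);	/* fails: the next device still gets id 0 */
	toy_probe(&b, true);
	printf("%s id=%d, next dev_nr=%d\n", b.name, b.id, dev_nr);
	return 0;
}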
diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h
index b1a3e1b..16f1d70 100644
--- a/mali_kbase/device/mali_kbase_device.h
+++ b/mali_kbase/device/mali_kbase_device.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -41,6 +41,13 @@ const struct list_head *kbase_device_get_list(void);
void kbase_device_put_list(const struct list_head *dev_list);
/**
+ * Kbase_increment_device_id - increment device id.
+ *
+ * Used to increment device id on successful initialization of the device.
+ */
+void kbase_increment_device_id(void);
+
+/**
* kbase_device_init - Device initialisation.
*
* This is called from device probe to initialise various other
diff --git a/mali_kbase/gpu/mali_kbase_gpu_coherency.h b/mali_kbase/gpu/mali_kbase_gpu_coherency.h
index 5ab67db..bb2b161 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_coherency.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_coherency.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/gpu/mali_kbase_gpu_fault.h b/mali_kbase/gpu/mali_kbase_gpu_fault.h
index 88d9d0f..b59b9d1 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_fault.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_fault.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -33,6 +33,17 @@
*/
const char *kbase_gpu_exception_name(u32 exception_code);
+/** Returns the name associated with a Mali fatal exception code
+ *
+ * @fatal_exception_code: fatal exception code
+ *
+ * This function is called from the interrupt handler when a GPU fatal
+ * exception occurs.
+ *
+ * Return: name associated with the fatal exception code
+ */
+const char *kbase_gpu_fatal_exception_name(u32 const fatal_exception_code);
+
/**
* kbase_gpu_access_type_name - Convert MMU_AS_CONTROL.FAULTSTATUS.ACCESS_TYPE
* into string.
diff --git a/mali_kbase/gpu/mali_kbase_gpu_id.h b/mali_kbase/gpu/mali_kbase_gpu_id.h
index ec883cb..9f3d6b1 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_id.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_id.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -102,7 +102,7 @@
#define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2)
#define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3)
#define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4)
-#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 5)
+#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7)
#define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2)
#define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3)
#define GPU_ID2_PRODUCT_TE2X GPU_ID2_MODEL_MAKE(11, 1)
diff --git a/mali_kbase/jm/mali_base_jm_kernel.h b/mali_kbase/jm/mali_base_jm_kernel.h
new file mode 100644
index 0000000..b61e612
--- /dev/null
+++ b/mali_kbase/jm/mali_base_jm_kernel.h
@@ -0,0 +1,1002 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#ifndef _BASE_JM_KERNEL_H_
+#define _BASE_JM_KERNEL_H_
+
+/* Memory allocation, access/hint flags.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* IN */
+/* Read access CPU side
+ */
+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
+
+/* Write access CPU side
+ */
+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
+
+/* Read access GPU side
+ */
+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
+
+/* Write access GPU side
+ */
+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
+
+/* Execute allowed on the GPU side
+ */
+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
+
+/* Will be permanently mapped in kernel space.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
+
+/* The allocation will completely reside within the same 4GB chunk in the GPU
+ * virtual space.
+ * Since this flag is primarily required only for the TLS memory which will
+ * not be used to contain executable code and also not used for Tiler heap,
+ * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
+ */
+#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
+
+/* Userspace is not allowed to free this memory.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
+
+#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
+
+/* Grow backing store on GPU Page Fault
+ */
+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
+
+/* Page coherence Outer shareable, if available
+ */
+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
+
+/* Page coherence Inner shareable
+ */
+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
+
+/* Should be cached on the CPU
+ */
+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
+
+/* IN/OUT */
+/* Must have same VA on both the GPU and the CPU
+ */
+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
+
+/* OUT */
+/* Must call mmap to acquire a GPU address for the allocation
+ */
+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
+
+/* IN */
+/* Page coherence Outer shareable, required.
+ */
+#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
+
+/* Protected memory
+ */
+#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
+
+/* Not needed physical memory
+ */
+#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
+
+/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
+ * addresses to be the same
+ */
+#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
+
+/**
+ * Bit 19 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags
+ */
+#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
+
+/**
+ * Memory starting from the end of the initial commit is aligned to 'extent'
+ * pages, where 'extent' must be a power of 2 and no more than
+ * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ */
+#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
+
+/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu
+ * mode. Some components within the GPU might only be able to access memory
+ * that is GPU cacheable. Refer to the specific GPU implementation for more
+ * details. The 3 shareability flags will be ignored for GPU uncached memory.
+ * If used while importing USER_BUFFER type memory, then the import will fail
+ * if the memory is not aligned to GPU and CPU cache line width.
+ */
+#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
+
+/*
+ * Bits [22:25] for group_id (0~15).
+ *
+ * base_mem_group_id_set() should be used to pack a memory group ID into a
+ * base_mem_alloc_flags value instead of accessing the bits directly.
+ * base_mem_group_id_get() should be used to extract the memory group ID from
+ * a base_mem_alloc_flags value.
+ */
+#define BASEP_MEM_GROUP_ID_SHIFT 22
+#define BASE_MEM_GROUP_ID_MASK \
+ ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
+
+/* Must do CPU cache maintenance when imported memory is mapped/unmapped
+ * on GPU. Currently applicable to dma-buf type only.
+ */
+#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
+
+/* Use the GPU VA chosen by the kernel client */
+#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27)
+
+/* Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 28
+
+/* A mask of all the flags which are only valid for allocations within kbase,
+ * and may not be passed from user space.
+ */
+#define BASEP_MEM_FLAGS_KERNEL_ONLY \
+ (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \
+ BASE_MEM_FLAG_MAP_FIXED)
+
+/* A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/* A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/* A mask of all currently reserved flags
+ */
+#define BASE_MEM_FLAGS_RESERVED \
+ (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19)
+
+#define BASEP_MEM_INVALID_HANDLE (0ull << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
+/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE (64ul << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
+ BASE_MEM_COOKIE_BASE)
+
+/**
+ * typedef base_context_create_flags - Flags to pass to ::base_context_init.
+ *
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+typedef u32 base_context_create_flags;
+
+/* No flags set */
+#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
+
+/* Base context is embedded in a cctx object (flag used for CINSTR
+ * software counter macros)
+ */
+#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
+
+/* Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled.
+ */
+#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
+ ((base_context_create_flags)1 << 1)
+
+/* Bit-shift used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
+
+/* Bitmask used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
+ ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+
+/* Bitpattern describing the base_context_create_flags that can be
+ * passed to the kernel
+ */
+#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
+ (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
+ BASEP_CONTEXT_MMU_GROUP_ID_MASK)
+
+/* Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
+ */
+#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
+ (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
+
+/*
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as base_context_create_flags, and so must
+ * not collide with them.
+ */
+
+/* Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \
+ ((base_context_create_flags)(1 << 31))
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST)
+ */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact.
+ */
+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
+
+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
+ BASE_TLSTREAM_JOB_DUMPING_ENABLED)
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT 256
+
+/* Maximum number of concurrent render passes.
+ */
+#define BASE_JD_RP_COUNT (256)
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0)
+
+/**
+ * struct base_jd_udata - Per-job data
+ *
+ * This structure is used to store per-job data, and is completely unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ *
+ * @blob: per-job data array
+ */
+struct base_jd_udata {
+ u64 blob[2];
+};
+
+/**
+ * typedef base_jd_dep_type - Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a
+ * dependency is a data or ordering dependency (by putting it before/after
+ * 'core_req' in the structure it should be possible to add without changing
+ * the structure size).
+ * When the flag is set for a particular dependency to signal that it is an
+ * ordering only dependency then errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */
+
+/**
+ * typedef base_jd_core_req - Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly. By not specifying the
+ * correct settings can/will cause an early job termination. Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u32 base_jd_core_req;
+
+/* Requirements that come from the HW */
+
+/* No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/* Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0)
+
+/* Requires compute shaders
+ *
+ * This covers any of the following GPU job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1)
+
+/* Requires tiling */
+#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2)
+
+/* Requires cache flushes */
+#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3)
+
+/* Requires value writeback */
+#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4)
+
+/* SW-only requirements - the HW does not expose these as part of the job slot
+ * capabilities
+ */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13)
+
+/* SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
+
+/* SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6)
+
+/* SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7)
+
+/* SW Only requirement: External resources are referenced by this atom.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and
+ * BASE_JD_REQ_SOFT_EVENT_WAIT.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8)
+
+/* SW Only requirement: Software defined job. Jobs with this bit set will not be
+ * submitted to the hardware but will cause some action to happen within the
+ * driver
+ */
+#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/* 0x4 RESERVED for now */
+
+/* SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ * other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ * possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/* SW only requirement: Just In Time allocation
+ *
+ * This job requests a single or multiple just-in-time allocations through a
+ * list of base_jit_alloc_info structure which is passed via the jc element of
+ * the atom. The number of base_jit_alloc_info structures present in the
+ * list is passed via the nr_extres element of the atom
+ *
+ * It should be noted that the id entry in base_jit_alloc_info must not
+ * be reused until it has been released via BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9)
+
+/* SW only requirement: Just In Time free
+ *
+ * This job requests a single or multiple just-in-time allocations created by
+ * BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the just-in-time
+ * allocations is passed via the jc element of the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/* SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb)
+
+/* SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/* HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains GPU jobs of the 'Compute
+ * Shaders' type.
+ *
+ * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10)
+
+/* HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag
+ * takes priority
+ *
+ * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned
+ * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
+/* SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12)
+
+/* SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
+/* SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use
+ * if the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
+/* SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use
+ * if the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
+/* Request the atom be executed on a specific job slot.
+ *
+ * When this flag is specified, it takes precedence over any existing job slot
+ * selection logic.
+ */
+#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17)
+
+/* SW-only requirement: The atom is the start of a renderpass.
+ *
+ * If this bit is set then the job chain will be soft-stopped if it causes the
+ * GPU to write beyond the end of the physical pages backing the tiler heap, and
+ * committing more memory to the heap would exceed an internal threshold. It may
+ * be resumed after running one of the job chains attached to an atom with
+ * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be
+ * resumed multiple times until it completes without memory usage exceeding the
+ * threshold.
+ *
+ * Usually used with BASE_JD_REQ_T.
+ */
+#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18)
+
+/* SW-only requirement: The atom is the end of a renderpass.
+ *
+ * If this bit is set then the atom incorporates the CPU address of a
+ * base_jd_fragment object instead of the GPU address of a job chain.
+ *
+ * Which job chain is run depends upon whether the atom with the same renderpass
+ * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or
+ * was soft-stopped when it exceeded an upper threshold for tiler heap memory
+ * usage.
+ *
+ * It also depends upon whether one of the job chains attached to the atom has
+ * already been run as part of the same renderpass (in which case it would have
+ * written unresolved multisampled and otherwise-discarded output to temporary
+ * buffers that need to be read back). The job chain for doing a forced read and
+ * forced write (from/to temporary buffers) is run as many times as necessary.
+ *
+ * Usually used with BASE_JD_REQ_FS.
+ */
+#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19)
+
+/* These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED \
+ (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
+ BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
+ BASE_JD_REQ_EVENT_COALESCE | \
+ BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
+ BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
+ BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \
+ BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \
+ BASE_JD_REQ_END_RENDERPASS))
+
+/* Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency-only atoms to have flags set
+ */
+#define BASE_JD_REQ_ATOM_TYPE \
+ (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
+ BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/* Mask of all bits in base_jd_core_req that control the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
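+
+/* Illustrative usage sketch only, not part of this patch: the soft-job
+ * subtype of an atom's requirements can be tested by masking with
+ * BASE_JD_REQ_SOFT_JOB_TYPE, e.g.
+ *
+ *   if ((core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+ *       BASE_JD_REQ_SOFT_EXT_RES_UNMAP)
+ *           handle_ext_res_unmap(atom);
+ *
+ * where handle_ext_res_unmap() and the variables are hypothetical.
+ */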
+
+/* Returns a non-zero value if the core requirements passed define a soft job
+ * or a dependency-only job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
+ (((core_req) & BASE_JD_REQ_SOFT_JOB) || \
+ ((core_req) & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
+
+/**
+ * enum kbase_jd_atom_state
+ *
+ * @KBASE_JD_ATOM_STATE_UNUSED: Atom is not used.
+ * @KBASE_JD_ATOM_STATE_QUEUED: Atom is queued in JD.
+ * @KBASE_JD_ATOM_STATE_IN_JS: Atom has been given to JS (is runnable/running).
+ * @KBASE_JD_ATOM_STATE_HW_COMPLETED: Atom has been completed, but not yet
+ * handed back to job dispatcher for
+ * dependency resolution.
+ * @KBASE_JD_ATOM_STATE_COMPLETED: Atom has been completed, but not yet handed
+ * back to userspace.
+ */
+enum kbase_jd_atom_state {
+ KBASE_JD_ATOM_STATE_UNUSED,
+ KBASE_JD_ATOM_STATE_QUEUED,
+ KBASE_JD_ATOM_STATE_IN_JS,
+ KBASE_JD_ATOM_STATE_HW_COMPLETED,
+ KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+/**
+ * typedef base_atom_id - Type big enough to store an atom number in.
+ */
+typedef u8 base_atom_id;
+
+/**
+ * struct base_dependency - Dependency of an atom on another atom
+ *
+ * @atom_id: An atom number
+ * @dependency_type: Dependency type
+ */
+struct base_dependency {
+ base_atom_id atom_id;
+ base_jd_dep_type dependency_type;
+};
+
+/**
+ * struct base_jd_fragment - Set of GPU fragment job chains used for rendering.
+ *
+ * @norm_read_norm_write: Job chain for full rendering.
+ * GPU address of a fragment job chain to render in the
+ * circumstance where the tiler job chain did not exceed
+ * its memory usage threshold and no fragment job chain
+ * was previously run for the same renderpass.
+ * It is used no more than once per renderpass.
+ * @norm_read_forced_write: Job chain for starting incremental
+ * rendering.
+ * GPU address of a fragment job chain to render in
+ * the circumstance where the tiler job chain exceeded
+ * its memory usage threshold for the first time and
+ * no fragment job chain was previously run for the
+ * same renderpass.
+ * Writes unresolved multisampled and normally-
+ * discarded output to temporary buffers that must be
+ * read back by a subsequent forced_read job chain
+ * before the renderpass is complete.
+ * It is used no more than once per renderpass.
+ * @forced_read_forced_write: Job chain for continuing incremental
+ * rendering.
+ * GPU address of a fragment job chain to render in
+ * the circumstance where the tiler job chain
+ * exceeded its memory usage threshold again
+ * and a fragment job chain was previously run for
+ * the same renderpass.
+ * Reads unresolved multisampled and
+ * normally-discarded output from temporary buffers
+ * written by a previous forced_write job chain and
+ * writes the same to temporary buffers again.
+ * It is used as many times as required until
+ * rendering completes.
+ * @forced_read_norm_write: Job chain for ending incremental rendering.
+ * GPU address of a fragment job chain to render in the
+ * circumstance where the tiler job chain did not
+ * exceed its memory usage threshold this time and a
+ * fragment job chain was previously run for the same
+ * renderpass.
+ * Reads unresolved multisampled and normally-discarded
+ * output from temporary buffers written by a previous
+ * forced_write job chain in order to complete a
+ * renderpass.
+ * It is used no more than once per renderpass.
+ *
+ * This structure is referenced by the main atom structure if
+ * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req.
+ */
+struct base_jd_fragment {
+ u64 norm_read_norm_write;
+ u64 norm_read_forced_write;
+ u64 forced_read_forced_write;
+ u64 forced_read_norm_write;
+};
+
+/**
+ * typedef base_jd_prio - Base Atom priority.
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling after the atoms have had dependencies
+ * resolved. For example, a low priority atom that has had its dependencies
+ * resolved might run before a higher priority atom that has not had its
+ * dependencies resolved.
+ *
+ * In general, fragment atoms do not affect non-fragment atoms with
+ * lower priorities, and vice versa. One exception is that there is only one
+ * priority value for each context. So a high-priority fragment atom, for
+ * example, could increase its context priority, causing its non-fragment
+ * atoms to also be scheduled sooner.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * * Let atoms 'X' and 'Y' be for the same job slot, both with dependencies
+ * resolved, and let atom 'X' have a higher priority than atom 'Y'
+ * * If atom 'Y' is currently running on the HW, then it is interrupted to
+ * allow atom 'X' to run soon after
+ * * If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ * the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * * Any two atoms that have the same priority could run in any order with
+ * respect to each other. That is, there is no ordering constraint between
+ * atoms of the same priority.
+ *
+ * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are
+ * scheduled between contexts. The default value, 0, will cause higher-priority
+ * atoms to be scheduled first, regardless of their context. The value 1 will
+ * use a round-robin algorithm when deciding which context's atoms to schedule
+ * next, so higher-priority atoms can only preempt lower priority atoms within
+ * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and
+ * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW
+ */
+#define BASE_JD_PRIO_HIGH ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting
+ */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+/**
+ * struct base_jd_atom_v2 - Node of a dependency graph used to submit a
+ * GPU job chain or soft-job to the kernel driver.
+ *
+ * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
+ * is set in the base_jd_core_req) the CPU address of a
+ * base_jd_fragment object.
+ * @udata: User data.
+ * @extres_list: List of external resources.
+ * @nr_extres: Number of external resources or JIT allocations.
+ * @jit_id: Zero-terminated array of IDs of just-in-time memory
+ * allocations written to by the atom. When the atom
+ * completes, the value stored at the
+ * &struct_base_jit_alloc_info.heap_info_gpu_addr of
+ * each allocation is read in order to enforce an
+ * overall physical memory usage limit.
+ * @pre_dep: Pre-dependencies. The setter function must be used to assign
+ * this field; this is done in order to reduce the possibility of
+ * improper assignment of a dependency field.
+ * @atom_number: Unique number to identify the atom.
+ * @prio: Atom priority. Refer to base_jd_prio for more details.
+ * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
+ * specified.
+ * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
+ * @core_req: Core requirements.
+ * @renderpass_id: Renderpass identifier used to associate an atom that has
+ * BASE_JD_REQ_START_RENDERPASS set in its core requirements
+ * with an atom that has BASE_JD_REQ_END_RENDERPASS set.
+ * @padding: Unused. Must be zero.
+ *
+ * This structure has changed since UK 10.2 for which base_jd_core_req was a
+ * u16 value.
+ *
+ * In UK 10.3 a core_req field of a u32 type was added to the end of the
+ * structure, and the place in the structure previously occupied by u16
+ * core_req was kept but renamed to compat_core_req.
+ *
+ * From UK 11.20 - compat_core_req is now occupied by u8 jit_id[2].
+ * Compatibility with UK 10.x from UK 11.y is not handled because
+ * the major version increase prevents this.
+ *
+ * For UK 11.20 jit_id[2] must be initialized to zero.
+ */
+struct base_jd_atom_v2 {
+ u64 jc;
+ struct base_jd_udata udata;
+ u64 extres_list;
+ u16 nr_extres;
+ u8 jit_id[2];
+ struct base_dependency pre_dep[2];
+ base_atom_id atom_number;
+ base_jd_prio prio;
+ u8 device_nr;
+ u8 jobslot;
+ base_jd_core_req core_req;
+ u8 renderpass_id;
+ u8 padding[7];
+};
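+
+/* Illustrative sketch only, not part of this patch: one plausible way for a
+ * UK 11.20 client to populate a fragment atom before submission. The variable
+ * names are hypothetical; only the structure fields and flag values come from
+ * this header.
+ *
+ *   struct base_jd_atom_v2 atom = { 0 };
+ *
+ *   atom.jc = fragment_job_chain_gpu_va;
+ *   atom.core_req = BASE_JD_REQ_FS;
+ *   atom.atom_number = next_atom_number;
+ *   atom.prio = BASE_JD_PRIO_MEDIUM;
+ *   // jit_id[2], renderpass_id and padding[] are left zero by the
+ *   // initializer, as required when the corresponding features are unused.
+ */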
+
+/* Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+ BASE_JD_SW_EVENT_KERNEL = (1u << 15), /* Kernel side event */
+ BASE_JD_SW_EVENT = (1u << 14), /* SW defined event */
+ /* Event indicates success (SW events only) */
+ BASE_JD_SW_EVENT_SUCCESS = (1u << 13),
+ BASE_JD_SW_EVENT_JOB = (0u << 11), /* Job related event */
+ BASE_JD_SW_EVENT_BAG = (1u << 11), /* Bag related event */
+ BASE_JD_SW_EVENT_INFO = (2u << 11), /* Misc/info event */
+ BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */
+ /* Mask to extract the type from an event code */
+ BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)
+};
+
+/**
+ * enum base_jd_event_code - Job chain event codes
+ *
+ * @BASE_JD_EVENT_RANGE_HW_NONFAULT_START: Start of hardware non-fault status
+ * codes.
+ * Obscurely, BASE_JD_EVENT_TERMINATED
+ * indicates a real fault, because the
+ * job was hard-stopped.
+ * @BASE_JD_EVENT_NOT_STARTED: Can't be seen by userspace, treated as
+ * 'previous job done'.
+ * @BASE_JD_EVENT_STOPPED: Can't be seen by userspace, becomes
+ * TERMINATED, DONE or JOB_CANCELLED.
+ * @BASE_JD_EVENT_TERMINATED: This is actually a fault status code - the job
+ * was hard stopped.
+ * @BASE_JD_EVENT_ACTIVE: Can't be seen by userspace, jobs only returned on
+ * complete/fail/cancel.
+ * @BASE_JD_EVENT_RANGE_HW_NONFAULT_END: End of hardware non-fault status codes.
+ * Obscurely, BASE_JD_EVENT_TERMINATED
+ * indicates a real fault,
+ * because the job was hard-stopped.
+ * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START: Start of hardware fault and
+ * software error status codes.
+ * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END: End of hardware fault and
+ * software error status codes.
+ * @BASE_JD_EVENT_RANGE_SW_SUCCESS_START: Start of software success status
+ * codes.
+ * @BASE_JD_EVENT_RANGE_SW_SUCCESS_END: End of software success status codes.
+ * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_START: Start of kernel-only status codes.
+ * Such codes are never returned to
+ * user-space.
+ * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes.
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see @BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a &struct base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * * 10:0 - subtype
+ * * 12:11 - type
+ * * 13 - SW success (only valid if the SW bit is set)
+ * * 14 - SW event (HW event if not set)
+ * * 15 - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * * BASE_JD_EVENT_RANGE_<description>_START
+ * * BASE_JD_EVENT_RANGE_<description>_END
+ *
+ * A code is in <description>'s range when:
+ * BASE_JD_EVENT_RANGE_<description>_START <= code <
+ * BASE_JD_EVENT_RANGE_<description>_END
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+enum base_jd_event_code {
+ /* HW defined exceptions */
+ BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+ /* non-fatal exceptions */
+ BASE_JD_EVENT_NOT_STARTED = 0x00,
+ BASE_JD_EVENT_DONE = 0x01,
+ BASE_JD_EVENT_STOPPED = 0x03,
+ BASE_JD_EVENT_TERMINATED = 0x04,
+ BASE_JD_EVENT_ACTIVE = 0x08,
+
+ BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+ /* job exceptions */
+ BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+ BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+ BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+ BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+ BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+ BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+ BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+ BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+ BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+ BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+ BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+ BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+ BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+ BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+ BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+ BASE_JD_EVENT_STATE_FAULT = 0x5A,
+ BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+ BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+ /* GPU exceptions */
+ BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+ BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+ /* MMU exceptions */
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+ BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+ BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+ /* SW defined exceptions */
+ BASE_JD_EVENT_MEM_GROWTH_FAILED =
+ BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+ BASE_JD_EVENT_TIMED_OUT =
+ BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+ BASE_JD_EVENT_JOB_CANCELLED =
+ BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+ BASE_JD_EVENT_JOB_INVALID =
+ BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+ BASE_JD_EVENT_PM_EVENT =
+ BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+
+ BASE_JD_EVENT_BAG_INVALID =
+ BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+ BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+ BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+ BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS |
+ BASE_JD_SW_EVENT_BAG | 0x000,
+ BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+ BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+ BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_KERNEL | 0x000,
+ BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+ BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x001,
+
+ BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+};
+
+/**
+ * struct base_jd_event_v2 - Event reporting structure
+ *
+ * @event_code: event code.
+ * @atom_number: the atom number that has completed.
+ * @udata: user data.
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. They can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with BASE_JD_SW_EVENT_TYPE_MASK.
+ */
+struct base_jd_event_v2 {
+ enum base_jd_event_code event_code;
+ base_atom_id atom_number;
+ struct base_jd_udata udata;
+};
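+
+/* Illustrative sketch only, not part of this patch: classifying a received
+ * event using the range constants and the type mask documented above. The
+ * helper name is hypothetical.
+ *
+ *   static inline bool base_jd_event_is_sw_success(enum base_jd_event_code c)
+ *   {
+ *           return (c >= BASE_JD_EVENT_RANGE_SW_SUCCESS_START) &&
+ *                  (c < BASE_JD_EVENT_RANGE_SW_SUCCESS_END);
+ *   }
+ *
+ *   // For SW events, the type bits can be extracted with:
+ *   //   u32 type = event.event_code & BASE_JD_SW_EVENT_TYPE_MASK;
+ *   // yielding one of BASE_JD_SW_EVENT_JOB/_BAG/_INFO/_RESERVED.
+ */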
+
+/**
+ * struct base_dump_cpu_gpu_counters - Structure for
+ * BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS
+ * jobs.
+ * @system_time:   gpu timestamp
+ * @cycle_counter: gpu cycle count
+ * @sec:           cpu time of day (sec)
+ * @usec:          cpu time of day (usec)
+ * @padding:       padding to round the structure size up to 64 bytes
+ *
+ * This structure is stored into the memory pointed to by the @jc field
+ * of &struct base_jd_atom_v2.
+ *
+ * It must not occupy the same CPU cache line(s) as any neighboring data.
+ * This is to avoid cases where access to pages containing the structure
+ * is shared between cached and un-cached memory regions, which would
+ * cause memory corruption.
+ */
+struct base_dump_cpu_gpu_counters {
+ u64 system_time;
+ u64 cycle_counter;
+ u64 sec;
+ u32 usec;
+ u8 padding[36];
+};
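+
+/* Illustrative sketch only, not part of this patch: since the structure must
+ * not share CPU cache lines with neighbouring data, a user-space client could
+ * give it its own cache-line-aligned allocation (assuming a 64-byte cache
+ * line; the variable names are hypothetical):
+ *
+ *   struct base_dump_cpu_gpu_counters *buf;
+ *
+ *   if (posix_memalign((void **)&buf, 64, sizeof(*buf)) != 0)
+ *           return -1;
+ *   memset(buf, 0, sizeof(*buf));
+ *   // map buf to the GPU and place its GPU VA in the jc field of an atom
+ *   // with BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS set.
+ */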
+
+#endif /* _BASE_JM_KERNEL_H_ */
diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
new file mode 100644
index 0000000..172217f
--- /dev/null
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -0,0 +1,807 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Definitions (types, defines, etcs) specific to Job Manager Kbase.
+ * They are placed here to allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_JM_DEFS_H_
+#define _KBASE_JM_DEFS_H_
+
+/* Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/*
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped"
+ * from the hardware. Note that the time is actually
+ * ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and
+ * the GPU actually being reset to give other contexts time for their jobs
+ * to be soft-stopped and removed from the hardware before resetting.
+ */
+#define ZAP_TIMEOUT 1000
+
+/*
+ * Prevent soft-stops from occurring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more
+ * predictable.
+ *
+ * Note that soft stop may still be disabled separately due to HW issues.
+ *
+ * Soft stop will still be used for non-scheduling purposes e.g. when
+ * terminating a context.
+ *
+ * If not in use, define this value as 0 instead of leaving it undefined.
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+
+/*
+ * Prevent hard-stops from occurring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more
+ * predictable.
+ *
+ * Hard stop will still be used for non-scheduling purposes e.g. when
+ * terminating a context.
+ *
+ * If not in use, define this value as 0 instead of leaving it undefined.
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/* Atom has been previously soft-stopped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED (1<<1)
+/* Atom has previously been retried for execution */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps
+ * to disambiguate short-running job chains during soft/hard stopping of jobs
+ */
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/* Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/* Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in protected mode */
+#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
+/* Atom has been stored in runnable_tree */
+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
+/* Atom is waiting for L2 caches to power up in order to enter protected mode */
+#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK
+ */
+
+/* This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/* Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error "JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK." \
+ "Must update JS_COMMAND_SW_<..> bitmasks"
+#endif
+
+/* Soft-stop command that causes a Disjoint event. This of course isn't
+ * entirely masked off by JS_COMMAND_MASK
+ */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+ (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+/* Serialize atoms within a slot (ie only one atom per job slot) */
+#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0)
+/* Serialize atoms between slots (ie only one job slot running at any time) */
+#define KBASE_SERIALIZE_INTER_SLOT (1 << 1)
+/* Reset the GPU after each atom completion */
+#define KBASE_SERIALIZE_RESET (1 << 2)
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * struct base_job_fault_event - keeps track of the atom which faulted or which
+ * completed after the faulty atom but before the
+ * debug data for the faulty atom was dumped.
+ *
+ * @event_code: event code for the atom; it should not be BASE_JD_EVENT_DONE
+ * for the atom which faulted.
+ * @katom: pointer to the atom for which job fault occurred or which
+ * completed after the faulty atom.
+ * @job_fault_work: work item, queued only for the faulty atom, which waits for
+ * the dumping to get completed and then does the bottom half
+ * of job done for the atoms which followed the faulty atom.
+ * @head: List head used to store the atom in the global list of
+ * faulty atoms or context specific list of atoms which got
+ * completed during the dump.
+ * @reg_offset: offset of the register to be dumped next, only applicable
+ * for the faulty atom.
+ */
+struct base_job_fault_event {
+ u32 event_code;
+ struct kbase_jd_atom *katom;
+ struct work_struct job_fault_work;
+ struct list_head head;
+ int reg_offset;
+};
+#endif
+
+/**
+ * struct kbase_jd_atom_dependency - Contains the dependency info for an atom.
+ * @atom: pointer to the dependee atom.
+ * @dep_type: type of dependency on the dependee @atom, i.e. order or data
+ * dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency.
+ */
+struct kbase_jd_atom_dependency {
+ struct kbase_jd_atom *atom;
+ u8 dep_type;
+};
+
+/**
+ * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the
+ * dependee atom.
+ * @dep: pointer to the dependency info structure.
+ *
+ * Return: readonly reference to dependee atom.
+ */
+static inline const struct kbase_jd_atom *
+kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+ LOCAL_ASSERT(dep != NULL);
+
+ return (const struct kbase_jd_atom *)(dep->atom);
+}
+
+/**
+ * kbase_jd_katom_dep_type - Retrieves the dependency type info
+ *
+ * @dep: pointer to the dependency info structure.
+ *
+ * Return: the type of dependency there is on the dependee atom.
+ */
+static inline u8 kbase_jd_katom_dep_type(
+ const struct kbase_jd_atom_dependency *dep)
+{
+ LOCAL_ASSERT(dep != NULL);
+
+ return dep->dep_type;
+}
+
+/**
+ * kbase_jd_katom_dep_set - sets up the dependency info structure
+ * as per the values passed.
+ * @const_dep: pointer to the dependency info structure to be setup.
+ * @a: pointer to the dependee atom.
+ * @type: type of dependency there is on the dependee atom.
+ */
+static inline void kbase_jd_katom_dep_set(
+ const struct kbase_jd_atom_dependency *const_dep,
+ struct kbase_jd_atom *a, u8 type)
+{
+ struct kbase_jd_atom_dependency *dep;
+
+ LOCAL_ASSERT(const_dep != NULL);
+
+ dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+ dep->atom = a;
+ dep->dep_type = type;
+}
+
+/**
+ * kbase_jd_katom_dep_clear - resets the dependency info structure
+ *
+ * @const_dep: pointer to the dependency info structure to be setup.
+ */
+static inline void kbase_jd_katom_dep_clear(
+ const struct kbase_jd_atom_dependency *const_dep)
+{
+ struct kbase_jd_atom_dependency *dep;
+
+ LOCAL_ASSERT(const_dep != NULL);
+
+ dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+ dep->atom = NULL;
+ dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
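+
+/* Illustrative usage sketch only, not part of this patch: the accessors above
+ * are intended to be used instead of writing the dependency fields directly
+ * (katom, dep_atom and type are hypothetical):
+ *
+ *   kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+ *
+ *   if (kbase_jd_katom_dep_atom(&katom->dep[0]) == dep_atom)
+ *           type = kbase_jd_katom_dep_type(&katom->dep[0]);
+ *
+ *   kbase_jd_katom_dep_clear(&katom->dep[0]);
+ */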
+
+/**
+ * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it
+ * becomes runnable, with respect to job slot
+ * ringbuffer/fifo.
+ * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo,
+ * which implies that either the atom has not become
+ * runnable due to a dependency or it has completed
+ * its execution on the GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is
+ * blocked due to cross slot dependency,
+ * can't be submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot
+ * fifo but is waiting for the completion of
+ * previously added atoms in current & other
+ * slots, as their protected mode requirements
+ * do not match with the current atom.
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo
+ * and is waiting for completion of protected
+ * mode transition, needed before the atom is
+ * submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is
+ * waiting for the cores, which are needed to
+ * execute the job chain represented by the atom,
+ * to become available
+ * @KBASE_ATOM_GPU_RB_READY: Atom is in slot fifo and can be submitted to
+ * GPU.
+ * @KBASE_ATOM_GPU_RB_SUBMITTED: Atom is in slot fifo and has been submitted
+ * to GPU.
+ * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some
+ * failure, but only after the previously added
+ * atoms in fifo have completed or have also
+ * been returned to JS.
+ */
+enum kbase_atom_gpu_rb_state {
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+ KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
+ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
+ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+ KBASE_ATOM_GPU_RB_READY,
+ KBASE_ATOM_GPU_RB_SUBMITTED,
+ KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1
+};
+
+/**
+ * enum kbase_atom_enter_protected_state - The state of an atom with respect to
+ * the preparation for GPU's entry into protected mode,
+ * becomes pertinent only after atom's state with respect
+ * to slot ringbuffer is
+ * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any
+ * atoms currently submitted to GPU and protected mode
+ * transition is not already in progress.
+ * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to
+ * become disabled before entry into protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in
+ * preparation for the coherency change. L2 shall be
+ * powered down and GPU shall come out of fully
+ * coherent mode before entering protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change;
+ * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on
+ * so that coherency register contains correct value when
+ * GPU enters protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for
+ * BASE_HW_ISSUE_TGOX_R1_1234 check
+ * that L2 is powered up and switch GPU to protected mode.
+ */
+enum kbase_atom_enter_protected_state {
+ /*
+ * NOTE: The integer value of this must match
+ * KBASE_ATOM_EXIT_PROTECTED_CHECK.
+ */
+ KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
+ KBASE_ATOM_ENTER_PROTECTED_HWCNT,
+ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
+ KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY,
+ KBASE_ATOM_ENTER_PROTECTED_FINISHED,
+};
+
+/**
+ * enum kbase_atom_exit_protected_state - The state of an atom with respect to
+ * the preparation for GPU's exit from protected mode,
+ * becomes pertinent only after atom's state with respect
+ * to slot ringbuffer is
+ * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any
+ * atoms currently submitted to GPU and protected mode
+ * transition is not already in progress.
+ * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in
+ * preparation for the reset, as exiting protected mode
+ * requires a reset.
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from
+ * protected mode
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state. Wait for the reset to
+ * complete
+ */
+enum kbase_atom_exit_protected_state {
+ /*
+ * NOTE: The integer value of this must match
+ * KBASE_ATOM_ENTER_PROTECTED_CHECK.
+ */
+ KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
+ KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
+ KBASE_ATOM_EXIT_PROTECTED_RESET,
+ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
+};
+
+/**
+ * struct kbase_ext_res - Contains the info for external resources referred
+ * by an atom, which have been mapped on GPU side.
+ * @gpu_address: Start address of the memory region allocated for
+ * the resource from GPU virtual address space.
+ * @alloc: pointer to physical pages tracking object, set on
+ * mapping the external resource on GPU side.
+ */
+struct kbase_ext_res {
+ u64 gpu_address;
+ struct kbase_mem_phy_alloc *alloc;
+};
+
+/**
+ * struct kbase_jd_atom - object representing the atom, containing the complete
+ * state and attributes of an atom.
+ * @work: work item for the bottom half processing of the atom,
+ * by JD or JS, after it got executed on GPU or the
+ * input fence got signaled
+ * @start_timestamp: time at which the atom was submitted to the GPU, by
+ * updating the JS_HEAD_NEXTn register.
+ * @udata: copy of the user data sent for the atom in
+ * base_jd_submit.
+ * @kctx: Pointer to the base context with which the atom is
+ * associated.
+ * @dep_head: Array of 2 list heads, pointing to the two lists of
+ * atoms which are blocked due to dependency on this
+ * atom.
+ * @dep_item: Array of 2 list heads, used to store the atom in the
+ * list of other atoms depending on the same dependee
+ * atom.
+ * @dep: Array containing the dependency info for the 2 atoms
+ * on which the atom depends upon.
+ * @jd_item: List head used during job dispatch job_done
+ * processing - as dependencies may not be entirely
+ * resolved at this point,
+ * we need to use a separate list head.
+ * @in_jd_list: flag set to true if atom's @jd_item is currently on
+ * a list, prevents atom being processed twice.
+ * @jit_ids: Zero-terminated array of IDs of just-in-time memory
+ * allocations written to by the atom. When the atom
+ * completes, the value stored at the
+ * &struct_base_jit_alloc_info.heap_info_gpu_addr of
+ * each allocation is read in order to enforce an
+ * overall physical memory usage limit.
+ * @nr_extres: number of external resources referenced by the atom.
+ * @extres: pointer to the location containing info about
+ * @nr_extres external resources referenced by the atom.
+ * @device_nr: indicates the coregroup with which the atom is
+ * associated, when
+ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP is specified.
+ * @jc: GPU address of the job-chain.
+ * @softjob_data: Copy of data read from the user space buffer that @jc
+ * points to.
+ * @fence: Stores either an input or output sync fence,
+ * depending on soft-job type
+ * @sync_waiter: Pointer to the sync fence waiter structure passed to
+ * the callback function on signaling of the input
+ * fence.
+ * @dma_fence: object containing pointers to both input & output
+ * fences and other related members used for explicit
+ * sync through soft jobs and for the implicit
+ * synchronization required on access to external
+ * resources.
+ * @event_code: Event code for the job chain represented by the atom,
+ * both HW and low-level SW events are represented by
+ * event codes.
+ * @core_req: bitmask of BASE_JD_REQ_* flags specifying either
+ * Hw or Sw requirements for the job chain represented
+ * by the atom.
+ * @ticks: Number of scheduling ticks for which atom has been
+ * running on the GPU.
+ * @sched_priority: Priority of the atom for Job scheduling, as per the
+ * KBASE_JS_ATOM_SCHED_PRIO_*.
+ * @completed: Wait queue to wait upon for the completion of atom.
+ * @status: Indicates at a high level what stage the atom is in,
+ * as per KBASE_JD_ATOM_STATE_*: whether it is not in
+ * use, queued in JD, given to JS, submitted to HW, or
+ * has completed execution on HW.
+ * @work_id: used for GPU tracepoints; it is a snapshot of the
+ * 'work_id' counter in kbase_jd_context which is
+ * incremented on every call to base_jd_submit.
+ * @slot_nr: Job slot chosen for the atom.
+ * @atom_flags: bitmask of KBASE_KATOM_FLAG* flags capturing the
+ * exact low-level state of the atom.
+ * @gpu_rb_state: state of the atom, as per KBASE_ATOM_GPU_RB_*,
+ * precisely tracking the atom's state after it has
+ * entered the Job scheduler on becoming runnable. The
+ * atom could be blocked due to a cross-slot dependency,
+ * waiting for the shader cores to become available,
+ * or waiting for protected mode transitions to
+ * complete.
+ * @need_cache_flush_cores_retained: flag indicating that a manual flush of the
+ * GPU cache is needed for the atom and the shader cores
+ * used for the atom have been kept powered on.
+ * @blocked: flag indicating that atom's resubmission to GPU is
+ * blocked till the work item is scheduled to return the
+ * atom to JS.
+ * @pre_dep: Pointer to atom that this atom has same-slot
+ * dependency on
+ * @post_dep: Pointer to atom that has same-slot dependency on
+ * this atom
+ * @x_pre_dep: Pointer to atom that this atom has cross-slot
+ * dependency on
+ * @x_post_dep: Pointer to atom that has cross-slot dependency on
+ * this atom
+ * @flush_id: The GPU's flush count recorded at the time of
+ * submission,
+ * used for the cache flush optimization
+ * @fault_event: Info for dumping the debug data on Job fault.
+ * @queue: List head used for 4 different purposes:
+ * Adds atom to the list of dma-buf fence waiting atoms.
+ * Adds atom to the list of atoms blocked due to cross
+ * slot dependency.
+ * Adds atom to the list of softjob atoms for which JIT
+ * allocation has been deferred
+ * Adds atom to the list of softjob atoms waiting for
+ * the signaling of fence.
+ * @jit_node: Used to keep track of all JIT free/alloc jobs in
+ * submission order
+ * @jit_blocked: Flag indicating that JIT allocation requested through
+ * softjob atom will be reattempted after the impending
+ * free of other active JIT allocations.
+ * @will_fail_event_code: If non-zero, this indicates that the atom will fail
+ * with the set event_code when the atom is processed.
+ * Used for special handling of atoms, which have a data
+ * dependency on the failed atoms.
+ * @protected_state: State of the atom, as per
+ * KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*,
+ * when transitioning into or out of protected mode.
+ * Atom will be either entering or exiting the
+ * protected mode.
+ * @runnable_tree_node: The node added to context's job slot specific rb tree
+ * when the atom becomes runnable.
+ * @age: Age of atom relative to other atoms in the context,
+ * is snapshot of the age_count counter in kbase
+ * context.
+ */
+struct kbase_jd_atom {
+ struct work_struct work;
+ ktime_t start_timestamp;
+
+ struct base_jd_udata udata;
+ struct kbase_context *kctx;
+
+ struct list_head dep_head[2];
+ struct list_head dep_item[2];
+ const struct kbase_jd_atom_dependency dep[2];
+ struct list_head jd_item;
+ bool in_jd_list;
+
+#if MALI_JIT_PRESSURE_LIMIT
+ u8 jit_ids[2];
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
+ u16 nr_extres;
+ struct kbase_ext_res *extres;
+
+ u32 device_nr;
+ u64 jc;
+ void *softjob_data;
+#if defined(CONFIG_SYNC)
+ struct sync_fence *fence;
+ struct sync_fence_waiter sync_waiter;
+#endif /* CONFIG_SYNC */
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+ struct {
+ /* Use the functions/API defined in mali_kbase_fence.h when
+ * working with this sub struct
+ */
+#if defined(CONFIG_SYNC_FILE)
+ /* Input fence */
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ struct fence *fence_in;
+#else
+ struct dma_fence *fence_in;
+#endif
+#endif
+ /* This points to the dma-buf output fence for this atom. If
+ * this is NULL then there is no fence for this atom and the
+ * following fields related to dma_fence may have invalid data.
+ *
+ * The context and seqno fields contain the details for this
+ * fence.
+ *
+ * This fence is signaled when the katom is completed,
+ * regardless of the event_code of the katom (signal also on
+ * failure).
+ */
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ struct fence *fence;
+#else
+ struct dma_fence *fence;
+#endif
+ /* The dma-buf fence context number for this atom. A unique
+ * context number is allocated to each katom in the context on
+ * context creation.
+ */
+ unsigned int context;
+ /* The dma-buf fence sequence number for this atom. This is
+ * increased every time this katom uses dma-buf fence.
+ */
+ atomic_t seqno;
+ /* This contains a list of all callbacks set up to wait on
+ * other fences. This atom must be held back from JS until all
+ * these callbacks have been called and dep_count have reached
+ * 0. The initial value of dep_count must be equal to the
+ * number of callbacks on this list.
+ *
+ * This list is protected by jctx.lock. Callbacks are added to
+ * this list when the atom is built and the waits are set up.
+ * All the callbacks then stay on the list until all callbacks
+ * have been called and the atom is queued, or cancelled, and
+ * then all callbacks are taken off the list and freed.
+ */
+ struct list_head callbacks;
+ /* Atomic counter of the number of outstanding dma-buf fence
+ * dependencies for this atom. When dep_count reaches 0 the
+ * atom may be queued.
+ *
+ * The special value "-1" may only be set after the count
+ * reaches 0, while holding jctx.lock. This indicates that the
+ * atom has been handled, either queued in JS or cancelled.
+ *
+ * If anyone but the dma-fence worker sets this to -1 they must
+ * ensure that any potentially queued worker must have
+ * completed before allowing the atom to be marked as unused.
+ * This can be done by flushing the fence work queue:
+ * kctx->dma_fence.wq.
+ */
+ atomic_t dep_count;
+ } dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */
+
+ /* Note: refer to kbasep_js_atom_retained_state, which will take a copy
+ * of some of the following members
+ */
+ enum base_jd_event_code event_code;
+ base_jd_core_req core_req;
+ u8 jobslot;
+ u8 renderpass_id;
+ struct base_jd_fragment jc_fragment;
+
+ u32 ticks;
+ int sched_priority;
+
+ wait_queue_head_t completed;
+ enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+ int work_id;
+#endif
+ int slot_nr;
+
+ u32 atom_flags;
+
+ int retry_count;
+
+ enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+ bool need_cache_flush_cores_retained;
+
+ atomic_t blocked;
+
+ struct kbase_jd_atom *pre_dep;
+ struct kbase_jd_atom *post_dep;
+
+ struct kbase_jd_atom *x_pre_dep;
+ struct kbase_jd_atom *x_post_dep;
+
+ u32 flush_id;
+
+#ifdef CONFIG_DEBUG_FS
+ struct base_job_fault_event fault_event;
+#endif
+ struct list_head queue;
+
+ struct list_head jit_node;
+ bool jit_blocked;
+
+ enum base_jd_event_code will_fail_event_code;
+
+ union {
+ enum kbase_atom_enter_protected_state enter;
+ enum kbase_atom_exit_protected_state exit;
+ } protected_state;
+
+ struct rb_node runnable_tree_node;
+
+ u32 age;
+};
+
+static inline bool kbase_jd_katom_is_protected(
+ const struct kbase_jd_atom *katom)
+{
+ return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies,
+ * one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+/**
+ * enum kbase_jd_renderpass_state - State of a renderpass
+ * @KBASE_JD_RP_COMPLETE: Unused or completed renderpass. Can only transition to
+ * START.
+ * @KBASE_JD_RP_START: Renderpass making a first attempt at tiling.
+ * Can transition to PEND_OOM or COMPLETE.
+ * @KBASE_JD_RP_PEND_OOM: Renderpass whose first attempt at tiling used too much
+ * memory and has a soft-stop pending. Can transition to
+ * OOM or COMPLETE.
+ * @KBASE_JD_RP_OOM: Renderpass whose first attempt at tiling used too much
+ * memory and therefore switched to incremental
+ * rendering. The fragment job chain is forced to run.
+ * Can only transition to RETRY.
+ * @KBASE_JD_RP_RETRY: Renderpass making a second or subsequent attempt at
+ * tiling. Can transition to RETRY_PEND_OOM or COMPLETE.
+ * @KBASE_JD_RP_RETRY_PEND_OOM: Renderpass whose second or subsequent attempt at
+ * tiling used too much memory again and has a
+ * soft-stop pending. Can transition to RETRY_OOM
+ * or COMPLETE.
+ * @KBASE_JD_RP_RETRY_OOM: Renderpass whose second or subsequent attempt at
+ * tiling used too much memory again. The fragment job
+ * chain is forced to run. Can only transition to RETRY.
+ *
+ * A state machine is used to control incremental rendering.
+ */
+enum kbase_jd_renderpass_state {
+ KBASE_JD_RP_COMPLETE, /* COMPLETE => START */
+ KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */
+ KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */
+ KBASE_JD_RP_OOM, /* OOM => RETRY */
+ KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or
+ * COMPLETE
+ */
+ KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or
+ * COMPLETE
+ */
+ KBASE_JD_RP_RETRY_OOM, /* RETRY_OOM => RETRY */
+};
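+
+/* Illustrative sketch only, not part of this patch: the legal transitions
+ * documented above could be checked by a hypothetical helper such as:
+ *
+ *   static bool rp_transition_is_valid(enum kbase_jd_renderpass_state from,
+ *                                      enum kbase_jd_renderpass_state to)
+ *   {
+ *           switch (from) {
+ *           case KBASE_JD_RP_COMPLETE:
+ *                   return to == KBASE_JD_RP_START;
+ *           case KBASE_JD_RP_START:
+ *                   return to == KBASE_JD_RP_PEND_OOM ||
+ *                          to == KBASE_JD_RP_COMPLETE;
+ *           case KBASE_JD_RP_PEND_OOM:
+ *                   return to == KBASE_JD_RP_OOM ||
+ *                          to == KBASE_JD_RP_COMPLETE;
+ *           case KBASE_JD_RP_OOM:
+ *                   return to == KBASE_JD_RP_RETRY;
+ *           case KBASE_JD_RP_RETRY:
+ *                   return to == KBASE_JD_RP_RETRY_PEND_OOM ||
+ *                          to == KBASE_JD_RP_COMPLETE;
+ *           case KBASE_JD_RP_RETRY_PEND_OOM:
+ *                   return to == KBASE_JD_RP_RETRY_OOM ||
+ *                          to == KBASE_JD_RP_COMPLETE;
+ *           case KBASE_JD_RP_RETRY_OOM:
+ *                   return to == KBASE_JD_RP_RETRY;
+ *           default:
+ *                   return false;
+ *           }
+ *   }
+ */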
+
+/**
+ * struct kbase_jd_renderpass - Data for a renderpass
+ * @state: Current state of the renderpass. If KBASE_JD_RP_COMPLETE then
+ * all other members are invalid.
+ * Both the job dispatcher context and hwaccess_lock must be
+ * locked to modify this so that it can be read with either
+ * (or both) locked.
+ * @start_katom: Address of the atom that is the start of a renderpass.
+ * Both the job dispatcher context and hwaccess_lock must be
+ * locked to modify this so that it can be read with either
+ * (or both) locked.
+ * @end_katom: Address of the atom that is the end of a renderpass, or NULL
+ * if that atom hasn't been added to the job scheduler yet.
+ * The job dispatcher context and hwaccess_lock must be
+ * locked to modify this so that it can be read with either
+ * (or both) locked.
+ * @oom_reg_list: A list of region structures which triggered out-of-memory.
+ * The hwaccess_lock must be locked to access this.
+ *
+ * Atoms tagged with BASE_JD_REQ_START_RENDERPASS or BASE_JD_REQ_END_RENDERPASS
+ * are associated with an object of this type, which is created and maintained
+ * by kbase to keep track of each renderpass.
+ */
+struct kbase_jd_renderpass {
+ enum kbase_jd_renderpass_state state;
+ struct kbase_jd_atom *start_katom;
+ struct kbase_jd_atom *end_katom;
+ struct list_head oom_reg_list;
+};
+
+/**
+ * struct kbase_jd_context - per context object encapsulating all the
+ * Job dispatcher related state.
+ * @lock: lock to serialize the updates made to the
+ * Job dispatcher state and kbase_jd_atom objects.
+ * @sched_info: Structure encapsulating all the Job scheduling
+ * info.
+ * @atoms: Array of the objects representing atoms,
+ * containing the complete state and attributes
+ * of an atom.
+ * @renderpasses: Array of renderpass state for incremental
+ * rendering, indexed by user-specified renderpass
+ * ID.
+ * @job_nr: Tracks the number of atoms being processed by the
+ * kbase. This includes atoms that are not tracked by
+ * scheduler: 'not ready to run' & 'dependency-only'
+ * jobs.
+ * @zero_jobs_wait: Waitq that reflects whether there are no jobs
+ * (including SW-only dependency jobs). This is set
+ * when no jobs are present on the ctx, and clear
+ * when there are jobs.
+ * This must be updated atomically with @job_nr.
+ * note: Job Dispatcher knows about more jobs than
+ * the Job Scheduler as it is unaware of jobs that
+ * are blocked on dependencies and SW-only dependency
+ * jobs. This waitq can be waited upon to find out
+ * when the context jobs are all done/cancelled
+ * (including those that might've been blocked
+ * on dependencies) - and so, whether it can be
+ * terminated. However, it should only be terminated
+ * once it is not present in the run-pool.
+ * Since the waitq is only set under @lock,
+ * the waiter should also briefly obtain and drop
+ * @lock to guarantee that the setter has completed
+ * its work on the kbase_context
+ * @job_done_wq: Workqueue to which the per atom work item is
+ * queued for bottom half processing when the
+ * atom completes
+ * execution on GPU or the input fence get signaled.
+ * @tb_lock: Lock to serialize the write access made to @tb to
+ * store the register access trace messages.
+ * @tb: Pointer to the Userspace accessible buffer storing
+ * the trace messages for register read/write
+ * accesses made by the Kbase. The buffer is filled
+ * in circular fashion.
+ * @tb_wrap_offset: Offset to the end location in the trace buffer,
+ * the write pointer is moved to the beginning on
+ * reaching this offset.
+ * @work_id: atomic variable used for GPU tracepoints,
+ * incremented on every call to base_jd_submit.
+ */
+struct kbase_jd_context {
+ struct mutex lock;
+ struct kbasep_js_kctx_info sched_info;
+ struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+ struct kbase_jd_renderpass renderpasses[BASE_JD_RP_COUNT];
+ struct workqueue_struct *job_done_wq;
+
+ wait_queue_head_t zero_jobs_wait;
+ spinlock_t tb_lock;
+ u32 *tb;
+ u32 job_nr;
+ size_t tb_wrap_offset;
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+ atomic_t work_id;
+#endif
+};
+
+/**
+ * struct jsctx_queue - JS context atom queue
+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
+ * job slot.
+ * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot
+ * dependencies. Atoms on this list will be moved to the
+ * runnable_tree when the blocking atom completes.
+ *
+ * hwaccess_lock must be held when accessing this structure.
+ */
+struct jsctx_queue {
+ struct rb_root runnable_tree;
+ struct list_head x_dep_head;
+};
+
+#endif /* _KBASE_JM_DEFS_H_ */
diff --git a/mali_kbase/jm/mali_kbase_jm_ioctl.h b/mali_kbase/jm/mali_kbase_jm_ioctl.h
new file mode 100644
index 0000000..127d990
--- /dev/null
+++ b/mali_kbase/jm/mali_kbase_jm_ioctl.h
@@ -0,0 +1,134 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_JM_IOCTL_H_
+#define _KBASE_JM_IOCTL_H_
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * 11.1:
+ * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags
+ * 11.2:
+ * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED,
+ * which some user-side clients prior to 11.2 might fault if they received
+ * them
+ * 11.3:
+ * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and
+ * KBASE_IOCTL_STICKY_RESOURCE_UNMAP
+ * 11.4:
+ * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET
+ * 11.5:
+ * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD)
+ * 11.6:
+ * - Added flags field to base_jit_alloc_info structure, which can be used to
+ * specify pseudo chunked tiler alignment for JIT allocations.
+ * 11.7:
+ * - Removed UMP support
+ * 11.8:
+ * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags
+ * 11.9:
+ * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY
+ * under base_mem_alloc_flags
+ * 11.10:
+ * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for
+ * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations
+ * with one softjob.
+ * 11.11:
+ * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
+ * 11.12:
+ * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS
+ * 11.13:
+ * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT
+ * 11.14:
+ * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set
+ * under base_mem_alloc_flags
+ * 11.15:
+ * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags.
+ * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be
+ * passed to mmap().
+ * 11.16:
+ * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf.
+ * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for
+ * dma-buf. Now, buffers are mapped on GPU when first imported, no longer
+ * requiring external resource or sticky resource tracking. UNLESS,
+ * CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled.
+ * 11.17:
+ * - Added BASE_JD_REQ_JOB_SLOT.
+ * - Reused padding field in base_jd_atom_v2 to pass job slot number.
+ * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO
+ * 11.18:
+ * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags
+ * 11.19:
+ * - Extended base_jd_atom_v2 to allow a renderpass ID to be specified.
+ * 11.20:
+ * - Added new phys_pages member to kbase_ioctl_mem_jit_init for
+ * KBASE_IOCTL_MEM_JIT_INIT, previous variants of this renamed to use _10_2
+ * (replacing '_OLD') and _11_5 suffixes
+ * - Replaced compat_core_req (deprecated in 10.3) with jit_id[2] in
+ * base_jd_atom_v2. It must currently be initialized to zero.
+ * - Added heap_info_gpu_addr to base_jit_alloc_info, and
+ * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE allowable in base_jit_alloc_info's
+ * flags member. Previous variants of this structure are kept and given _10_2
+ * and _11_5 suffixes.
+ * - The above changes are checked for safe values in usual builds
+ */
+#define BASE_UK_VERSION_MAJOR 11
+#define BASE_UK_VERSION_MINOR 20
+
+/**
+ * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
+ *
+ * @addr: Memory address of an array of struct base_jd_atom_v2
+ * @nr_atoms: Number of entries in the array
+ * @stride: sizeof(struct base_jd_atom_v2)
+ */
+struct kbase_ioctl_job_submit {
+ __u64 addr;
+ __u32 nr_atoms;
+ __u32 stride;
+};
+
+#define KBASE_IOCTL_JOB_SUBMIT \
+ _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
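+
+/* Illustrative sketch only, not part of this patch: a user-space caller with
+ * an open kbase device file descriptor (fd, atoms and the error handling are
+ * hypothetical) would typically submit an array of atoms as:
+ *
+ *   struct base_jd_atom_v2 atoms[2] = { ... };
+ *   struct kbase_ioctl_job_submit js = {
+ *           .addr     = (__u64)(uintptr_t)atoms,
+ *           .nr_atoms = 2,
+ *           .stride   = sizeof(struct base_jd_atom_v2),
+ *   };
+ *
+ *   if (ioctl(fd, KBASE_IOCTL_JOB_SUBMIT, &js) < 0)
+ *           handle_submit_error(errno);
+ */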
+
+#define KBASE_IOCTL_POST_TERM \
+ _IO(KBASE_IOCTL_TYPE, 4)
+
+/**
+ * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
+ * @event: GPU address of the event which has been updated
+ * @new_status: The new status to set
+ * @flags: Flags for future expansion
+ */
+struct kbase_ioctl_soft_event_update {
+ __u64 event;
+ __u32 new_status;
+ __u32 flags;
+};
+
+#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
+ _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
+
+
+#endif /* _KBASE_JM_IOCTL_H_ */
diff --git a/mali_kbase/jm/mali_kbase_jm_js.h b/mali_kbase/jm/mali_kbase_jm_js.h
new file mode 100644
index 0000000..6c222ce
--- /dev/null
+++ b/mali_kbase/jm/mali_kbase_jm_js.h
@@ -0,0 +1,892 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Job Scheduler Interface.
+ * These interfaces are Internal to KBase.
+ */
+
+#ifndef _KBASE_JM_JS_H_
+#define _KBASE_JM_JS_H_
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * kbasep_js_devdata_init - Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * kbasep_js_devdata_halt - Halt the Job Scheduler.
+ *
+ * It is safe to call this on kbdev even if the kbasep_js_device_data
+ * sub-structure was never initialized or failed initialization, to give
+ * efficient error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of kbdev
+ * must be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a programming error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * kbasep_js_devdata_term - Terminate the Job Scheduler
+ *
+ * It is safe to call this on kbdev even if the kbasep_js_device_data
+ * sub-structure was never initialized or failed initialization, to give
+ * efficient error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of kbdev
+ * must be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a programming error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * kbasep_js_kctx_init - Initialize the Scheduling Component of a
+ * struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with
+ * the scheduler. Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero initialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context *const kctx);
+
+/**
+ * kbasep_js_kctx_term - Terminate the Scheduling Component of a
+ * struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or
+ * failed initialization of its jctx.sched_info member, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbase_context must be zero initialized before
+ * passing to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * kbasep_js_add_job - Add a job chain to the Job Scheduler,
+ * and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * * Update the numbers of jobs information
+ * * Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * * If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * * Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * * In the above case that this is the first job on the context
+ * * If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guaranteed not to update the Policy Queue. And so, the caller is
+ * guaranteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold hwaccess_lock (as this will be obtained internally)
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used
+ * internally).
+ *
+ * Return: true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool,
+ * false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
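+
+/* Usage sketch (illustrative only, not taken from the driver): the return
+ * value tells the caller whether the Policy Queue changed and therefore
+ * whether a scheduling pass is needed. Names other than the two functions
+ * below are assumed caller-side context.
+ *
+ *   if (kbasep_js_add_job(kctx, katom))
+ *       kbase_js_sched_all(kctx->kbdev);
+ */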
+
+/**
+ * kbasep_js_remove_job - Remove a job chain from the Job Scheduler,
+ * except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ * the atom
+ * * kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * * kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ * remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more
+ * optimal for spinlock purposes than having kbasep_js_remove_job() handle all
+ * of the actions.
+ *
+ * In the case of canceling atoms, it is easier to call
+ * kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * * the atom is not a job belonging to kctx.
+ * * the atom has already been removed from the Job Scheduler.
+ * * the atom is still in the runpool
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+ struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
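+/* Illustrative sketch of the three-step removal described above, for an atom
+ * that completed normally. It is not part of the driver; the copy helper used
+ * is the kbasep_js_atom_retained_state_copy() declared later in this header.
+ *
+ *   struct kbasep_js_atom_retained_state retained;
+ *
+ *   kbasep_js_atom_retained_state_copy(&retained, katom);
+ *   kbasep_js_remove_job(kbdev, kctx, katom);
+ *   kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+ *                                                          &retained);
+ */
+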
+/**
+ * kbasep_js_remove_cancelled_job - Completely remove a job chain from the
+ * Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * * the atom is not a job belonging to kctx.
+ * * the atom has already been removed from the Job Scheduler.
+ * * the atom is still in the runpool and is not being killed with
+ *   kbase_jd_cancel()
+ *
+ * The following locking conditions are made on the caller:
+ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold the hwaccess_lock, (as this will be obtained
+ * internally)
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * Return: true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs and
+ * false otherwise.
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_jd_atom *katom);
+
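+/* Illustrative sketch only: as the Return documentation above states, a true
+ * result means context attributes changed and more jobs may now be runnable.
+ *
+ *   if (kbasep_js_remove_cancelled_job(kbdev, kctx, katom))
+ *       kbase_js_sched_all(kbdev);
+ */
+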
+/**
+ * kbasep_js_runpool_requeue_or_kill_ctx - Handling the requeuing/killing of a
+ * context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * * If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * * Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * kbasep_js_runpool_release_ctx - Release a refcount of a context being busy,
+ * allowing it to be scheduled out.
+ *
+ * When the refcount reaches zero, the context might be scheduled out
+ * (depending on whether the Scheduling Policy has deemed it so, or if it has
+ * run out of jobs).
+ *
+ * If the context does get scheduled out, then the following actions will be
+ * taken as part of descheduling the context:
+ * For the context being descheduled:
+ * * If the context is in the process of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ * * If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ * * Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ * * In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released
+ * (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * * Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * * Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * * Attempt scheduling in a new context (if one is available), and if
+ * necessary, running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * * it must not hold the hwaccess_lock, because it will be used internally.
+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of
+ * kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * * Releasing the atom's context attributes
+ * * Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+ struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * kbasep_js_runpool_release_ctx_nolock - Variant of
+ * kbasep_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbasep_js_schedule_privileged_ctx - Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the
+ * function will wait for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space
+ * reserved) until kbasep_js_release_privileged_ctx() is called.
+ *
+ * The following locking conditions are made on the caller:
+ * * it must not hold the hwaccess_lock, because it will be used internally.
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used
+ * internally).
+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbasep_js_release_privileged_ctx - Release a privileged context,
+ * allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * * it must not hold the hwaccess_lock, because it will be used internally.
+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_js_try_run_jobs - Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * * kbasep_js_device_data::runpool_mutex
+ * * hwaccess_lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * kbasep_js_suspend - Suspend the job scheduler during a Power Management
+ * Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Management active refcounts to
+ * become zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time.
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbasep_js_resume - Resume the Job Scheduler after a Power Management
+ * Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * * Schedules contexts back into the runpool
+ * * Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
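+
+/* Illustrative ordering only; the wait step is a hypothetical placeholder,
+ * not a real kbase API name:
+ *
+ *   kbasep_js_suspend(kbdev);
+ *   wait_until_pm_active_count_is_zero(kbdev);   // hypothetical placeholder
+ *   // ... system remains suspended ...
+ *   kbasep_js_resume(kbdev);
+ */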
+
+/**
+ * kbase_js_dep_resolved_submit - Submit an atom to the job scheduler.
+ *
+ * @kctx: Context pointer
+ * @katom: Pointer to the atom to submit
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * Return: true if the context needs to be enqueued, otherwise false.
+ */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom);
+
+/**
+ * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+ * @kctx: Context Pointer
+ * @prio: Priority (specifies the queue together with js).
+ * @js: Job slot (specifies the queue together with prio).
+ *
+ * Pushes all possible atoms from the linked list to the ringbuffer.
+ * The number of atoms is limited by the free space in the ringbuffer and
+ * the number of available atoms in the linked list.
+ *
+ */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+
+/**
+ * kbase_js_pull - Pull an atom from a context in the job scheduler for
+ * execution.
+ *
+ * @kctx: Context to pull from
+ * @js: Job slot to pull from
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * Return: a pointer to an atom, or NULL if there are no atoms for this
+ * slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * kbase_js_unpull - Return an atom to the job scheduler ringbuffer.
+ *
+ * @kctx: Context pointer
+ * @katom: Pointer to the atom to unpull
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped. Another reason is if an end-of-renderpass atom completed
+ * but will need to be run again as part of the same renderpass.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_complete_atom_wq - Complete an atom from jd_done_worker(),
+ * removing it from the job
+ * scheduler ringbuffer.
+ * @kctx: Context pointer
+ * @katom: Pointer to the atom to complete
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * Return: true if the context is now idle (no jobs pulled), false otherwise.
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_complete_atom - Complete an atom.
+ *
+ * @katom: Pointer to the atom to complete
+ * @end_timestamp: The time that the atom completed (may be NULL)
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * Return: an atom that has now been unblocked and can now be run, or NULL
+ * if none
+ */
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+ ktime_t *end_timestamp);
+
+/**
+ * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot
+ * dependency
+ * @katom: Pointer to an atom in the slot ringbuffer
+ *
+ * A cross-slot dependency is ignored if necessary to unblock incremental
+ * rendering. If the atom at the start of a renderpass used too much memory
+ * and was soft-stopped then the atom at the end of a renderpass is submitted
+ * to hardware regardless of its dependency on the start-of-renderpass atom.
+ * This can happen multiple times for the same pair of atoms.
+ *
+ * Return: true to block the atom or false to allow it to be submitted to
+ * hardware.
+ */
+bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_sched - Submit atoms from all available contexts.
+ *
+ * @kbdev: Device pointer
+ * @js_mask: Mask of job slots to submit to
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_js_zap_context - Attempt to deschedule a context that is being
+ * destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove the context from any internal job scheduler
+ * queues and perform any other actions needed to ensure that no further jobs
+ * are submitted from it.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * kbase_js_is_atom_valid - Validate an atom
+ *
+ * @kbdev: Device pointer
+ * @katom: Atom to validate
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * Return: true if the atom is valid, false otherwise.
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ *
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+void kbase_js_set_timeouts(struct kbase_device *kbdev);
+
+/**
+ * kbase_js_set_ctx_priority - set the context priority
+ *
+ * @kctx: Context pointer
+ * @new_priority: New priority value for the Context
+ *
+ * The context priority is set to a new value and it is moved to the
+ * pullable/unpullable list as per the new priority.
+ */
+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority);
+
+
+/**
+ * kbase_js_update_ctx_priority - update the context priority
+ *
+ * @kctx: Context pointer
+ *
+ * The context priority gets updated as per the priority of atoms currently in
+ * use for that context, but only if system priority mode for context scheduling
+ * is being used.
+ */
+void kbase_js_update_ctx_priority(struct kbase_context *kctx);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * kbasep_js_is_submit_allowed - Check that a context is allowed to submit
+ * jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size,
+ * and wrap up the long repeated line of code.
+ *
+ * As with any bool, never test the return value by comparing it with true.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(
+ struct kbasep_js_device_data *js_devdata,
+ struct kbase_context *kctx)
+{
+ u16 test_bit;
+ bool is_allowed;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+ test_bit = (u16) (1u << kctx->as_nr);
+
+ is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+ dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %p (as=%d)",
+ is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr);
+ return is_allowed;
+}
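+
+/* Usage sketch (illustrative only): the check must be made with the
+ * hwaccess_lock held, as documented above. The spinlock field name is the one
+ * used elsewhere in kbase; treat it and 'submit_ok' as assumptions here.
+ *
+ *   unsigned long flags;
+ *
+ *   spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ *   if (kbasep_js_is_submit_allowed(&kbdev->js_data, kctx))
+ *       submit_ok = true;
+ *   spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ */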
+
+/**
+ * kbasep_js_set_submit_allowed - Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size,
+ * and wrap up the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_set_submit_allowed(
+ struct kbasep_js_device_data *js_devdata,
+ struct kbase_context *kctx)
+{
+ u16 set_bit;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+ set_bit = (u16) (1u << kctx->as_nr);
+
+ dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)",
+ kctx, kctx->as_nr);
+
+ js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * kbasep_js_clear_submit_allowed - Prevent a context from submitting more
+ * jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size,
+ * and wrap up the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(
+ struct kbasep_js_device_data *js_devdata,
+ struct kbase_context *kctx)
+{
+ u16 clear_bit;
+ u16 clear_mask;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+ clear_bit = (u16) (1u << kctx->as_nr);
+ clear_mask = ~clear_bit;
+
+ dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)",
+ kctx, kctx->as_nr);
+
+ js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
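+
+/* Illustrative only: set/clear operate on the per-address-space bit shown
+ * above, so blocking and later re-enabling submission for a context is a
+ * matched pair of calls (both made with the hwaccess_lock held):
+ *
+ *   kbasep_js_clear_submit_allowed(js_devdata, kctx);
+ *   // ... submission from kctx is now refused ...
+ *   kbasep_js_set_submit_allowed(js_devdata, kctx);
+ */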
+
+/**
+ * kbasep_js_atom_retained_state_init_invalid - Create an initial 'invalid'
+ * atom retained state that requires no atom-related work to be done on
+ * releasing with kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(
+ struct kbasep_js_atom_retained_state *retained_state)
+{
+ retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+ retained_state->core_req =
+ KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+}
+
+/**
+ * kbasep_js_atom_retained_state_copy - Copy atom state that can be made
+ * available after jd_done_nolock() is called on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(
+ struct kbasep_js_atom_retained_state *retained_state,
+ const struct kbase_jd_atom *katom)
+{
+ retained_state->event_code = katom->event_code;
+ retained_state->core_req = katom->core_req;
+ retained_state->sched_priority = katom->sched_priority;
+ retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * kbasep_js_has_atom_finished - Determine whether an atom has finished
+ * (given its retained state),
+ * and so should be given back to
+ * userspace/removed from the system.
+ *
+ * @katom_retained_state: the retained state of the atom to check
+ *
+ * Reasons for an atom not finishing include:
+ * * Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ * * It is an end of renderpass atom that was run to consume the output of a
+ * start-of-renderpass atom that was soft-stopped because it used too much
+ * memory. In this case, it will have to be run again later.
+ *
+ * Return: false if the atom has not finished, true otherwise.
+ */
+static inline bool kbasep_js_has_atom_finished(
+ const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ return (bool) (katom_retained_state->event_code !=
+ BASE_JD_EVENT_STOPPED &&
+ katom_retained_state->event_code !=
+ BASE_JD_EVENT_REMOVED_FROM_NEXT &&
+ katom_retained_state->event_code !=
+ BASE_JD_EVENT_END_RP_DONE);
+}
+
+/**
+ * kbasep_js_atom_retained_state_is_valid - Determine whether a struct
+ * kbasep_js_atom_retained_state
+ * is valid
+ * @katom_retained_state: the atom's retained state to check
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates
+ * that the code should just ignore it.
+ *
+ * Return: false if the retained state is invalid, true otherwise.
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(
+ const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ return (bool) (katom_retained_state->core_req !=
+ KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+/**
+ * kbase_js_runpool_inc_context_count - Increment number of running contexts.
+ *
+ * The following locking conditions are made on the caller:
+ * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+ struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ /* Track total contexts */
+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+ ++(js_devdata->nr_all_contexts_running);
+
+ if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+ /* Track contexts that can submit jobs */
+ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+ S8_MAX);
+ ++(js_devdata->nr_user_contexts_running);
+ }
+}
+
+/**
+ * kbase_js_runpool_dec_context_count - decrement number of running contexts.
+ *
+ * The following locking conditions are made on the caller:
+ * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+ struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ /* Track total contexts */
+ --(js_devdata->nr_all_contexts_running);
+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+ if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+ /* Track contexts that can submit jobs */
+ --(js_devdata->nr_user_contexts_running);
+ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+ }
+}
+
+/**
+ * kbase_js_sched_all - Submit atoms from all available contexts to all
+ * job slots.
+ *
+ * @kbdev: Device pointer
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+ kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio() - Convert atom priority (base_jd_prio)
+ * to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treats priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note: This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ * KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+ if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+ return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+ return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+ unsigned int prio_idx;
+
+ KBASE_DEBUG_ASSERT(sched_prio >= 0 &&
+ sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+ prio_idx = (unsigned int)sched_prio;
+
+ return kbasep_js_relative_priority_to_atom[prio_idx];
+}
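+
+/* Illustrative only: because the mapping is 1:1, converting a valid
+ * base_jd_prio to a scheduler priority and back returns the original value.
+ *
+ *   int sched_prio = kbasep_js_atom_prio_to_sched_prio(atom_prio);
+ *
+ *   if (sched_prio != KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+ *       WARN_ON(kbasep_js_sched_prio_to_atom_prio(sched_prio) != atom_prio);
+ */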
+
+#endif /* _KBASE_JM_JS_H_ */
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index 399d1b6..94c89fa 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -57,6 +57,7 @@ enum base_hw_issue {
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -209,6 +210,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
BASE_HW_ISSUE_TSIX_1792,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -221,6 +223,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
BASE_HW_ISSUE_TSIX_1792,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -232,6 +235,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -242,6 +246,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -251,6 +256,7 @@ static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
BASE_HW_ISSUE_TMIX_8133,
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -261,6 +267,7 @@ static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -270,6 +277,7 @@ static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
BASE_HW_ISSUE_TMIX_8133,
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -281,6 +289,7 @@ static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
BASE_HW_ISSUE_TNOX_1194,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -290,6 +299,7 @@ static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
BASE_HW_ISSUE_TMIX_8133,
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -301,6 +311,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
BASE_HW_ISSUE_TNOX_1194,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -312,6 +323,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
BASE_HW_ISSUE_TGOX_R1_1234,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -321,6 +333,7 @@ static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
BASE_HW_ISSUE_TMIX_8133,
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -335,6 +348,7 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -349,6 +363,22 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = {
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = {
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+ BASE_HW_ISSUE_TTRX_3076,
+ BASE_HW_ISSUE_TTRX_921,
+ BASE_HW_ISSUE_TTRX_3414,
+ BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TTRX_3083,
+ BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -360,6 +390,7 @@ static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
BASE_HW_ISSUE_TTRX_3414,
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -374,6 +405,7 @@ static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = {
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -388,6 +420,7 @@ static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = {
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -399,6 +432,7 @@ static const enum base_hw_issue base_hw_issues_model_tNAx[] = {
BASE_HW_ISSUE_TTRX_3414,
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -411,6 +445,20 @@ static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = {
BASE_HW_ISSUE_TTRX_3414,
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = {
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+ BASE_HW_ISSUE_TTRX_921,
+ BASE_HW_ISSUE_TTRX_3414,
+ BASE_HW_ISSUE_TTRX_3083,
+ BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -423,6 +471,20 @@ static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = {
BASE_HW_ISSUE_TTRX_3414,
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = {
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+ BASE_HW_ISSUE_TTRX_921,
+ BASE_HW_ISSUE_TTRX_3414,
+ BASE_HW_ISSUE_TTRX_3083,
+ BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -434,6 +496,7 @@ static const enum base_hw_issue base_hw_issues_model_tBEx[] = {
BASE_HW_ISSUE_TTRX_3414,
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
+ BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
@@ -444,7 +507,6 @@ static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_TTRX_3414,
BASE_HW_ISSUE_TTRX_3083,
- BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_END
};
@@ -455,7 +517,6 @@ static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TTRX_3414,
BASE_HW_ISSUE_TTRX_3083,
- BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_END
};
@@ -463,7 +524,6 @@ static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_END
};
@@ -472,7 +532,6 @@ static const enum base_hw_issue base_hw_issues_model_tODx[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_END
};
@@ -480,7 +539,6 @@ static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_END
};
@@ -489,7 +547,6 @@ static const enum base_hw_issue base_hw_issues_model_tGRx[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_END
};
@@ -497,7 +554,6 @@ static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_END
};
@@ -506,7 +562,6 @@ static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_END
};
@@ -532,7 +587,6 @@ static const enum base_hw_issue base_hw_issues_tE2x_r0p0[] = {
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_TTRX_3414,
BASE_HW_ISSUE_TTRX_3083,
- BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_END
};
@@ -543,7 +597,6 @@ static const enum base_hw_issue base_hw_issues_model_tE2x[] = {
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TTRX_3414,
BASE_HW_ISSUE_TTRX_3083,
- BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_END
};
diff --git a/mali_kbase/mali_base_kernel.h b/mali_kbase/mali_base_kernel.h
index 8687736..1e2744d 100644
--- a/mali_kbase/mali_base_kernel.h
+++ b/mali_kbase/mali_base_kernel.h
@@ -29,27 +29,16 @@
#ifndef _BASE_KERNEL_H_
#define _BASE_KERNEL_H_
-typedef struct base_mem_handle {
+struct base_mem_handle {
struct {
u64 handle;
} basep;
-} base_mem_handle;
+};
#include "mali_base_mem_priv.h"
#include "gpu/mali_kbase_gpu_coherency.h"
#include "gpu/mali_kbase_gpu_id.h"
-/*
- * Dependency stuff, keep it private for now. May want to expose it if
- * we decide to make the number of semaphores a configurable
- * option.
- */
-#define BASE_JD_ATOM_COUNT 256
-
-/* Set/reset values for a software event */
-#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1)
-#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0)
-
#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
#define BASE_MAX_COHERENT_GROUPS 16
@@ -76,16 +65,6 @@ typedef struct base_mem_handle {
#endif
#endif
-/**
- * @addtogroup base_user_api User-side Base APIs
- * @{
- */
-
-/**
- * @addtogroup base_user_api_memory User-side Base Memory APIs
- * @{
- */
-
/* Physical memory group ID for normal usage.
*/
#define BASE_MEM_GROUP_DEFAULT (0)
@@ -108,193 +87,6 @@ typedef struct base_mem_handle {
*/
typedef u32 base_mem_alloc_flags;
-/* Memory allocation, access/hint flags.
- *
- * See base_mem_alloc_flags.
- */
-
-/* IN */
-/* Read access CPU side
- */
-#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
-
-/* Write access CPU side
- */
-#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
-
-/* Read access GPU side
- */
-#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
-
-/* Write access GPU side
- */
-#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
-
-/* Execute allowed on the GPU side
- */
-#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
-
-/* Will be permanently mapped in kernel space.
- * Flag is only allowed on allocations originating from kbase.
- */
-#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
-
-/* The allocation will completely reside within the same 4GB chunk in the GPU
- * virtual space.
- * Since this flag is primarily required only for the TLS memory which will
- * not be used to contain executable code and also not used for Tiler heap,
- * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
- */
-#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
-
-/* Userspace is not allowed to free this memory.
- * Flag is only allowed on allocations originating from kbase.
- */
-#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
-
-#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
-
-/* Grow backing store on GPU Page Fault
- */
-#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
-
-/* Page coherence Outer shareable, if available
- */
-#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
-
-/* Page coherence Inner shareable
- */
-#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
-
-/* Should be cached on the CPU
- */
-#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
-
-/* IN/OUT */
-/* Must have same VA on both the GPU and the CPU
- */
-#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
-
-/* OUT */
-/* Must call mmap to acquire a GPU address for the alloc
- */
-#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
-
-/* IN */
-/* Page coherence Outer shareable, required.
- */
-#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
-
-/* Protected memory
- */
-#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
-
-/* Not needed physical memory
- */
-#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
-
-/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
- * addresses to be the same
- */
-#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
-
-/**
- * Bit 19 is reserved.
- *
- * Do not remove, use the next unreserved bit for new flags
- */
-#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
-#define BASE_MEM_MAYBE_RESERVED_BIT_19 BASE_MEM_RESERVED_BIT_19
-
-/**
- * Memory starting from the end of the initial commit is aligned to 'extent'
- * pages, where 'extent' must be a power of 2 and no more than
- * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
- */
-#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
-
-/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu mode.
- * Some components within the GPU might only be able to access memory that is
- * GPU cacheable. Refer to the specific GPU implementation for more details.
- * The 3 shareability flags will be ignored for GPU uncached memory.
- * If used while importing USER_BUFFER type memory, then the import will fail
- * if the memory is not aligned to GPU and CPU cache line width.
- */
-#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
-
-/*
- * Bits [22:25] for group_id (0~15).
- *
- * base_mem_group_id_set() should be used to pack a memory group ID into a
- * base_mem_alloc_flags value instead of accessing the bits directly.
- * base_mem_group_id_get() should be used to extract the memory group ID from
- * a base_mem_alloc_flags value.
- */
-#define BASEP_MEM_GROUP_ID_SHIFT 22
-#define BASE_MEM_GROUP_ID_MASK \
- ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
-
-/* Must do CPU cache maintenance when imported memory is mapped/unmapped
- * on GPU. Currently applicable to dma-buf type only.
- */
-#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
-
-/* Use the GPU VA chosen by the kernel client */
-#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27)
-
-/**
- * Number of bits used as flags for base memory management
- *
- * Must be kept in sync with the base_mem_alloc_flags flags
- */
-#define BASE_MEM_FLAGS_NR_BITS 28
-
-/* A mask for all output bits, excluding IN/OUT bits.
- */
-#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
-
-/* A mask for all input bits, including IN/OUT bits.
- */
-#define BASE_MEM_FLAGS_INPUT_MASK \
- (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
-
-/**
- * base_mem_group_id_get() - Get group ID from flags
- * @flags: Flags to pass to base_mem_alloc
- *
- * This inline function extracts the encoded group ID from flags
- * and converts it into numeric value (0~15).
- *
- * Return: group ID(0~15) extracted from the parameter
- */
-static inline int base_mem_group_id_get(base_mem_alloc_flags flags)
-{
- LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0);
- return (int)((flags & BASE_MEM_GROUP_ID_MASK) >>
- BASEP_MEM_GROUP_ID_SHIFT);
-}
-
-/**
- * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags
- * @id: group ID(0~15) you want to encode
- *
- * This inline function encodes specific group ID into base_mem_alloc_flags.
- * Parameter 'id' should lie in-between 0 to 15.
- *
- * Return: base_mem_alloc_flags with the group ID (id) encoded
- *
- * The return value can be combined with other flags against base_mem_alloc
- * to identify a specific memory group.
- */
-static inline base_mem_alloc_flags base_mem_group_id_set(int id)
-{
- LOCAL_ASSERT(id >= 0);
- LOCAL_ASSERT(id < BASE_MEM_GROUP_COUNT);
-
- return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) &
- BASE_MEM_GROUP_ID_MASK;
-}
-
/* A mask for all the flags which are modifiable via the base_mem_set_flags
* interface.
*/
@@ -302,19 +94,6 @@ static inline base_mem_alloc_flags base_mem_group_id_set(int id)
(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
BASE_MEM_COHERENT_LOCAL)
-
-/* A mask of all currently reserved flags
- */
-#define BASE_MEM_FLAGS_RESERVED \
- (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_MAYBE_RESERVED_BIT_19)
-
-/* A mask of all the flags which are only valid for allocations within kbase,
- * and may not be passed from user space.
- */
-#define BASEP_MEM_FLAGS_KERNEL_ONLY \
- (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \
- BASE_MEM_FLAG_MAP_FIXED)
-
/* A mask of all the flags that can be returned via the base_mem_get_flags()
* interface.
*/
@@ -340,14 +119,14 @@ static inline base_mem_alloc_flags base_mem_group_id_set(int id)
* as future releases from ARM might include other new types
* which could clash with your custom types.
*/
-typedef enum base_mem_import_type {
+enum base_mem_import_type {
BASE_MEM_IMPORT_TYPE_INVALID = 0,
/**
* Import type with value 1 is deprecated.
*/
BASE_MEM_IMPORT_TYPE_UMM = 2,
BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
-} base_mem_import_type;
+};
/**
* struct base_mem_import_user_buffer - Handle of an imported user buffer
@@ -363,45 +142,12 @@ struct base_mem_import_user_buffer {
u64 length;
};
-/**
- * @brief Invalid memory handle.
- *
- * Return value from functions returning @ref base_mem_handle on error.
- *
- * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
- * in C++ code or other situations where compound literals cannot be used.
- */
-#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
-
-/**
- * @brief Special write-alloc memory handle.
- *
- * A special handle is used to represent a region where a special page is mapped
- * with a write-alloc cache setup, typically used when the write result of the
- * GPU isn't needed, but the GPU must write anyway.
- *
- * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
- * in C++ code or other situations where compound literals cannot be used.
- */
-#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
-
-#define BASEP_MEM_INVALID_HANDLE (0ull << 12)
-#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
-#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
-#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
-#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
-/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
-#define BASE_MEM_COOKIE_BASE (64ul << 12)
-#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
- BASE_MEM_COOKIE_BASE)
-
/* Mask to detect 4GB boundary alignment */
#define BASE_MEM_MASK_4GB 0xfffff000UL
/* Mask to detect 4GB boundary (in page units) alignment */
#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
-/**
- * Limit on the 'extent' parameter for an allocation with the
+/* Limit on the 'extent' parameter for an allocation with the
* BASE_MEM_TILER_ALIGN_TOP flag set
*
* This is the same as the maximum limit for a Buffer Descriptor's chunk size
@@ -417,82 +163,22 @@ struct base_mem_import_user_buffer {
/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */
#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
-
-/**
- * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
- * @{
- */
-
-/**
- * @brief a basic memory operation (sync-set).
- *
- * The content of this structure is private, and should only be used
- * by the accessors.
- */
-typedef struct base_syncset {
- struct basep_syncset basep_sset;
-} base_syncset;
-
-/** @} end group base_user_api_memory_defered */
-
-/**
- * Handle to represent imported memory object.
- * Simple opague handle to imported memory, can't be used
- * with anything but base_external_resource_init to bind to an atom.
- */
-typedef struct base_import_handle {
- struct {
- u64 handle;
- } basep;
-} base_import_handle;
-
-/** @} end group base_user_api_memory */
-
-/**
- * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
- * @{
- */
-
-typedef int platform_fence_type;
-#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
-
/**
- * Base stream handle.
+ * struct base_fence - Cross-device synchronisation fence.
*
- * References an underlying base stream object.
+ * A fence is used to signal when the GPU has finished accessing a resource that
+ * may be shared with other devices, and also to delay work done asynchronously
+ * by the GPU until other devices have finished accessing a shared resource.
*/
-typedef struct base_stream {
- struct {
- int fd;
- } basep;
-} base_stream;
-
-/**
- * Base fence handle.
- *
- * References an underlying base fence object.
- */
-typedef struct base_fence {
+struct base_fence {
struct {
int fd;
int stream_fd;
} basep;
-} base_fence;
-
-/**
- * @brief Per-job data
- *
- * This structure is used to store per-job data, and is completely unused
- * by the Base driver. It can be used to store things such as callback
- * function pointer, data to handle job completion. It is guaranteed to be
- * untouched by the Base driver.
- */
-typedef struct base_jd_udata {
- u64 blob[2]; /**< per-job data array */
-} base_jd_udata;
+};
/**
- * @brief Memory aliasing info
+ * struct base_mem_aliasing_info - Memory aliasing info
*
* Describes a memory handle to be aliased.
* A subset of the handle can be chosen for aliasing, given an offset and a
@@ -506,26 +192,99 @@ typedef struct base_jd_udata {
* Offset must be within the size of the handle.
* Offset+length must not overrun the size of the handle.
*
- * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
- * @offset Offset within the handle to start aliasing from, in pages.
- * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
- * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
- * specifies the number of times the special page is needed.
+ * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset: Offset within the handle to start aliasing from, in pages.
+ * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * specifies the number of times the special page is needed.
*/
struct base_mem_aliasing_info {
- base_mem_handle handle;
+ struct base_mem_handle handle;
u64 offset;
u64 length;
};
-/**
- * Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
+/* Maximum percentage of just-in-time memory allocation trimming to perform
+ * on free.
+ */
+#define BASE_JIT_MAX_TRIM_LEVEL (100)
+
+/* Maximum number of concurrent just-in-time memory allocations.
+ */
+#define BASE_JIT_ALLOC_COUNT (255)
+
+/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
* initial commit is aligned to 'extent' pages, where 'extent' must be a power
* of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
*/
#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
/**
+ * If set, the heap info address points to a u32 holding the used size in bytes;
+ * otherwise it points to a u64 holding the lowest address of unused memory.
+ */
+#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1)
+
+/**
+ * Valid set of just-in-time memory allocation flags
+ *
+ * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr
+ * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set
+ * and heap_info_gpu_addr being 0 will be rejected).
+ */
+#define BASE_JIT_ALLOC_VALID_FLAGS \
+ (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
+
+/* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5
+ *
+ * jit_version is 1
+ *
+ * Due to the lack of explicit padding, 32-bit and 64-bit user clients may
+ * have assumed different sizes for the struct.
+ *
+ * An array of structures was not supported
+ */
+struct base_jit_alloc_info_10_2 {
+ u64 gpu_alloc_addr;
+ u64 va_pages;
+ u64 commit_pages;
+ u64 extent;
+ u8 id;
+};
+
+/* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up
+ * to 11.19
+ *
+ * This structure was modified several times during and after kernel driver
+ * version 11.5, but remains size-compatible throughout its version history;
+ * earlier variants stay compatible with later ones because any unused space
+ * in the structure must be zero-initialized.
+ *
+ * jit_version is 2
+ *
+ * Kernel driver version history:
+ * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes
+ * must be zero. Kbase minor version was not incremented, so some
+ * versions of 11.5 do not have this change.
+ * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase
+ * minor version not incremented)
+ * 11.6: Added 'flags', replacing 1 padding byte
+ * 11.10: Arrays of this structure are supported
+ */
+struct base_jit_alloc_info_11_5 {
+ u64 gpu_alloc_addr;
+ u64 va_pages;
+ u64 commit_pages;
+ u64 extent;
+ u8 id;
+ u8 bin_id;
+ u8 max_allocations;
+ u8 flags;
+ u8 padding[2];
+ u16 usage_id;
+};
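+
+/* Illustrative only: because compatibility between variants relies on unused
+ * space being zero, a client would typically zero-initialize the whole
+ * structure before filling in the fields it uses, e.g.
+ *
+ *   struct base_jit_alloc_info_11_5 info = { 0 };
+ */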
+
+/**
* struct base_jit_alloc_info - Structure which describes a JIT allocation
* request.
* @gpu_alloc_addr: The GPU virtual address to write the JIT
@@ -543,13 +302,29 @@ struct base_mem_aliasing_info {
* type of JIT allocation.
* @max_allocations: The maximum number of allocations allowed within
* the bin specified by @bin_id. Should be the same
- * for all JIT allocations within the same bin.
+ * for all allocations within the same bin.
* @flags: flags specifying the special requirements for
- * the JIT allocation.
+ * the JIT allocation, see
+ * %BASE_JIT_ALLOC_VALID_FLAGS
* @padding: Expansion space - should be initialised to zero
* @usage_id: A hint about which allocation should be reused.
* The kernel should attempt to use a previous
* allocation with the same usage_id
+ * @heap_info_gpu_addr: Pointer to an object in GPU memory describing
+ * the actual usage of the region.
+ *
+ * jit_version is 3.
+ *
+ * When modifications are made to this structure, it is still compatible with
+ * jit_version 3 when: a) the size is unchanged, and b) new members only
+ * replace the padding bytes.
+ *
+ * Previous jit_version history:
+ * jit_version == 1, refer to &base_jit_alloc_info_10_2
+ * jit_version == 2, refer to &base_jit_alloc_info_11_5
+ *
+ * Kbase version history:
+ * 11.20: added @heap_info_gpu_addr
*/
struct base_jit_alloc_info {
u64 gpu_alloc_addr;
@@ -562,379 +337,17 @@ struct base_jit_alloc_info {
u8 flags;
u8 padding[2];
u16 usage_id;
+ u64 heap_info_gpu_addr;
};
-/**
- * @brief Job dependency type.
- *
- * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
- * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
- * changing the structure size).
- * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
- * errors will not be propagated.
- */
-typedef u8 base_jd_dep_type;
-
-
-#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */
-#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */
-#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */
-
-/**
- * @brief Job chain hardware requirements.
- *
- * A job chain must specify what GPU features it needs to allow the
- * driver to schedule the job correctly. By not specifying the
- * correct settings can/will cause an early job termination. Multiple
- * values can be ORed together to specify multiple requirements.
- * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
- * dependencies, and that doesn't execute anything on the hardware.
- */
-typedef u32 base_jd_core_req;
-
-/* Requirements that come from the HW */
-
-/**
- * No requirement, dependency only
- */
-#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
-
-/**
- * Requires fragment shaders
- */
-#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0)
-
-/**
- * Requires compute shaders
- * This covers any of the following Midgard Job types:
- * - Vertex Shader Job
- * - Geometry Shader Job
- * - An actual Compute Shader Job
- *
- * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
- * job is specifically just the "Compute Shader" job type, and not the "Vertex
- * Shader" nor the "Geometry Shader" job type.
- */
-#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1)
-#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) /**< Requires tiling */
-#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) /**< Requires cache flushes */
-#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) /**< Requires value writeback */
-
-/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
-
-/* Requires fragment job with AFBC encoding */
-#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13)
-
-/**
- * SW-only requirement: coalesce completion events.
- * If this bit is set then completion of this atom will not cause an event to
- * be sent to userspace, whether successful or not; completion events will be
- * deferred until an atom completes which does not have this bit set.
- *
- * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
- */
-#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
-
-/**
- * SW Only requirement: the job chain requires a coherent core group. We don't
- * mind which coherent core group is used.
- */
-#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6)
-
-/**
- * SW Only requirement: The performance counters should be enabled only when
- * they are needed, to reduce power consumption.
- */
-
-#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7)
-
-/**
- * SW Only requirement: External resources are referenced by this atom.
- * When external resources are referenced no syncsets can be bundled with the atom
- * but should instead be part of a NULL jobs inserted into the dependency tree.
- * The first pre_dep object must be configured for the external resouces to use,
- * the second pre_dep object can be used to create other dependencies.
- *
- * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and
- * BASE_JD_REQ_SOFT_EVENT_WAIT.
- */
-#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8)
-
-/**
- * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
- * to the hardware but will cause some action to happen within the driver
- */
-#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9)
-
-#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1)
-#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2)
-#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3)
-
-/* 0x4 RESERVED for now */
-
-/**
- * SW only requirement: event wait/trigger job.
- *
- * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
- * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
- * other waiting jobs. It completes immediately.
- * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
- * possible for other jobs to wait upon. It completes immediately.
- */
-#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5)
-#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6)
-#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7)
-
-#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8)
-
-/**
- * SW only requirement: Just In Time allocation
- *
- * This job requests a single or multiple JIT allocations through a list
- * of @base_jit_alloc_info structure which is passed via the jc element of
- * the atom. The number of @base_jit_alloc_info structures present in the
- * list is passed via the nr_extres element of the atom
- *
- * It should be noted that the id entry in @base_jit_alloc_info must not
- * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
- *
- * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
- * soft job to free the JIT allocation is still made.
- *
- * The job will complete immediately.
- */
-#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9)
-/**
- * SW only requirement: Just In Time free
- *
- * This job requests a single or multiple JIT allocations created by
- * @BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the JIT
- * allocations is passed via the jc element of the atom.
- *
- * The job will complete immediately.
- */
-#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa)
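
As a brief illustration of the JIT alloc/free soft jobs documented above (definitions that are being relocated to the JM kernel header included later in this file), a hypothetical helper might set up the allocation atom like this; infos points at an array of base_jit_alloc_info and count is its length.

static void example_setup_jit_alloc_atom(struct base_jd_atom_v2 *atom,
                                         struct base_jit_alloc_info *infos,
                                         u16 count, base_atom_id number)
{
        memset(atom, 0, sizeof(*atom));
        atom->jc = (u64)(uintptr_t)infos;        /* list of JIT descriptors */
        atom->nr_extres = count;                 /* number of descriptors */
        atom->atom_number = number;
        atom->core_req = BASE_JD_REQ_SOFT_JIT_ALLOC;
        /* a matching BASE_JD_REQ_SOFT_JIT_FREE atom should follow even if
         * this soft job fails, as noted above */
}
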
-
-/**
- * SW only requirement: Map external resource
- *
- * This job requests external resource(s) are mapped once the dependencies
- * of the job have been satisfied. The list of external resources are
- * passed via the jc element of the atom which is a pointer to a
- * @base_external_resource_list.
- */
-#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb)
-/**
- * SW only requirement: Unmap external resource
- *
- * This job requests external resource(s) are unmapped once the dependencies
- * of the job has been satisfied. The list of external resources are
- * passed via the jc element of the atom which is a pointer to a
- * @base_external_resource_list.
- */
-#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc)
-
-/**
- * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
- *
- * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
- *
- * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
- * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
- */
-#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10)
-
-/**
- * HW Requirement: Use the base_jd_atom::device_nr field to specify a
- * particular core group
- *
- * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
- *
- * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms.
- *
- * If the core availability policy is keeping the required core group turned off, then
- * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code.
- */
-#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
-
-/**
- * SW Flag: If this bit is set then the successful completion of this atom
- * will not cause an event to be sent to userspace
- */
-#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12)
-
-/**
- * SW Flag: If this bit is set then completion of this atom will not cause an
- * event to be sent to userspace, whether successful or not.
- */
-#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
-
-/**
- * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
- *
- * If this bit is set then the GPU's cache will not be cleaned and invalidated
- * until a GPU job starts which does not have this bit set or a job completes
- * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if
- * the CPU may have written to memory addressed by the job since the last job
- * without this bit set was submitted.
- */
-#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
-
-/**
- * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
- *
- * If this bit is set then the GPU's cache will not be cleaned and invalidated
- * until a GPU job completes which does not have this bit set or a job starts
- * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if
- * the CPU may read from or partially overwrite memory addressed by the job
- * before the next job without this bit set completes.
- */
-#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
-
-/**
- * Request the atom be executed on a specific job slot.
- *
- * When this flag is specified, it takes precedence over any existing job slot
- * selection logic.
- */
-#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17)
-
-/**
- * These requirement bits are currently unused in base_jd_core_req
- */
-#define BASEP_JD_REQ_RESERVED \
- (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
- BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
- BASE_JD_REQ_EVENT_COALESCE | \
- BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
- BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
- BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \
- BASE_JD_REQ_JOB_SLOT))
-
-/**
- * Mask of all bits in base_jd_core_req that control the type of the atom.
- *
- * This allows dependency only atoms to have flags set
- */
-#define BASE_JD_REQ_ATOM_TYPE \
- (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
- BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
-
-/**
- * Mask of all bits in base_jd_core_req that control the type of a soft job.
- */
-#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
-
-/*
- * Returns non-zero value if core requirements passed define a soft job or
- * a dependency only job.
- */
-#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
- ((core_req & BASE_JD_REQ_SOFT_JOB) || \
- (core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
-
-/*
- * Base Atom priority
- *
- * Only certain priority levels are actually implemented, as specified by the
- * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
- * level that is not one of those defined below.
- *
- * Priority levels only affect scheduling after the atoms have had dependencies
- * resolved. For example, a low priority atom that has had its dependencies
- * resolved might run before a higher priority atom that has not had its
- * dependencies resolved.
- *
- * In general, fragment atoms do not affect non-fragment atoms with
- * lower priorities, and vice versa. One exception is that there is only one
- * priority value for each context. So a high-priority (e.g.) fragment atom
- * could increase its context priority, causing its non-fragment atoms to also
- * be scheduled sooner.
- *
- * The atoms are scheduled as follows with respect to their priorities:
- * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
- * resolved, and atom 'X' has a higher priority than atom 'Y'
- * - If atom 'Y' is currently running on the HW, then it is interrupted to
- * allow atom 'X' to run soon after
- * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
- * the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
- * - Any two atoms that have the same priority could run in any order with
- * respect to each other. That is, there is no ordering constraint between
- * atoms of the same priority.
- *
- * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are
- * scheduled between contexts. The default value, 0, will cause higher-priority
- * atoms to be scheduled first, regardless of their context. The value 1 will
- * use a round-robin algorithm when deciding which context's atoms to schedule
- * next, so higher-priority atoms can only preempt lower priority atoms within
- * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and
- * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details.
- */
-typedef u8 base_jd_prio;
-
-/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
-#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0)
-/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
- * BASE_JD_PRIO_LOW */
-#define BASE_JD_PRIO_HIGH ((base_jd_prio)1)
-/* Low atom priority. */
-#define BASE_JD_PRIO_LOW ((base_jd_prio)2)
-
-/* Count of the number of priority levels. This itself is not a valid
- * base_jd_prio setting */
-#define BASE_JD_NR_PRIO_LEVELS 3
-
-enum kbase_jd_atom_state {
- /** Atom is not used */
- KBASE_JD_ATOM_STATE_UNUSED,
- /** Atom is queued in JD */
- KBASE_JD_ATOM_STATE_QUEUED,
- /** Atom has been given to JS (is runnable/running) */
- KBASE_JD_ATOM_STATE_IN_JS,
- /** Atom has been completed, but not yet handed back to job dispatcher
- * for dependency resolution */
- KBASE_JD_ATOM_STATE_HW_COMPLETED,
- /** Atom has been completed, but not yet handed back to userspace */
- KBASE_JD_ATOM_STATE_COMPLETED
-};
-
-typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
-
-struct base_dependency {
- base_atom_id atom_id; /**< An atom number */
- base_jd_dep_type dependency_type; /**< Dependency type */
-};
-
-/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value.
- * In order to keep the size of the structure same, padding field has been adjusted
- * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines)
- * is added at the end of the structure. Place in the structure previously occupied by u16 core_req
- * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission
- * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left
- * for possible future use. */
-typedef struct base_jd_atom_v2 {
- u64 jc; /**< job-chain GPU address */
- struct base_jd_udata udata; /**< user data */
- u64 extres_list; /**< list of external resources */
- u16 nr_extres; /**< nr of external resources or JIT allocations */
- u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */
- struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field,
- this is done in order to reduce possibility of improper assigment of a dependency field */
- base_atom_id atom_number; /**< unique number to identify the atom */
- base_jd_prio prio; /**< Atom priority. Refer to @ref base_jd_prio for more details */
- u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
- u8 jobslot; /**< Job slot to use when BASE_JD_REQ_JOB_SLOT is specified */
- base_jd_core_req core_req; /**< core requirements */
-} base_jd_atom_v2;
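
A hypothetical sketch of populating base_jd_atom_v2 for submission: a fragment atom is ordered after a previously submitted atom using the dependency setter defined below, and given an explicit priority. vertex_atom_id must be non-zero for a non-invalid dependency type, per the setter's assertions.

static void example_chain_fragment_atom(struct base_jd_atom_v2 *frag,
                                        base_atom_id frag_id,
                                        base_atom_id vertex_atom_id)
{
        memset(frag, 0, sizeof(*frag));
        frag->atom_number = frag_id;
        frag->prio = BASE_JD_PRIO_MEDIUM;
        frag->core_req = BASE_JD_REQ_FS;
        /* order-only dependency: errors from the earlier atom are not
         * propagated to this atom */
        base_jd_atom_dep_set(&frag->pre_dep[0], vertex_atom_id,
                             BASE_JD_DEP_TYPE_ORDER);
        /* pre_dep[1] is already {0, BASE_JD_DEP_TYPE_INVALID} thanks to
         * the memset, i.e. unused */
}
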
-
-typedef enum base_external_resource_access {
+enum base_external_resource_access {
BASE_EXT_RES_ACCESS_SHARED,
BASE_EXT_RES_ACCESS_EXCLUSIVE
-} base_external_resource_access;
+};
-typedef struct base_external_resource {
+struct base_external_resource {
u64 ext_resource;
-} base_external_resource;
+};
/**
@@ -961,351 +374,32 @@ struct base_jd_debug_copy_buffer {
struct base_external_resource extres;
};
-/**
- * @brief Setter for a dependency structure
- *
- * @param[in] dep The kbase jd atom dependency to be initialized.
- * @param id The atom_id to be assigned.
- * @param dep_type The dep_type to be assigned.
- *
- */
-static inline void base_jd_atom_dep_set(struct base_dependency *dep,
- base_atom_id id, base_jd_dep_type dep_type)
-{
- LOCAL_ASSERT(dep != NULL);
-
- /*
- * make sure we don't set not allowed combinations
- * of atom_id/dependency_type.
- */
- LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
- (id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
-
- dep->atom_id = id;
- dep->dependency_type = dep_type;
-}
-
-/**
- * @brief Make a copy of a dependency structure
- *
- * @param[in,out] dep The kbase jd atom dependency to be written.
- * @param[in] from The dependency to make a copy from.
- *
- */
-static inline void base_jd_atom_dep_copy(struct base_dependency *dep,
- const struct base_dependency *from)
-{
- LOCAL_ASSERT(dep != NULL);
-
- base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type);
-}
-
-/**
- * @brief Soft-atom fence trigger setup.
- *
- * Sets up an atom to be a SW-only atom signaling a fence
- * when it reaches the run state.
- *
- * Using the existing base dependency system the fence can
- * be set to trigger when a GPU job has finished.
- *
- * The base fence object must not be terminated until the atom
- * has been submitted to @ref base_jd_submit and @ref base_jd_submit
- * has returned.
- *
- * @a fence must be a valid fence set up with @a base_fence_init.
- * Calling this function with a uninitialized fence results in undefined behavior.
- *
- * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
- * @param[in] fence The base fence object to trigger.
- *
- * @pre @p fence must reference a @ref base_fence successfully initialized by
- * calling @ref base_fence_init.
- * @pre @p fence was @e not initialized by calling @ref base_fence_import, nor
- * is it associated with a fence-trigger job that was already submitted
- * by calling @ref base_jd_submit.
- * @post @p atom can be submitted by calling @ref base_jd_submit.
- */
-static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
-{
- LOCAL_ASSERT(atom);
- LOCAL_ASSERT(fence);
- LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
- LOCAL_ASSERT(fence->basep.stream_fd >= 0);
- atom->jc = (uintptr_t) fence;
- atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
-}
-
-/**
- * @brief Soft-atom fence wait setup.
- *
- * Sets up an atom to be a SW-only atom waiting on a fence.
- * When the fence becomes triggered the atom becomes runnable
- * and completes immediately.
- *
- * Using the existing base dependency system the fence can
- * be set to block a GPU job until it has been triggered.
- *
- * The base fence object must not be terminated until the atom
- * has been submitted to @ref base_jd_submit and
- * @ref base_jd_submit has returned.
- *
- * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
- * @param[in] fence The base fence object to wait on
- *
- * @pre @p fence must reference a @ref base_fence successfully initialized by
- * calling @ref base_fence_import, or it must be associated with a
- * fence-trigger job that was already submitted by calling
- * @ref base_jd_submit.
- * @post @p atom can be submitted by calling @ref base_jd_submit.
- */
-static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
-{
- LOCAL_ASSERT(atom);
- LOCAL_ASSERT(fence);
- LOCAL_ASSERT(fence->basep.fd >= 0);
- atom->jc = (uintptr_t) fence;
- atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
-}
-
-/**
- * @brief External resource info initialization.
- *
- * Sets up an external resource object to reference
- * a memory allocation and the type of access requested.
- *
- * @param[in] res The resource object to initialize
- * @param handle The handle to the imported memory object, must be
- * obtained by calling @ref base_mem_as_import_handle().
- * @param access The type of access requested
- */
-static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
-{
- u64 address;
-
- address = handle.basep.handle;
-
- LOCAL_ASSERT(res != NULL);
- LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
- LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
-
- res->ext_resource = address | (access & LOCAL_PAGE_LSB);
-}
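
A short hypothetical usage sketch for the helper above: wrap an imported buffer in a base_external_resource and attach it to an atom. It assumes the handle was obtained via base_mem_as_import_handle(), as required, and ignores the additional pre_dep constraints described with BASE_JD_REQ_EXTERNAL_RESOURCES.

static void example_attach_external_resource(struct base_jd_atom_v2 *atom,
                                             struct base_external_resource *res,
                                             struct base_import_handle handle)
{
        base_external_resource_init(res, handle, BASE_EXT_RES_ACCESS_SHARED);
        atom->extres_list = (u64)(uintptr_t)res;   /* single-entry list */
        atom->nr_extres = 1;
        atom->core_req |= BASE_JD_REQ_EXTERNAL_RESOURCES;
}
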
-
-/**
- * @brief Job chain event code bits
- * Defines the bits used to create ::base_jd_event_code
- */
-enum {
- BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
- BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
- BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
- BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
- BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
- BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
- BASE_JD_SW_EVENT_RESERVED = (3u << 11), /**< Reserved event type */
- BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) /**< Mask to extract the type from an event code */
-};
-
-/**
- * @brief Job chain event codes
- *
- * HW and low-level SW events are represented by event codes.
- * The status of jobs which succeeded are also represented by
- * an event code (see ::BASE_JD_EVENT_DONE).
- * Events are usually reported as part of a ::base_jd_event.
- *
- * The event codes are encoded in the following way:
- * @li 10:0 - subtype
- * @li 12:11 - type
- * @li 13 - SW success (only valid if the SW bit is set)
- * @li 14 - SW event (HW event if not set)
- * @li 15 - Kernel event (should never be seen in userspace)
- *
- * Events are split up into ranges as follows:
- * - BASE_JD_EVENT_RANGE_\<description\>_START
- * - BASE_JD_EVENT_RANGE_\<description\>_END
- *
- * \a code is in \<description\>'s range when:
- * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
- *
- * Ranges can be asserted for adjacency by testing that the END of the previous
- * is equal to the START of the next. This is useful for optimizing some tests
- * for range.
- *
- * A limitation is that the last member of this enum must explicitly be handled
- * (with an assert-unreachable statement) in switch statements that use
- * variables of this type. Otherwise, the compiler warns that we have not
- * handled that enum value.
- */
-typedef enum base_jd_event_code {
- /* HW defined exceptions */
-
- /** Start of HW Non-fault status codes
- *
- * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
- * because the job was hard-stopped
- */
- BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
-
- /* non-fatal exceptions */
- BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
- BASE_JD_EVENT_DONE = 0x01,
- BASE_JD_EVENT_STOPPED = 0x03, /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
- BASE_JD_EVENT_TERMINATED = 0x04, /**< This is actually a fault status code - the job was hard stopped */
- BASE_JD_EVENT_ACTIVE = 0x08, /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
-
- /** End of HW Non-fault status codes
- *
- * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
- * because the job was hard-stopped
- */
- BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
-
- /** Start of HW fault and SW Error status codes */
- BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
-
- /* job exceptions */
- BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
- BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
- BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
- BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
- BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
- BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
- BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
- BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
- BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
- BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
- BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
- BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
- BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
- BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
- BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
- BASE_JD_EVENT_STATE_FAULT = 0x5A,
- BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
- BASE_JD_EVENT_UNKNOWN = 0x7F,
-
- /* GPU exceptions */
- BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
- BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
-
- /* MMU exceptions */
- BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
- BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
- BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
- BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
- BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
- BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
- BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
- BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
- BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
- BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
-
- /* SW defined exceptions */
- BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
- BASE_JD_EVENT_TIMED_OUT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
- BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
- BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
- BASE_JD_EVENT_PM_EVENT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
-
- BASE_JD_EVENT_BAG_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
-
- /** End of HW fault and SW Error status codes */
- BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
-
- /** Start of SW Success status codes */
- BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
-
- BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
- BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
- BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
-
- /** End of SW Success status codes */
- BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
-
- /** Start of Kernel-only status codes. Such codes are never returned to user-space */
- BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
- BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
-
- /** End of Kernel-only status codes. */
- BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
-} base_jd_event_code;
-
-/**
- * @brief Event reporting structure
- *
- * This structure is used by the kernel driver to report information
- * about GPU events. The can either be HW-specific events or low-level
- * SW events, such as job-chain completion.
- *
- * The event code contains an event type field which can be extracted
- * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
- *
- * Based on the event type base_jd_event::data holds:
- * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
- * job-chain
- * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
- * been completed (ie all contained job-chains have been completed).
- * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
- */
-typedef struct base_jd_event_v2 {
- base_jd_event_code event_code; /**< event code */
- base_atom_id atom_number; /**< the atom number that has completed */
- struct base_jd_udata udata; /**< user data */
-} base_jd_event_v2;
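
To make the event-code encoding above concrete, a hypothetical user-side handler might classify a returned event as follows; only names defined above are used, and the fault ranges are referenced rather than enumerated.

static void example_handle_event(const struct base_jd_event_v2 *ev)
{
        if (ev->event_code == BASE_JD_EVENT_DONE)
                return;                  /* atom ev->atom_number succeeded */

        if (ev->event_code & BASE_JD_SW_EVENT) {
                /* SW-defined event: classify by the type bits 12:11 */
                switch (ev->event_code & BASE_JD_SW_EVENT_TYPE_MASK) {
                case BASE_JD_SW_EVENT_JOB:
                        /* e.g. BASE_JD_EVENT_JOB_CANCELLED */
                        break;
                case BASE_JD_SW_EVENT_BAG:
                case BASE_JD_SW_EVENT_INFO:
                default:
                        break;
                }
        } else {
                /* HW event: compare against the HW fault ranges above */
        }
}
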
-
-/**
- * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
- *
- * This structure is stored into the memory pointed to by the @c jc field
- * of @ref base_jd_atom.
- *
- * It must not occupy the same CPU cache line(s) as any neighboring data.
- * This is to avoid cases where access to pages containing the structure
- * is shared between cached and un-cached memory regions, which would
- * cause memory corruption.
- */
-
-typedef struct base_dump_cpu_gpu_counters {
- u64 system_time;
- u64 cycle_counter;
- u64 sec;
- u32 usec;
- u8 padding[36];
-} base_dump_cpu_gpu_counters;
-
-/** @} end group base_user_api_job_dispatch */
-
#define GPU_MAX_JOB_SLOTS 16
/**
- * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ * User-side Base GPU Property Queries
*
- * The User-side Base GPU Property Query API encapsulates two
+ * The User-side Base GPU Property Query interface encapsulates two
* sub-modules:
*
- * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
- * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
- *
- * There is a related third module outside of Base, which is owned by the MIDG
- * module:
- * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ * - "Dynamic GPU Properties"
+ * - "Base Platform Config GPU Properties"
*
- * Base only deals with properties that vary between different Midgard
+ * Base only deals with properties that vary between different GPU
* implementations - the Dynamic GPU properties and the Platform Config
* properties.
*
- * For properties that are constant for the Midgard Architecture, refer to the
- * MIDG module. However, we will discuss their relevance here <b>just to
- * provide background information.</b>
+ * For properties that are constant for the GPU Architecture, refer to the
+ * GPU module. However, we will discuss their relevance here just to
+ * provide background information.
*
- * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ * About the GPU Properties in Base and GPU modules
*
- * The compile-time properties (Platform Config, Midgard Compile-time
+ * The compile-time properties (Platform Config, GPU Compile-time
* properties) are exposed as pre-processor macros.
*
* Complementing the compile-time properties are the Dynamic GPU
- * Properties, which act as a conduit for the Midgard Configuration
+ * Properties, which act as a conduit for the GPU Configuration
* Discovery.
*
* In general, the dynamic properties are present to verify that the platform
@@ -1314,13 +408,13 @@ typedef struct base_dump_cpu_gpu_counters {
*
* As a consistent guide across the entire DDK, the choice for dynamic or
* compile-time should consider the following, in order:
- * -# Can the code be written so that it doesn't need to know the
+ * 1. Can the code be written so that it doesn't need to know the
* implementation limits at all?
- * -# If you need the limits, get the information from the Dynamic Property
+ * 2. If you need the limits, get the information from the Dynamic Property
* lookup. This should be done once as you fetch the context, and then cached
* as part of the context data structure, so it's cheap to access.
- * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
- * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * 3. If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or GPU Compile-time
* property). Examples of where this might be sensible follow:
* - Part of a critical inner-loop
* - Frequent re-use throughout the driver, causing significant extra load
@@ -1333,25 +427,25 @@ typedef struct base_dump_cpu_gpu_counters {
* devices would benefit much more from a single DDK binary, instead of
* insignificant micro-optimizations.
*
- * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ * Dynamic GPU Properties
*
* Dynamic GPU properties are presented in two sets:
- * -# the commonly used properties in @ref base_gpu_props, which have been
+ * 1. the commonly used properties in @ref base_gpu_props, which have been
* unpacked from GPU register bitfields.
- * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
- * (also a member of @ref base_gpu_props). All of these are presented in
+ * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props
+ * (also a member of base_gpu_props). All of these are presented in
* the packed form, as presented by the GPU registers themselves.
*
- * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * The raw properties in gpu_raw_gpu_props are necessary to
* allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
* behaving differently?". In this case, all information about the
- * configuration is potentially useful, but it <b>does not need to be processed
- * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * configuration is potentially useful, but it does not need to be processed
+ * by the driver. Instead, the raw registers can be processed by the Mali
* Tools software on the host PC.
*
- * The properties returned extend the Midgard Configuration Discovery
- * registers. For example, GPU clock speed is not specified in the Midgard
- * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ * The properties returned extend the GPU Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the GPU
+ * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function.
*
* The GPU properties are obtained by a call to
* base_get_gpu_props(). This simply returns a pointer to a const
@@ -1365,12 +459,12 @@ typedef struct base_dump_cpu_gpu_counters {
* base_context.
*
*
- * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ * Kernel Operation
*
* During Base Context Create time, user-side makes a single kernel call:
* - A call to fill user memory with GPU information structures
*
- * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * The kernel-side will fill in the entire provided, processed base_gpu_props
* structure, because this information is required in both
* user and kernel side; it does not make sense to decode it twice.
*
@@ -1379,7 +473,8 @@ typedef struct base_dump_cpu_gpu_counters {
* be known kernel-side, to support chains that specify a 'Only Coherent Group'
* SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
*
- * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Coherency Group calculation
+ *
* Creation of the coherent group data is done at device-driver startup, and so
* is one-time. This will most likely involve a loop with CLZ, shifting, and
* bit clearing on the L2_PRESENT mask, depending on whether the
@@ -1393,16 +488,6 @@ typedef struct base_dump_cpu_gpu_counters {
* 16 coherent groups, since core groups are typically 4 cores.
*/
-/**
- * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
- * @{
- */
-
-/**
- * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
- * @{
- */
-
#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
#define BASE_MAX_COHERENT_GROUPS 16
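
The coherency group calculation sketched above (a loop of CLZ/shift/clear over L2_PRESENT) can be illustrated as follows. This is only a counting sketch under the assumption of one coherent group per L2 slice; how shader cores are assigned to each group is hardware-specific and not shown.

static u32 example_count_coherent_groups(u64 l2_present)
{
        u32 num_groups = 0;

        while (l2_present) {
                l2_present &= l2_present - 1;    /* clear the lowest set bit */
                num_groups++;                    /* one group per L2 slice */
        }
        return num_groups;
}
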
@@ -1507,7 +592,7 @@ struct mali_base_gpu_thread_props {
};
/**
- * @brief descriptor for a coherent group
+ * struct mali_base_gpu_coherent_group - descriptor for a coherent group
*
* \c core_mask exposes all cores in that coherent group, and \c num_cores
* provides a cached population-count for that mask.
@@ -1524,7 +609,7 @@ struct mali_base_gpu_coherent_group {
};
/**
- * @brief Coherency group information
+ * struct mali_base_gpu_coherent_group_info - Coherency group information
*
* Note that the sizes of the members could be reduced. However, the \c group
* member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
@@ -1549,7 +634,7 @@ struct mali_base_gpu_coherent_group_info {
u32 num_core_groups;
/**
- * Coherency features of the memory, accessed by @ref gpu_mem_features
+ * Coherency features of the memory, accessed by gpu_mem_features
* methods
*/
u32 coherency;
@@ -1563,18 +648,18 @@ struct mali_base_gpu_coherent_group_info {
};
/**
- * A complete description of the GPU's Hardware Configuration Discovery
- * registers.
+ * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware
+ * Configuration Discovery registers.
*
* The information is presented inefficiently for access. For frequent access,
* the values should be better expressed in an unpacked form in the
* base_gpu_props structure.
*
- * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * The raw properties in gpu_raw_gpu_props are necessary to
* allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
* behaving differently?". In this case, all information about the
- * configuration is potentially useful, but it <b>does not need to be processed
- * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * configuration is potentially useful, but it does not need to be processed
+ * by the driver. Instead, the raw registers can be processed by the Mali
* Tools software on the host PC.
*
*/
@@ -1613,95 +698,65 @@ struct gpu_raw_gpu_props {
};
/**
- * Return structure for base_get_gpu_props().
+ * struct base_gpu_props - Return structure for base_get_gpu_props().
*
* NOTE: the raw_props member in this data structure contains the register
* values from which the value of the other members are derived. The derived
* members exist to allow for efficient access and/or shielding the details
* of the layout of the registers.
*
+ * @unused_1: Keep for backwards compatibility.
+ * @raw_props: This member is large, likely to be 128 bytes.
+ * @coherency_info: This must be last member of the structure.
*/
-typedef struct base_gpu_props {
+struct base_gpu_props {
struct mali_base_gpu_core_props core_props;
struct mali_base_gpu_l2_cache_props l2_props;
- u64 unused_1; /* keep for backwards compatibility */
+ u64 unused_1;
struct mali_base_gpu_tiler_props tiler_props;
struct mali_base_gpu_thread_props thread_props;
-
- /** This member is large, likely to be 128 bytes */
struct gpu_raw_gpu_props raw_props;
-
- /** This must be last member of the structure */
struct mali_base_gpu_coherent_group_info coherency_info;
-} base_gpu_props;
-
-/** @} end group base_user_api_gpuprops_dyn */
-
-/** @} end group base_user_api_gpuprops */
+};
-/**
- * @addtogroup base_user_api_core User-side Base core APIs
- * @{
- */
+#include "jm/mali_base_jm_kernel.h"
/**
- * Flags to pass to ::base_context_init.
- * Flags can be ORed together to enable multiple things.
+ * base_mem_group_id_get() - Get group ID from flags
+ * @flags: Flags to pass to base_mem_alloc
*
- * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
- * not collide with them.
- */
-typedef u32 base_context_create_flags;
-
-/** No flags set */
-#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
-
-/** Base context is embedded in a cctx object (flag used for CINSTR
- * software counter macros)
- */
-#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
-
-/** Base context is a 'System Monitor' context for Hardware counters.
+ * This inline function extracts the encoded group ID from flags
+ * and converts it into a numeric value (0-15).
*
- * One important side effect of this is that job submission is disabled.
- */
-#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
- ((base_context_create_flags)1 << 1)
-
-
-/* Bit-shift used to encode a memory group ID in base_context_create_flags
- */
-#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
-
-/* Bitmask used to encode a memory group ID in base_context_create_flags
- */
-#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
- ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
-
-/* Bitpattern describing the base_context_create_flags that can be
- * passed to the kernel
- */
-#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
- (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
- BASEP_CONTEXT_MMU_GROUP_ID_MASK)
-
-/* Bitpattern describing the ::base_context_create_flags that can be
- * passed to base_context_init()
+ * Return: group ID (0-15) extracted from the flags
*/
-#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
- (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
+static inline int base_mem_group_id_get(base_mem_alloc_flags flags)
+{
+ LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0);
+ return (int)((flags & BASE_MEM_GROUP_ID_MASK) >>
+ BASEP_MEM_GROUP_ID_SHIFT);
+}
-/*
- * Private flags used on the base context
+/**
+ * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags
+ * @id: group ID (0-15) to encode
*
- * These start at bit 31, and run down to zero.
+ * This inline function encodes a specific group ID into base_mem_alloc_flags.
+ * The parameter 'id' must lie between 0 and 15.
+ *
+ * Return: base_mem_alloc_flags with the group ID (id) encoded
*
- * They share the same space as @ref base_context_create_flags, and so must
- * not collide with them.
+ * The return value can be combined with other flags passed to base_mem_alloc
+ * to identify a specific memory group.
*/
-/** Private flag tracking whether job descriptor dumping is disabled */
-#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \
- ((base_context_create_flags)(1 << 31))
+static inline base_mem_alloc_flags base_mem_group_id_set(int id)
+{
+ LOCAL_ASSERT(id >= 0);
+ LOCAL_ASSERT(id < BASE_MEM_GROUP_COUNT);
+
+ return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) &
+ BASE_MEM_GROUP_ID_MASK;
+}
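
A small usage sketch for the pair of helpers above (the example function name is hypothetical): the group ID is encoded into the allocation flags and can be recovered with the getter.

static base_mem_alloc_flags example_alloc_flags_for_group3(void)
{
        base_mem_alloc_flags flags = base_mem_group_id_set(3);

        /* base_mem_group_id_get(flags) now returns 3; other allocation
         * flags may be OR'd in before calling base_mem_alloc */
        return flags;
}
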
/**
* base_context_mmu_group_id_set - Encode a memory group ID in
@@ -1741,46 +796,7 @@ static inline int base_context_mmu_group_id_get(
BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
}
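
Similarly, a hypothetical sketch for the context-level helpers above: encode an MMU memory group ID into the context create flags and read it back.

static base_context_create_flags example_context_flags_for_group1(void)
{
        base_context_create_flags flags = base_context_mmu_group_id_set(1);

        /* base_context_mmu_group_id_get(flags) recovers the value 1 */
        return flags;
}
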
-/** @} end group base_user_api_core */
-
-/** @} end group base_user_api */
-
-/**
- * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
- * @{
- *
- * C Pre-processor macros are exposed here to do with Platform
- * Config.
- *
- * These include:
- * - GPU Properties that are constant on a particular Midgard Family
- * Implementation e.g. Maximum samples per pixel on Mali-T600.
- * - General platform config for the GPU, such as the GPU major and minor
- * revison.
- */
-
-/** @} end group base_plat_config_gpuprops */
-
-/**
- * @addtogroup base_api Base APIs
- * @{
- */
-
-/** @} end group base_api */
-
-/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
- * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
-#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
-
-/* Indicate that job dumping is enabled. This could affect certain timers
- * to account for the performance impact. */
-#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
-
-
-#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
- BASE_TLSTREAM_JOB_DUMPING_ENABLED)
-
-/**
+/*
* A number of bit flags are defined for requesting cpu_gpu_timeinfo. These
* flags are also used, where applicable, for specifying which fields
* are valid following the request operation.
@@ -1804,5 +820,4 @@ static inline int base_context_mmu_group_id_get(
BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \
BASE_TIMEINFO_USER_SOURCE_FLAG)
-
#endif /* _BASE_KERNEL_H_ */
diff --git a/mali_kbase/mali_base_mem_priv.h b/mali_kbase/mali_base_mem_priv.h
index 52c8a4f..844a025 100644
--- a/mali_kbase/mali_base_mem_priv.h
+++ b/mali_kbase/mali_base_mem_priv.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2015, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -47,7 +47,7 @@
* - offset is ignored.
*/
struct basep_syncset {
- base_mem_handle mem_handle;
+ struct base_mem_handle mem_handle;
u64 user_addr;
u64 size;
u8 type;
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index 8a5088c..66e4349 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -77,6 +77,8 @@
#include <trace/events/gpu.h>
#endif
+#include "mali_linux_trace.h"
+
#ifndef u64_to_user_ptr
/* Introduced in Linux v4.6 */
@@ -155,7 +157,6 @@ int assign_irqs(struct kbase_device *kbdev);
int kbase_sysfs_init(struct kbase_device *kbdev);
void kbase_sysfs_term(struct kbase_device *kbdev);
-void kbase_device_debugfs_term(struct kbase_device *kbdev);
int kbase_protected_mode_init(struct kbase_device *kbdev);
void kbase_protected_mode_term(struct kbase_device *kbdev);
@@ -163,7 +164,17 @@ void kbase_protected_mode_term(struct kbase_device *kbdev);
int power_control_init(struct kbase_device *kbdev);
void power_control_term(struct kbase_device *kbdev);
+#ifdef CONFIG_DEBUG_FS
+void kbase_device_debugfs_term(struct kbase_device *kbdev);
int kbase_device_debugfs_init(struct kbase_device *kbdev);
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+ return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
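
With the CONFIG_DEBUG_FS=n stubs above, callers no longer need their own #ifdef guards. A hypothetical call site (not taken from the driver) would simply be:

static int example_device_late_init(struct kbase_device *kbdev)
{
        int err = kbase_device_debugfs_init(kbdev);

        if (err)
                return err;      /* only possible when debugfs is built in */
        /* ... remaining initialisation ... */
        return 0;
}
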
int registers_map(struct kbase_device *kbdev);
void registers_unmap(struct kbase_device *kbdev);
@@ -219,9 +230,6 @@ void kbase_jd_zap_context(struct kbase_context *kctx);
bool jd_done_nolock(struct kbase_jd_atom *katom,
struct list_head *completed_jobs_ctx);
void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
-bool jd_submit_atom(struct kbase_context *kctx,
- const struct base_jd_atom_v2 *user_atom,
- struct kbase_jd_atom *katom);
void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
/**
@@ -251,6 +259,22 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done);
void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
struct kbase_jd_atom *katom);
+/**
+ * kbase_job_slot_softstop_start_rp() - Soft-stop the atom at the start
+ * of a renderpass.
+ * @kctx: Pointer to a kernel base context.
+ * @reg: Reference of a growable GPU memory region in the same context.
+ * Takes ownership of the reference if successful.
+ *
+ * Used to switch to incremental rendering if we have nearly run out of
+ * virtual address space in a growable memory region and the atom currently
+ * executing on a job slot is the tiler job chain at the start of a renderpass.
+ *
+ * Return: 0 if successful, otherwise a negative error code.
+ */
+int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx,
+ struct kbase_va_region *reg);
+
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom);
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
@@ -282,6 +306,16 @@ void kbase_event_wakeup(struct kbase_context *kctx);
*/
int kbasep_jit_alloc_validate(struct kbase_context *kctx,
struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_retry_pending_alloc() - Retry blocked just-in-time memory
+ * allocations.
+ *
+ * @kctx: Pointer to the kbase context within which the just-in-time
+ * memory allocations are to be retried.
+ */
+void kbase_jit_retry_pending_alloc(struct kbase_context *kctx);
+
/**
* kbase_free_user_buffer() - Free memory allocated for struct
* @kbase_debug_copy_buffer.
diff --git a/mali_kbase/mali_kbase_10969_workaround.c b/mali_kbase/mali_kbase_10969_workaround.c
deleted file mode 100644
index 118511a..0000000
--- a/mali_kbase/mali_kbase_10969_workaround.c
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2013-2015,2017-2018 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-#include <linux/dma-mapping.h>
-#include <mali_kbase.h>
-#include <mali_kbase_10969_workaround.h>
-
-/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
-#define X_COORDINATE_MASK 0x00000FFF
-#define Y_COORDINATE_MASK 0x0FFF0000
-/* Max number of words needed from the fragment shader job descriptor */
-#define JOB_HEADER_SIZE_IN_WORDS 10
-#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
-
-/* Word 0: Status Word */
-#define JOB_DESC_STATUS_WORD 0
-/* Word 1: Restart Index */
-#define JOB_DESC_RESTART_INDEX_WORD 1
-/* Word 2: Fault address low word */
-#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
-/* Word 8: Minimum Tile Coordinates */
-#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
-/* Word 9: Maximum Tile Coordinates */
-#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
-
-int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
-{
- struct device *dev = katom->kctx->kbdev->dev;
- u32 clamped = 0;
- struct kbase_va_region *region;
- struct tagged_addr *page_array;
- u64 page_index;
- u32 offset = katom->jc & (~PAGE_MASK);
- u32 *page_1 = NULL;
- u32 *page_2 = NULL;
- u32 job_header[JOB_HEADER_SIZE_IN_WORDS];
- void *dst = job_header;
- u32 minX, minY, maxX, maxY;
- u32 restartX, restartY;
- struct page *p;
- u32 copy_size;
-
- dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
- if (!(katom->core_req & BASE_JD_REQ_FS))
- return 0;
-
- kbase_gpu_vm_lock(katom->kctx);
- region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
- katom->jc);
- if (kbase_is_region_invalid_or_free(region))
- goto out_unlock;
-
- page_array = kbase_get_cpu_phy_pages(region);
- if (!page_array)
- goto out_unlock;
-
- page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
-
- p = as_page(page_array[page_index]);
-
- /* we need the first 10 words of the fragment shader job descriptor.
- * We need to check that the offset + 10 words is less that the page
- * size otherwise we need to load the next page.
- * page_size_overflow will be equal to 0 in case the whole descriptor
- * is within the page > 0 otherwise.
- */
- copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
-
- page_1 = kmap_atomic(p);
-
- /* page_1 is a u32 pointer, offset is expressed in bytes */
- page_1 += offset>>2;
-
- kbase_sync_single_for_cpu(katom->kctx->kbdev,
- kbase_dma_addr(p) + offset,
- copy_size, DMA_BIDIRECTIONAL);
-
- memcpy(dst, page_1, copy_size);
-
- /* The data needed overflows page the dimension,
- * need to map the subsequent page */
- if (copy_size < JOB_HEADER_SIZE) {
- p = as_page(page_array[page_index + 1]);
- page_2 = kmap_atomic(p);
-
- kbase_sync_single_for_cpu(katom->kctx->kbdev,
- kbase_dma_addr(p),
- JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
-
- memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
- }
-
- /* We managed to correctly map one or two pages (in case of overflow) */
- /* Get Bounding Box data and restart index from fault address low word */
- minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
- minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
- maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
- maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
- restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
- restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
-
- dev_warn(dev, "Before Clamping:\n"
- "Jobstatus: %08x\n"
- "restartIdx: %08x\n"
- "Fault_addr_low: %08x\n"
- "minCoordsX: %08x minCoordsY: %08x\n"
- "maxCoordsX: %08x maxCoordsY: %08x\n",
- job_header[JOB_DESC_STATUS_WORD],
- job_header[JOB_DESC_RESTART_INDEX_WORD],
- job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
- minX, minY,
- maxX, maxY);
-
- /* Set the restart index to the one which generated the fault*/
- job_header[JOB_DESC_RESTART_INDEX_WORD] =
- job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
-
- if (restartX < minX) {
- job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
- dev_warn(dev,
- "Clamping restart X index to minimum. %08x clamped to %08x\n",
- restartX, minX);
- clamped = 1;
- }
- if (restartY < minY) {
- job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
- dev_warn(dev,
- "Clamping restart Y index to minimum. %08x clamped to %08x\n",
- restartY, minY);
- clamped = 1;
- }
- if (restartX > maxX) {
- job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
- dev_warn(dev,
- "Clamping restart X index to maximum. %08x clamped to %08x\n",
- restartX, maxX);
- clamped = 1;
- }
- if (restartY > maxY) {
- job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
- dev_warn(dev,
- "Clamping restart Y index to maximum. %08x clamped to %08x\n",
- restartY, maxY);
- clamped = 1;
- }
-
- if (clamped) {
- /* Reset the fault address low word
- * and set the job status to STOPPED */
- job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
- job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
- dev_warn(dev, "After Clamping:\n"
- "Jobstatus: %08x\n"
- "restartIdx: %08x\n"
- "Fault_addr_low: %08x\n"
- "minCoordsX: %08x minCoordsY: %08x\n"
- "maxCoordsX: %08x maxCoordsY: %08x\n",
- job_header[JOB_DESC_STATUS_WORD],
- job_header[JOB_DESC_RESTART_INDEX_WORD],
- job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
- minX, minY,
- maxX, maxY);
-
- /* Flush CPU cache to update memory for future GPU reads*/
- memcpy(page_1, dst, copy_size);
- p = as_page(page_array[page_index]);
-
- kbase_sync_single_for_device(katom->kctx->kbdev,
- kbase_dma_addr(p) + offset,
- copy_size, DMA_TO_DEVICE);
-
- if (copy_size < JOB_HEADER_SIZE) {
- memcpy(page_2, dst + copy_size,
- JOB_HEADER_SIZE - copy_size);
- p = as_page(page_array[page_index + 1]);
-
- kbase_sync_single_for_device(katom->kctx->kbdev,
- kbase_dma_addr(p),
- JOB_HEADER_SIZE - copy_size,
- DMA_TO_DEVICE);
- }
- }
- if (copy_size < JOB_HEADER_SIZE)
- kunmap_atomic(page_2);
-
- kunmap_atomic(page_1);
-
-out_unlock:
- kbase_gpu_vm_unlock(katom->kctx);
- return clamped;
-}
diff --git a/mali_kbase/mali_kbase_10969_workaround.h b/mali_kbase/mali_kbase_10969_workaround.h
deleted file mode 100644
index 379a05a..0000000
--- a/mali_kbase/mali_kbase_10969_workaround.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2013-2014, 2018 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-#ifndef _KBASE_10969_WORKAROUND_
-#define _KBASE_10969_WORKAROUND_
-
-/**
- * kbasep_10969_workaround_clamp_coordinates - Apply the WA to clamp the restart indices
- * @katom: atom representing the fragment job for which the WA has to be applied
- *
- * This workaround is used to solve an HW issue with single iterator GPUs.
- * If a fragment job is soft-stopped on the edge of its bounding box, it can happen
- * that the restart index is out of bounds and the rerun causes a tile range
- * fault. If this happens we try to clamp the restart index to a correct value.
- */
-int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
-
-#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h
index a4c72da..e079281 100644
--- a/mali_kbase/mali_kbase_config_defaults.h
+++ b/mali_kbase/mali_kbase_config_defaults.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -200,5 +200,14 @@ enum {
*/
#define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024)
+/**
+ * Default threshold at which to switch to incremental rendering
+ *
+ * Fraction, expressed in 256ths, of the maximum size of an allocation that
+ * grows on GPU page fault which can be used before the driver switches to
+ * incremental rendering. 0 disables incremental rendering.
+ */
+#define DEFAULT_IR_THRESHOLD (192)
+
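
As a worked example of the 256ths scaling (helper name hypothetical): with the default of 192, incremental rendering is triggered once 192/256 = 75% of an allocation's maximum size has been used.

static inline u64 example_ir_threshold_pages(u64 max_pages)
{
        return (max_pages * DEFAULT_IR_THRESHOLD) / 256;
}
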
#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index fe2ae0e..3f3d5cc 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -48,6 +48,9 @@
#include <mali_kbase_hwaccess_backend.h>
#include <mali_kbase_hwaccess_time.h>
#include <mali_kbase_hwaccess_jm.h>
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+#include <mali_kbase_hwaccess_instr.h>
+#endif
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
#include <backend/gpu/mali_kbase_device_internal.h>
@@ -672,12 +675,12 @@ static int kbase_api_set_flags(struct kbase_file *kfile,
js_kctx_info = &kctx->jctx.sched_info;
mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
-
/* Translate the flags */
if ((flags->create_flags &
BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
}
@@ -918,18 +921,43 @@ static int kbase_api_get_ddk_version(struct kbase_context *kctx,
return len;
}
-/* Defaults for legacy JIT init ioctl */
+/* Defaults for legacy just-in-time memory allocator initialization
+ * kernel calls
+ */
#define DEFAULT_MAX_JIT_ALLOCATIONS 255
#define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */
-static int kbase_api_mem_jit_init_old(struct kbase_context *kctx,
- struct kbase_ioctl_mem_jit_init_old *jit_init)
+static int kbase_api_mem_jit_init_10_2(struct kbase_context *kctx,
+ struct kbase_ioctl_mem_jit_init_10_2 *jit_init)
{
kctx->jit_version = 1;
+ /* since no phys_pages parameter, use the maximum: va_pages */
return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
DEFAULT_MAX_JIT_ALLOCATIONS,
- JIT_LEGACY_TRIM_LEVEL, BASE_MEM_GROUP_DEFAULT);
+ JIT_LEGACY_TRIM_LEVEL, BASE_MEM_GROUP_DEFAULT,
+ jit_init->va_pages);
+}
+
+static int kbase_api_mem_jit_init_11_5(struct kbase_context *kctx,
+ struct kbase_ioctl_mem_jit_init_11_5 *jit_init)
+{
+ int i;
+
+ kctx->jit_version = 2;
+
+ for (i = 0; i < sizeof(jit_init->padding); i++) {
+ /* Ensure all padding bytes are 0 for potential future
+ * extension
+ */
+ if (jit_init->padding[i])
+ return -EINVAL;
+ }
+
+ /* since no phys_pages parameter, use the maximum: va_pages */
+ return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+ jit_init->max_allocations, jit_init->trim_level,
+ jit_init->group_id, jit_init->va_pages);
}
static int kbase_api_mem_jit_init(struct kbase_context *kctx,
@@ -937,7 +965,7 @@ static int kbase_api_mem_jit_init(struct kbase_context *kctx,
{
int i;
- kctx->jit_version = 2;
+ kctx->jit_version = 3;
for (i = 0; i < sizeof(jit_init->padding); i++) {
/* Ensure all padding bytes are 0 for potential future
@@ -949,7 +977,7 @@ static int kbase_api_mem_jit_init(struct kbase_context *kctx,
return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
jit_init->max_allocations, jit_init->trim_level,
- jit_init->group_id);
+ jit_init->group_id, jit_init->phys_pages);
}
static int kbase_api_mem_exec_init(struct kbase_context *kctx,
@@ -1381,10 +1409,16 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_get_ddk_version,
kctx);
break;
- case KBASE_IOCTL_MEM_JIT_INIT_OLD:
- KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_OLD,
- kbase_api_mem_jit_init_old,
- struct kbase_ioctl_mem_jit_init_old,
+ case KBASE_IOCTL_MEM_JIT_INIT_10_2:
+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_10_2,
+ kbase_api_mem_jit_init_10_2,
+ struct kbase_ioctl_mem_jit_init_10_2,
+ kctx);
+ break;
+ case KBASE_IOCTL_MEM_JIT_INIT_11_5:
+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_11_5,
+ kbase_api_mem_jit_init_11_5,
+ struct kbase_ioctl_mem_jit_init_11_5,
kctx);
break;
case KBASE_IOCTL_MEM_JIT_INIT:
@@ -1476,12 +1510,14 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_mem_profile_add,
kctx);
break;
+
case KBASE_IOCTL_SOFT_EVENT_UPDATE:
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE,
kbase_api_soft_event_update,
struct kbase_ioctl_soft_event_update,
kctx);
break;
+
case KBASE_IOCTL_STICKY_RESOURCE_MAP:
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP,
kbase_api_sticky_resource_map,
@@ -1563,7 +1599,7 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_tlstream_stats,
kctx);
break;
-#endif
+#endif /* MALI_UNIT_TEST */
}
dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd));
@@ -3692,6 +3728,9 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
kbasep_gpu_memory_debugfs_init(kbdev);
kbase_as_fault_debugfs_init(kbdev);
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+ kbase_instr_backend_debugfs_init(kbdev);
+#endif
/* fops_* variables created by invocations of macro
* MAKE_QUIRK_ACCESSORS() above. */
debugfs_create_file("quirks_sc", 0644,
@@ -3746,7 +3785,6 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
kbdev->mali_debugfs_directory, kbdev,
&kbasep_serialize_jobs_debugfs_fops);
-
return 0;
out:
@@ -3758,14 +3796,6 @@ void kbase_device_debugfs_term(struct kbase_device *kbdev)
{
debugfs_remove_recursive(kbdev->mali_debugfs_directory);
}
-
-#else /* CONFIG_DEBUG_FS */
-static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
-{
- return 0;
-}
-
-static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
#endif /* CONFIG_DEBUG_FS */
#endif /* MALI_KBASE_BUILD */
@@ -3961,8 +3991,13 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
dev_set_drvdata(kbdev->dev, kbdev);
err = kbase_device_init(kbdev);
+
if (err) {
- dev_err(kbdev->dev, "Device initialization failed\n");
+ if (err == -EPROBE_DEFER)
+ dev_err(kbdev->dev, "Device initialization deferred\n");
+ else
+ dev_err(kbdev->dev, "Device initialization failed\n");
+
dev_set_drvdata(kbdev->dev, NULL);
kbase_device_free(kbdev);
} else {
@@ -3970,6 +4005,7 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
dev_info(kbdev->dev,
"Probed as %s\n", dev_name(kbdev->mdev.this_device));
#endif /* MALI_KBASE_BUILD */
+ kbase_increment_device_id();
}
return err;
@@ -4212,14 +4248,12 @@ MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
__stringify(BASE_UK_VERSION_MAJOR) "." \
__stringify(BASE_UK_VERSION_MINOR) ")");
-#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
-#define CREATE_TRACE_POINTS
-#endif
-#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define CREATE_TRACE_POINTS
/* Create the trace points (otherwise we just get code to call a tracepoint) */
#include "mali_linux_trace.h"
+#ifdef CONFIG_MALI_GATOR_SUPPORT
EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
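
The ioctl changes above split JIT initialisation into three interface versions (10.2, 11.5 and the current one, which adds phys_pages). The snippet below sketches the userspace side of the newest call; the struct layout is a reconstruction from the fields referenced in this diff, and the real definition plus the KBASE_IOCTL_MEM_JIT_INIT number live in mali_kbase_ioctl.h, so treat this as an assumption-laden sketch rather than a drop-in snippet.

#include <linux/types.h>
#include <string.h>
#include <sys/ioctl.h>

/* Hypothetical mirror of the kernel's argument struct; only the fields
 * referenced in this diff are modelled and the real layout may differ. */
struct jit_init_args_sketch {
	__u64 va_pages;
	__u8  max_allocations;
	__u8  trim_level;
	__u8  group_id;
	__u8  padding[5];	/* must be zero, see the check above */
	__u64 phys_pages;	/* new: physical page limit across JIT allocs */
};

/* 'request' stands in for KBASE_IOCTL_MEM_JIT_INIT from mali_kbase_ioctl.h. */
static int jit_init_sketch(int mali_fd, unsigned long request,
			   __u64 va_pages, __u64 phys_pages)
{
	struct jit_init_args_sketch args;

	memset(&args, 0, sizeof(args));	/* non-zero padding is rejected (-EINVAL) */
	args.va_pages = va_pages;
	args.max_allocations = 255;
	args.trim_level = 0;
	args.group_id = 0;
	args.phys_pages = phys_pages;

	return ioctl(mali_fd, request, &args);
}
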
diff --git a/mali_kbase/mali_kbase_cs_experimental.h b/mali_kbase/mali_kbase_cs_experimental.h
index b68a105..e1fffc3 100644
--- a/mali_kbase/mali_kbase_cs_experimental.h
+++ b/mali_kbase/mali_kbase_cs_experimental.h
@@ -21,7 +21,7 @@
*//* SPDX-License-Identifier: GPL-2.0 */
/*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,36 +35,18 @@
#include <linux/kernel.h>
-#if MALI_CS_EXPERIMENTAL
-
-/**
- * mali_kbase_has_cs_experimental() - Has the driver been built with
- * CS_EXPERIMENTAL=y
- *
- * It is preferable to guard cs_experimental code with this rather than #ifdef
- * all through the code.
- *
- * Return: true if built with CS_EXPERIMENTAL false otherwise
- */
-static inline bool mali_kbase_has_cs_experimental(void)
-{
- return true;
-}
-#else
-static inline bool mali_kbase_has_cs_experimental(void)
-{
- return false;
-}
-#endif
-
/**
- * mali_kbase_print_cs_experimental() - Print a string if built with
- * CS_EXPERIMENTAL=y
+ * mali_kbase_print_cs_experimental() - Print a string for every Core Services
+ * experimental feature that is enabled
*/
static inline void mali_kbase_print_cs_experimental(void)
{
- if (mali_kbase_has_cs_experimental())
- pr_info("mali_kbase: EXPERIMENTAL (MALI_CS_EXPERIMENTAL) flag enabled");
+#if MALI_JIT_PRESSURE_LIMIT
+ pr_info("mali_kbase: JIT_PRESSURE_LIMIT (experimental) enabled");
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+#if MALI_INCREMENTAL_RENDERING
+ pr_info("mali_kbase: INCREMENTAL_RENDERING (experimental) enabled");
+#endif /* MALI_INCREMENTAL_RENDERING */
}
#endif /* _KBASE_CS_EXPERIMENTAL_H_ */
diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c
index 35853a3..3922260 100644
--- a/mali_kbase/mali_kbase_ctx_sched.c
+++ b/mali_kbase/mali_kbase_ctx_sched.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -145,8 +145,14 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
lockdep_assert_held(&kbdev->hwaccess_lock);
- if (atomic_dec_return(&kctx->refcount) == 0)
+ if (atomic_dec_return(&kctx->refcount) == 0) {
kbdev->as_free |= (1u << kctx->as_nr);
+ if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) {
+ kbdev->as_to_kctx[kctx->as_nr] = NULL;
+ kctx->as_nr = KBASEP_AS_NR_INVALID;
+ kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT);
+ }
+ }
}
void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx)
@@ -186,6 +192,8 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
kbase_mmu_update(kbdev, &kctx->mmu,
kctx->as_nr);
+ kbase_ctx_flag_clear(kctx,
+ KCTX_AS_DISABLED_ON_FAULT);
} else {
/* This context might have been assigned an
* AS before, clear it.
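
The release path above now defers detaching a faulted context from its address space until the last reference is dropped. A minimal stand-alone model of that pattern is below; it uses plain C11 atomics and simplified types rather than kbase's kbase_context/kbase_device, so it illustrates the idea only and is not driver code.

#include <stdatomic.h>
#include <stdbool.h>

#define AS_NR_INVALID (-1)
#define NUM_AS 8

struct ctx_model {
	atomic_int refcount;
	int as_nr;
	bool disabled_on_fault;		/* models KCTX_AS_DISABLED_ON_FAULT */
};

struct dev_model {
	unsigned int as_free;		/* bitmask of free address spaces */
	struct ctx_model *as_to_ctx[NUM_AS];
};

/* In the real driver this runs with hwaccess_lock held. */
void release_ctx_model(struct dev_model *dev, struct ctx_model *ctx)
{
	/* atomic_fetch_sub returns the previous value, so 1 means it is now 0 */
	if (atomic_fetch_sub(&ctx->refcount, 1) == 1) {
		/* Last reference: the address space becomes free again... */
		dev->as_free |= 1u << ctx->as_nr;
		/* ...and a context disabled after a fault is only now
		 * fully detached from its address space. */
		if (ctx->disabled_on_fault) {
			dev->as_to_ctx[ctx->as_nr] = NULL;
			ctx->as_nr = AS_NR_INVALID;
			ctx->disabled_on_fault = false;
		}
	}
}
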
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 059d850..ce32b53 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -89,45 +89,10 @@
#endif /* CONFIG_MALI_DEBUG */
#endif /* KBASE_TRACE_ENABLE */
-/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
-#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
-
-/**
- * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
- * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
- * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
- * before resetting.
- */
-#define ZAP_TIMEOUT 1000
-
/** Number of milliseconds before we time out on a GPU soft/hard reset */
#define RESET_TIMEOUT 500
/**
- * Prevent soft-stops from occuring in scheduling situations
- *
- * This is not due to HW issues, but when scheduling is desired to be more predictable.
- *
- * Therefore, soft stop may still be disabled due to HW issues.
- *
- * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
- *
- * @note if not in use, define this value to 0 instead of \#undef'ing it
- */
-#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
-
-/**
- * Prevent hard-stops from occuring in scheduling situations
- *
- * This is not due to HW issues, but when scheduling is desired to be more predictable.
- *
- * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
- *
- * @note if not in use, define this value to 0 instead of \#undef'ing it
- */
-#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
-
-/**
* The maximum number of Job Slots to support in the Hardware.
*
* You can optimize this down if your target devices will only ever support a
@@ -177,61 +142,6 @@
*/
#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((32 * 1024ul * 1024ul) >> \
PAGE_SHIFT)
-
-/** Atom has been previously soft-stoppped */
-#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
-/** Atom has been previously retried to execute */
-#define KBASE_KATOM_FLAGS_RERUN (1<<2)
-/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps to
- * disambiguate short-running job chains during soft/hard stopping of jobs
- */
-#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
-/** Atom has been previously hard-stopped. */
-#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
-/** Atom has caused us to enter disjoint state */
-#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
-/* Atom blocked on cross-slot dependency */
-#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
-/* Atom has fail dependency on cross-slot dependency */
-#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
-/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
-#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
-/* Atom is currently holding a context reference */
-#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
-/* Atom requires GPU to be in protected mode */
-#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
-/* Atom has been stored in runnable_tree */
-#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
-/* Atom is waiting for L2 caches to power up in order to enter protected mode */
-#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13)
-
-/* SW related flags about types of JS_COMMAND action
- * NOTE: These must be masked off by JS_COMMAND_MASK */
-
-/** This command causes a disjoint event */
-#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
-
-/** Bitmask of all SW related flags */
-#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT)
-
-#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
-#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
-#endif
-
-/** Soft-stop command that causes a Disjoint event. This of course isn't
- * entirely masked off by JS_COMMAND_MASK */
-#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
- (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
-
-#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
-
-/* Serialize atoms within a slot (ie only one atom per job slot) */
-#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0)
-/* Serialize atoms between slots (ie only one job slot running at any time) */
-#define KBASE_SERIALIZE_INTER_SLOT (1 << 1)
-/* Reset the GPU after each atom completion */
-#define KBASE_SERIALIZE_RESET (1 << 2)
-
/* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer
* clients, to reduce undesired system load.
* If a virtualizer client requests a dump within this threshold period after
@@ -259,47 +169,6 @@ struct kbase_as;
struct kbase_mmu_setup;
struct kbase_ipa_model_vinstr_data;
-#ifdef CONFIG_DEBUG_FS
-/**
- * struct base_job_fault_event - keeps track of the atom which faulted or which
- * completed after the faulty atom but before the
- * debug data for faulty atom was dumped.
- *
- * @event_code: event code for the atom, should != BASE_JD_EVENT_DONE for the
- * atom which faulted.
- * @katom: pointer to the atom for which job fault occurred or which completed
- * after the faulty atom.
- * @job_fault_work: work item, queued only for the faulty atom, which waits for
- * the dumping to get completed and then does the bottom half
- * of job done for the atoms which followed the faulty atom.
- * @head: List head used to store the atom in the global list of faulty
- * atoms or context specific list of atoms which got completed
- * during the dump.
- * @reg_offset: offset of the register to be dumped next, only applicable for
- * the faulty atom.
- */
-struct base_job_fault_event {
-
- u32 event_code;
- struct kbase_jd_atom *katom;
- struct work_struct job_fault_work;
- struct list_head head;
- int reg_offset;
-};
-
-#endif
-
-/**
- * struct kbase_jd_atom_dependency - Contains the dependency info for an atom.
- * @atom: pointer to the dependee atom.
- * @dep_type: type of dependency on the dependee @atom, i.e. order or data
- * dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency.
- */
-struct kbase_jd_atom_dependency {
- struct kbase_jd_atom *atom;
- u8 dep_type;
-};
-
/**
* struct kbase_io_access - holds information about 1 register access
*
@@ -334,418 +203,6 @@ struct kbase_io_history {
};
/**
- * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the
- * dependee atom.
- * @dep: pointer to the dependency info structure.
- *
- * Return: readonly reference to dependee atom.
- */
-static inline const struct kbase_jd_atom *
-kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
-{
- LOCAL_ASSERT(dep != NULL);
-
- return (const struct kbase_jd_atom *)(dep->atom);
-}
-
-/**
- * kbase_jd_katom_dep_type - Retrieves the dependency type info
- *
- * @dep: pointer to the dependency info structure.
- *
- * Return: the type of dependency there is on the dependee atom.
- */
-static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
-{
- LOCAL_ASSERT(dep != NULL);
-
- return dep->dep_type;
-}
-
-/**
- * kbase_jd_katom_dep_set - sets up the dependency info structure
- * as per the values passed.
- * @const_dep: pointer to the dependency info structure to be setup.
- * @a: pointer to the dependee atom.
- * @type: type of dependency there is on the dependee atom.
- */
-static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
- struct kbase_jd_atom *a, u8 type)
-{
- struct kbase_jd_atom_dependency *dep;
-
- LOCAL_ASSERT(const_dep != NULL);
-
- dep = (struct kbase_jd_atom_dependency *)const_dep;
-
- dep->atom = a;
- dep->dep_type = type;
-}
-
-/**
- * kbase_jd_katom_dep_clear - resets the dependency info structure
- *
- * @const_dep: pointer to the dependency info structure to be setup.
- */
-static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
-{
- struct kbase_jd_atom_dependency *dep;
-
- LOCAL_ASSERT(const_dep != NULL);
-
- dep = (struct kbase_jd_atom_dependency *)const_dep;
-
- dep->atom = NULL;
- dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
-}
-
-/**
- * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it becomes
- * runnable, with respect to job slot ringbuffer/fifo.
- * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, which
- * implies that either atom has not become runnable
- * due to dependency or has completed the execution
- * on GPU.
- * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is blocked
- * due to cross slot dependency, can't be submitted to GPU.
- * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot fifo but
- * is waiting for the completion of previously added atoms
- * in current & other slots, as their protected mode
- * requirements do not match with the current atom.
- * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo and is
- * waiting for completion of protected mode transition,
- * needed before the atom is submitted to GPU.
- * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is waiting
- * for the cores, which are needed to execute the job
- * chain represented by the atom, to become available
- * @KBASE_ATOM_GPU_RB_READY: Atom is in slot fifo and can be submitted to GPU.
- * @KBASE_ATOM_GPU_RB_SUBMITTED: Atom is in slot fifo and has been submitted to GPU.
- * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some failure,
- * but only after the previously added atoms in fifo
- * have completed or have also been returned to JS.
- */
-enum kbase_atom_gpu_rb_state {
- KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
- KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
- KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
- KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
- KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
- KBASE_ATOM_GPU_RB_READY,
- KBASE_ATOM_GPU_RB_SUBMITTED,
- KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1
-};
-
-/**
- * enum kbase_atom_enter_protected_state - The state of an atom with respect to the
- * preparation for GPU's entry into protected mode, becomes
- * pertinent only after atom's state with respect to slot
- * ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
- * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any atoms
- * currently submitted to GPU and protected mode transition is
- * not already in progress.
- * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to
- * become disabled before entry into protected mode.
- * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
- * for the coherency change. L2 shall be powered down and GPU shall
- * come out of fully coherent mode before entering protected mode.
- * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change;
- * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on so that
- * coherency register contains correct value when GPU enters
- * protected mode.
- * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for BASE_HW_ISSUE_TGOX_R1_1234 check
- * that L2 is powered up and switch GPU to protected mode.
- */
-enum kbase_atom_enter_protected_state {
- /**
- * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK.
- */
- KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
- KBASE_ATOM_ENTER_PROTECTED_HWCNT,
- KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
- KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY,
- KBASE_ATOM_ENTER_PROTECTED_FINISHED,
-};
-
-/**
- * enum kbase_atom_exit_protected_state - The state of an atom with respect to the
- * preparation for GPU's exit from protected mode, becomes
- * pertinent only after atom's state with respect to slot
- * ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
- * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any atoms
- * currently submitted to GPU and protected mode transition is
- * not already in progress.
- * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
- * for the reset, as exiting protected mode requires a reset.
- * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from protected mode
- * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to complete
- */
-enum kbase_atom_exit_protected_state {
- /**
- * NOTE: The integer value of this must match KBASE_ATOM_ENTER_PROTECTED_CHECK.
- */
- KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
- KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
- KBASE_ATOM_EXIT_PROTECTED_RESET,
- KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
-};
-
-/**
- * struct kbase_ext_res - Contains the info for external resources referred
- * by an atom, which have been mapped on GPU side.
- * @gpu_address: Start address of the memory region allocated for
- * the resource from GPU virtual address space.
- * @alloc: pointer to physical pages tracking object, set on
- * mapping the external resource on GPU side.
- */
-struct kbase_ext_res {
- u64 gpu_address;
- struct kbase_mem_phy_alloc *alloc;
-};
-
-/**
- * struct kbase_jd_atom - object representing the atom, containing the complete
- * state and attributes of an atom.
- * @work: work item for the bottom half processing of the atom,
- * by JD or JS, after it got executed on GPU or the input
- * fence got signaled
- * @start_timestamp: time at which the atom was submitted to the GPU, by
- * updating the JS_HEAD_NEXTn register.
- * @udata: copy of the user data sent for the atom in base_jd_submit.
- * @kctx: Pointer to the base context with which the atom is associated.
- * @dep_head: Array of 2 list heads, pointing to the two list of atoms
- * which are blocked due to dependency on this atom.
- * @dep_item: Array of 2 list heads, used to store the atom in the list of
- * other atoms depending on the same dependee atom.
- * @dep: Array containing the dependency info for the 2 atoms on which
- * the atom depends upon.
- * @jd_item: List head used during job dispatch job_done processing - as
- * dependencies may not be entirely resolved at this point,
- * we need to use a separate list head.
- * @in_jd_list: flag set to true if atom's @jd_item is currently on a list,
- * prevents atom being processed twice.
- * @nr_extres: number of external resources referenced by the atom.
- * @extres: pointer to the location containing info about @nr_extres
- * external resources referenced by the atom.
- * @device_nr: indicates the coregroup with which the atom is associated,
- * when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified.
- * @jc: GPU address of the job-chain.
- * @softjob_data: Copy of data read from the user space buffer that @jc
- * points to.
- * @fence: Stores either an input or output sync fence, depending
- * on soft-job type
- * @sync_waiter: Pointer to the sync fence waiter structure passed to the
- * callback function on signaling of the input fence.
- * @dma_fence: object containing pointers to both input & output fences
- * and other related members used for explicit sync through
- * soft jobs and for the implicit synchronization required
- * on access to external resources.
- * @event_code: Event code for the job chain represented by the atom, both
- * HW and low-level SW events are represented by event codes.
- * @core_req: bitmask of BASE_JD_REQ_* flags specifying either Hw or Sw
- * requirements for the job chain represented by the atom.
- * @ticks: Number of scheduling ticks for which atom has been running
- * on the GPU.
- * @sched_priority: Priority of the atom for Job scheduling, as per the
- * KBASE_JS_ATOM_SCHED_PRIO_*.
- * @completed: Wait queue to wait upon for the completion of atom.
- * @status: Indicates at high level at what stage the atom is in,
- * as per KBASE_JD_ATOM_STATE_*, that whether it is not in
- * use or its queued in JD or given to JS or submitted to Hw
- * or it completed the execution on Hw.
- * @work_id: used for GPU tracepoints, its a snapshot of the 'work_id'
- * counter in kbase_jd_context which is incremented on
- * every call to base_jd_submit.
- * @slot_nr: Job slot chosen for the atom.
- * @atom_flags: bitmask of KBASE_KATOM_FLAG* flags capturing the exact
- * low level state of the atom.
- * @gpu_rb_state: bitmnask of KBASE_ATOM_GPU_RB_* flags, precisely tracking
- * atom's state after it has entered Job scheduler on becoming
- * runnable. Atom could be blocked due to cross slot dependency
- * or waiting for the shader cores to become available or
- * waiting for protected mode transitions to complete.
- * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU
- * cache is needed for the atom and the shader cores used
- * for atom have been kept on.
- * @blocked: flag indicating that atom's resubmission to GPU is
- * blocked till the work item is scheduled to return the
- * atom to JS.
- * @pre_dep: Pointer to atom that this atom has same-slot dependency on
- * @post_dep: Pointer to atom that has same-slot dependency on this atom
- * @x_pre_dep: Pointer to atom that this atom has cross-slot dependency on
- * @x_post_dep: Pointer to atom that has cross-slot dependency on this atom
- * @flush_id: The GPU's flush count recorded at the time of submission,
- * used for the cache flush optimisation
- * @fault_event: Info for dumping the debug data on Job fault.
- * @queue: List head used for 4 different purposes :
- * Adds atom to the list of dma-buf fence waiting atoms.
- * Adds atom to the list of atoms blocked due to cross
- * slot dependency.
- * Adds atom to the list of softjob atoms for which JIT
- * allocation has been deferred
- * Adds atom to the list of softjob atoms waiting for the
- * signaling of fence.
- * @jit_node: Used to keep track of all JIT free/alloc jobs in submission order
- * @jit_blocked: Flag indicating that JIT allocation requested through
- * softjob atom will be reattempted after the impending
- * free of other active JIT allocations.
- * @will_fail_event_code: If non-zero, this indicates that the atom will fail
- * with the set event_code when the atom is processed.
- * Used for special handling of atoms, which have a data
- * dependency on the failed atoms.
- * @protected_state: State of the atom, as per KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*,
- * when transitioning into or out of protected mode. Atom will
- * be either entering or exiting the protected mode.
- * @runnable_tree_node: The node added to context's job slot specific rb tree
- * when the atom becomes runnable.
- * @age: Age of atom relative to other atoms in the context, is
- * snapshot of the age_count counter in kbase context.
- */
-struct kbase_jd_atom {
- struct work_struct work;
- ktime_t start_timestamp;
-
- struct base_jd_udata udata;
- struct kbase_context *kctx;
-
- struct list_head dep_head[2];
- struct list_head dep_item[2];
- const struct kbase_jd_atom_dependency dep[2];
- struct list_head jd_item;
- bool in_jd_list;
-
- u16 nr_extres;
- struct kbase_ext_res *extres;
-
- u32 device_nr;
- u64 jc;
- void *softjob_data;
-#if defined(CONFIG_SYNC)
- struct sync_fence *fence;
- struct sync_fence_waiter sync_waiter;
-#endif /* CONFIG_SYNC */
-#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
- struct {
- /* Use the functions/API defined in mali_kbase_fence.h to
- * when working with this sub struct */
-#if defined(CONFIG_SYNC_FILE)
- /* Input fence */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
- struct fence *fence_in;
-#else
- struct dma_fence *fence_in;
-#endif
-#endif
- /* This points to the dma-buf output fence for this atom. If
- * this is NULL then there is no fence for this atom and the
- * following fields related to dma_fence may have invalid data.
- *
- * The context and seqno fields contain the details for this
- * fence.
- *
- * This fence is signaled when the katom is completed,
- * regardless of the event_code of the katom (signal also on
- * failure).
- */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
- struct fence *fence;
-#else
- struct dma_fence *fence;
-#endif
- /* The dma-buf fence context number for this atom. A unique
- * context number is allocated to each katom in the context on
- * context creation.
- */
- unsigned int context;
- /* The dma-buf fence sequence number for this atom. This is
- * increased every time this katom uses dma-buf fence.
- */
- atomic_t seqno;
- /* This contains a list of all callbacks set up to wait on
- * other fences. This atom must be held back from JS until all
- * these callbacks have been called and dep_count have reached
- * 0. The initial value of dep_count must be equal to the
- * number of callbacks on this list.
- *
- * This list is protected by jctx.lock. Callbacks are added to
- * this list when the atom is built and the wait are set up.
- * All the callbacks then stay on the list until all callbacks
- * have been called and the atom is queued, or cancelled, and
- * then all callbacks are taken off the list and freed.
- */
- struct list_head callbacks;
- /* Atomic counter of number of outstandind dma-buf fence
- * dependencies for this atom. When dep_count reaches 0 the
- * atom may be queued.
- *
- * The special value "-1" may only be set after the count
- * reaches 0, while holding jctx.lock. This indicates that the
- * atom has been handled, either queued in JS or cancelled.
- *
- * If anyone but the dma-fence worker sets this to -1 they must
- * ensure that any potentially queued worker must have
- * completed before allowing the atom to be marked as unused.
- * This can be done by flushing the fence work queue:
- * kctx->dma_fence.wq.
- */
- atomic_t dep_count;
- } dma_fence;
-#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE*/
-
- /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
- enum base_jd_event_code event_code;
- base_jd_core_req core_req;
- u8 jobslot;
-
- u32 ticks;
- int sched_priority;
-
- wait_queue_head_t completed;
- enum kbase_jd_atom_state status;
-#ifdef CONFIG_GPU_TRACEPOINTS
- int work_id;
-#endif
- int slot_nr;
-
- u32 atom_flags;
-
- int retry_count;
-
- enum kbase_atom_gpu_rb_state gpu_rb_state;
-
- bool need_cache_flush_cores_retained;
-
- atomic_t blocked;
-
- struct kbase_jd_atom *pre_dep;
- struct kbase_jd_atom *post_dep;
-
- struct kbase_jd_atom *x_pre_dep;
- struct kbase_jd_atom *x_post_dep;
-
- u32 flush_id;
-
-#ifdef CONFIG_DEBUG_FS
- struct base_job_fault_event fault_event;
-#endif
-
- struct list_head queue;
-
- struct list_head jit_node;
- bool jit_blocked;
-
- enum base_jd_event_code will_fail_event_code;
-
- union {
- enum kbase_atom_enter_protected_state enter;
- enum kbase_atom_exit_protected_state exit;
- } protected_state;
-
- struct rb_node runnable_tree_node;
-
- u32 age;
-};
-
-/**
* struct kbase_debug_copy_buffer - information about the buffer to be copied.
*
* @size: size of the buffer in bytes
@@ -772,83 +229,6 @@ struct kbase_debug_copy_buffer {
int nr_extres_pages;
};
-static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
-{
- return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
-}
-
-/*
- * Theory of operations:
- *
- * Atom objects are statically allocated within the context structure.
- *
- * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
- */
-
-#define KBASE_JD_DEP_QUEUE_SIZE 256
-
-/**
- * struct kbase_jd_context - per context object encapsulating all the Job dispatcher
- * related state.
- * @lock: lock to serialize the updates made to the Job dispatcher
- * state and kbase_jd_atom objects.
- * @sched_info: Structure encapsulating all the Job scheduling info.
- * @atoms: Array of the objects representing atoms, containing
- * the complete state and attributes of an atom.
- * @job_nr: Tracks the number of atoms being processed by the
- * kbase. This includes atoms that are not tracked by
- * scheduler: 'not ready to run' & 'dependency-only' jobs.
- * @zero_jobs_wait: Waitq that reflects whether there are no jobs
- * (including SW-only dependency jobs). This is set
- * when no jobs are present on the ctx, and clear when
- * there are jobs.
- * This must be updated atomically with @job_nr.
- * note: Job Dispatcher knows about more jobs than the
- * Job Scheduler as it is unaware of jobs that are
- * blocked on dependencies and SW-only dependency jobs.
- * This waitq can be waited upon to find out when the
- * context jobs are all done/cancelled (including those
- * that might've been blocked on dependencies) - and so,
- * whether it can be terminated. However, it should only
- * be terminated once it is not present in the run-pool.
- * Since the waitq is only set under @lock, the waiter
- * should also briefly obtain and drop @lock to guarantee
- * that the setter has completed its work on the kbase_context
- * @job_done_wq: Workqueue to which the per atom work item is queued
- * for bottom half processing when the atom completes
- * execution on GPU or the input fence get signaled.
- * @tb_lock: Lock to serialize the write access made to @tb to
- * to store the register access trace messages.
- * @tb: Pointer to the Userspace accessible buffer storing
- * the trace messages for register read/write accesses
- * made by the Kbase. The buffer is filled in circular
- * fashion.
- * @tb_wrap_offset: Offset to the end location in the trace buffer, the
- * write pointer is moved to the beginning on reaching
- * this offset.
- * @work_id: atomic variable used for GPU tracepoints, incremented
- * on every call to base_jd_submit.
- */
-struct kbase_jd_context {
- struct mutex lock;
- struct kbasep_js_kctx_info sched_info;
- struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
-
- u32 job_nr;
-
- wait_queue_head_t zero_jobs_wait;
-
- struct workqueue_struct *job_done_wq;
-
- spinlock_t tb_lock;
- u32 *tb;
- size_t tb_wrap_offset;
-
-#ifdef CONFIG_GPU_TRACEPOINTS
- atomic_t work_id;
-#endif
-};
-
struct kbase_device_info {
u32 features;
};
@@ -923,6 +303,8 @@ struct kbase_mmu_table {
struct kbase_context *kctx;
};
+#include "jm/mali_kbase_jm_defs.h"
+
static inline int kbase_as_has_bus_fault(struct kbase_as *as,
struct kbase_fault *fault)
{
@@ -935,10 +317,19 @@ static inline int kbase_as_has_page_fault(struct kbase_as *as,
return (fault == &as->pf_data);
}
+/**
+ * struct kbasep_mem_device - Data stored per device for memory allocation
+ *
+ * @used_pages: Tracks usage of OS shared memory. Updated when OS memory is
+ * allocated/freed.
+ * @ir_threshold: Fraction of the maximum size of an allocation that grows
+ * on GPU page fault that can be used before the driver
+ * switches to incremental rendering, in 1/256ths.
+ * 0 means disabled.
+ */
struct kbasep_mem_device {
- atomic_t used_pages; /* Tracks usage of OS shared memory. Updated
- when OS memory is allocated/freed. */
-
+ atomic_t used_pages;
+ atomic_t ir_threshold;
};
#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
@@ -1201,8 +592,6 @@ struct kbase_mmu_mode {
struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
-
-
#define DEVNAME_SIZE 16
/**
@@ -1523,6 +912,7 @@ struct kbase_device {
#endif /* CONFIG_MALI_NO_MALI */
struct kbase_pm_device_data pm;
+
struct kbasep_js_device_data js_data;
struct kbase_mem_pool_group mem_pools;
struct kbasep_mem_device memdev;
@@ -1574,7 +964,6 @@ struct kbase_device {
u16 trace_next_in;
struct kbase_trace *trace_rbuf;
#endif
-
u32 reset_timeout_ms;
bool cache_clean_in_progress;
@@ -1706,19 +1095,19 @@ struct kbase_device {
struct mutex mmu_hw_mutex;
- /* See KBASE_SERIALIZE_* for details */
- u8 serialize_jobs;
-
-#ifdef CONFIG_MALI_CINSTR_GWT
- u8 backup_serialize_jobs;
-#endif
-
u8 l2_size_override;
u8 l2_hash_override;
/* See KBASE_JS_*_PRIORITY_MODE for details. */
u32 js_ctx_scheduling_mode;
+ /* See KBASE_SERIALIZE_* for details */
+ u8 serialize_jobs;
+
+#ifdef CONFIG_MALI_CINSTR_GWT
+ u8 backup_serialize_jobs;
+#endif /* CONFIG_MALI_CINSTR_GWT */
+
struct {
struct kbase_context *ctx;
@@ -1728,22 +1117,6 @@ struct kbase_device {
} dummy_job_wa;
};
-/**
- * struct jsctx_queue - JS context atom queue
- * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
- * job slot.
- * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot
- * dependencies. Atoms on this list will be moved to the
- * runnable_tree when the blocking atom completes.
- *
- * hwaccess_lock must be held when accessing this structure.
- */
-struct jsctx_queue {
- struct rb_root runnable_tree;
- struct list_head x_dep_head;
-};
-
-
#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \
(((minor) & 0xFFF) << 8) | \
((0 & 0xFF) << 0))
@@ -1852,6 +1225,11 @@ struct kbase_file {
* from it for job slot 2. This is reset when the context first goes active or
* is re-activated on that slot.
*
+ * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for
+ * the context due to an unhandled page (or bus) fault. It is cleared when
+ * the refcount for the context drops to 0 or when the address spaces are
+ * re-enabled on GPU reset or power cycle.
+ *
* All members need to be separate bits. This enum is intended for use in a
* bitmask where multiple values get OR-ed together.
*/
@@ -1871,6 +1249,7 @@ enum kbase_context_flags {
KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
+ KCTX_AS_DISABLED_ON_FAULT = 1U << 15,
};
struct kbase_sub_alloc {
@@ -2053,36 +1432,58 @@ struct kbase_sub_alloc {
* soft-jobs which have been blocked for more than the
* timeout value used for the soft-jobs
* @jit_alloc: Array of 256 pointers to GPU memory regions, used for
- * for JIT allocations.
- * @jit_max_allocations: Maximum number of JIT allocations allowed at once.
- * @jit_current_allocations: Current number of in-flight JIT allocations.
- * @jit_current_allocations_per_bin: Current number of in-flight JIT allocations per bin
- * @jit_version: version number indicating whether userspace is using
- * old or new version of interface for JIT allocations
- * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_OLD
- * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT
+ * just-in-time memory allocations.
+ * @jit_max_allocations: Maximum allowed number of in-flight
+ * just-in-time memory allocations.
+ * @jit_current_allocations: Current number of in-flight just-in-time
+ * memory allocations.
+ * @jit_current_allocations_per_bin: Current number of in-flight just-in-time
+ * memory allocations per bin.
+ * @jit_version: Version number indicating whether userspace is using
+ * old or new version of interface for just-in-time
+ * memory allocations.
+ * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_10_2
+ * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT_11_5
+ * 3 -> client used KBASE_IOCTL_MEM_JIT_INIT
* @jit_group_id: A memory group ID to be passed to a platform-specific
* memory group manager.
* Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
- * @jit_active_head: List containing the JIT allocations which are in use.
- * @jit_pool_head: List containing the JIT allocations which have been
- * freed up by userpsace and so not being used by them.
+ * @jit_phys_pages_limit: Limit of physical pages to apply across all
+ * just-in-time memory allocations, applied to
+ * @jit_current_phys_pressure.
+ * @jit_current_phys_pressure: Current 'pressure' on physical pages, which is
+ * the sum of the worst case estimate of pages that
+ * could be used (i.e. the
+ * &struct_kbase_va_region.nr_pages for all in-use
+ * just-in-time memory regions that have not yet had
+ * a usage report) and the actual number of pages
+ * that were used (i.e. the
+ * &struct_kbase_va_region.used_pages for regions
+ * that have had a usage report).
+ * @jit_active_head: List containing the just-in-time memory allocations
+ * which are in use.
+ * @jit_pool_head: List containing the just-in-time memory allocations
+ * which have been freed up by userspace and so not being
+ * used by them.
* Driver caches them to quickly fulfill requests for new
* JIT allocations. They are released in case of memory
* pressure as they are put on the @evict_list when they
* are freed up by userspace.
- * @jit_destroy_head: List containing the JIT allocations which were moved to it
- * from @jit_pool_head, in the shrinker callback, after freeing
- * their backing physical pages.
- * @jit_evict_lock: Lock used for operations done on JIT allocations and also
- * for accessing @evict_list.
- * @jit_work: Work item queued to defer the freeing of memory region when
- * JIT allocation is moved to @jit_destroy_head.
- * @jit_atoms_head: A list of the JIT soft-jobs, both alloc & free, in submission
- * order, protected by kbase_jd_context.lock.
- * @jit_pending_alloc: A list of JIT alloc soft-jobs for which allocation will be
- * reattempted after the impending free of other active JIT
- * allocations.
+ * @jit_destroy_head: List containing the just-in-time memory allocations
+ * which were moved to it from @jit_pool_head, in the
+ * shrinker callback, after freeing their backing
+ * physical pages.
+ * @jit_evict_lock: Lock used for operations done on just-in-time memory
+ * allocations and also for accessing @evict_list.
+ * @jit_work: Work item queued to defer the freeing of a memory
+ * region when a just-in-time memory allocation is moved
+ * to @jit_destroy_head.
+ * @jit_atoms_head: A list of the just-in-time memory soft-jobs, both
+ * allocate & free, in submission order, protected by
+ * &struct_kbase_jd_context.lock.
+ * @jit_pending_alloc: A list of just-in-time memory allocation soft-jobs
+ * which will be reattempted after the impending free of
+ * other active allocations.
* @ext_res_meta_head: A list of sticky external resources which were requested to
* be mapped on GPU side, through a softjob atom of type
* EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl.
@@ -2132,10 +1533,28 @@ struct kbase_context {
struct list_head mem_partials;
struct mutex reg_lock;
+
struct rb_root reg_rbtree_same;
struct rb_root reg_rbtree_custom;
struct rb_root reg_rbtree_exec;
+ struct kbase_jd_context jctx;
+ struct jsctx_queue jsctx_queue
+ [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+ struct list_head completed_jobs;
+ atomic_t work_count;
+ struct timer_list soft_job_timeout;
+
+ atomic_t atoms_pulled;
+ atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+ int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
+ KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+ int priority;
+ bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+ s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+ u32 slots_pullable;
+ u32 age_count;
DECLARE_BITMAP(cookies, BITS_PER_LONG);
struct kbase_va_region *pending_regions[BITS_PER_LONG];
@@ -2143,8 +1562,6 @@ struct kbase_context {
wait_queue_head_t event_queue;
pid_t tgid;
pid_t pid;
-
- struct kbase_jd_context jctx;
atomic_t used_pages;
atomic_t nonmapped_pages;
atomic_t permanent_mapped_pages;
@@ -2186,34 +1603,19 @@ struct kbase_context {
#endif /* CONFIG_DEBUG_FS */
- struct jsctx_queue jsctx_queue
- [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
-
- atomic_t atoms_pulled;
- atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
- int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
- KBASE_JS_ATOM_SCHED_PRIO_COUNT];
-
- bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
-
- u32 slots_pullable;
-
- struct work_struct work;
-
struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli;
struct mutex legacy_hwcnt_lock;
- struct list_head completed_jobs;
- atomic_t work_count;
-
- struct timer_list soft_job_timeout;
-
- struct kbase_va_region *jit_alloc[256];
+ struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT];
u8 jit_max_allocations;
u8 jit_current_allocations;
u8 jit_current_allocations_per_bin[256];
u8 jit_version;
u8 jit_group_id;
+#if MALI_JIT_PRESSURE_LIMIT
+ u64 jit_phys_pages_limit;
+ u64 jit_current_phys_pressure;
+#endif /* MALI_JIT_PRESSURE_LIMIT */
struct list_head jit_active_head;
struct list_head jit_pool_head;
struct list_head jit_destroy_head;
@@ -2225,22 +1627,15 @@ struct kbase_context {
struct list_head ext_res_meta_head;
- u32 age_count;
-
u8 trim_level;
#ifdef CONFIG_MALI_CINSTR_GWT
bool gwt_enabled;
-
bool gwt_was_enabled;
-
struct list_head gwt_current_list;
-
struct list_head gwt_snapshot_list;
#endif
- int priority;
- s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
base_context_create_flags create_flags;
};
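
The new @jit_phys_pages_limit / @jit_current_phys_pressure fields (guarded by MALI_JIT_PRESSURE_LIMIT) track a worst-case page count across all in-flight JIT regions. The sketch below recomputes that pressure exactly as the field documentation above describes; the types and the "would exceed" helper are simplified assumptions for illustration, not the kbase implementation.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Simplified stand-in for the per-region state; only the two quantities the
 * documentation above refers to are modelled. */
struct jit_region_model {
	uint64_t nr_pages;		/* worst-case size of the allocation */
	uint64_t used_pages;		/* pages reported as actually used */
	bool has_usage_report;
};

/* Pressure = worst case for unreported regions + reported usage otherwise. */
uint64_t jit_phys_pressure(const struct jit_region_model *regions,
			   size_t nr_regions)
{
	uint64_t pressure = 0;
	size_t i;

	for (i = 0; i < nr_regions; i++)
		pressure += regions[i].has_usage_report ?
			    regions[i].used_pages : regions[i].nr_pages;

	return pressure;
}

/* Assumed policy check: a new allocation counts with its worst-case size. */
bool jit_would_exceed_limit(uint64_t current_pressure, uint64_t new_nr_pages,
			    uint64_t limit)
{
	return current_pressure + new_nr_pages > limit;
}
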
diff --git a/mali_kbase/mali_kbase_disjoint_events.c b/mali_kbase/mali_kbase_disjoint_events.c
index 68eb4ed..b5ac414 100644
--- a/mali_kbase/mali_kbase_disjoint_events.c
+++ b/mali_kbase/mali_kbase_disjoint_events.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/mali_kbase_dma_fence.c b/mali_kbase/mali_kbase_dma_fence.c
index 6a95900..25acbcb 100644
--- a/mali_kbase/mali_kbase_dma_fence.c
+++ b/mali_kbase/mali_kbase_dma_fence.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2017,2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -114,6 +114,8 @@ kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
ww_acquire_fini(ctx);
}
+
+
/**
* kbase_dma_fence_queue_work() - Queue work to handle @katom
* @katom: Pointer to atom for which to queue work
diff --git a/mali_kbase/mali_kbase_dummy_job_wa.c b/mali_kbase/mali_kbase_dummy_job_wa.c
index a72436a..5830e8e 100644
--- a/mali_kbase/mali_kbase_dummy_job_wa.c
+++ b/mali_kbase/mali_kbase_dummy_job_wa.c
@@ -187,7 +187,6 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores)
wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true);
if (cores >> 32)
wait(kbdev, SHADER_READY_HI, (cores >> 32), true);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX);
}
if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) {
diff --git a/mali_kbase/mali_kbase_dummy_job_wa.h b/mali_kbase/mali_kbase_dummy_job_wa.h
index 0ffd5b9..5bbe37d 100644
--- a/mali_kbase/mali_kbase_dummy_job_wa.h
+++ b/mali_kbase/mali_kbase_dummy_job_wa.h
@@ -31,6 +31,7 @@
KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP | \
KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)
+
int kbase_dummy_job_wa_load(struct kbase_device *kbdev);
void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev);
int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores);
@@ -40,4 +41,5 @@ static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev)
return (kbdev->dummy_job_wa.ctx != NULL);
}
+
#endif /* _KBASE_DUMMY_JOB_WORKAROUND_ */
diff --git a/mali_kbase/mali_kbase_event.c b/mali_kbase/mali_kbase_event.c
index 0ba5f97..2bbc313 100644
--- a/mali_kbase/mali_kbase_event.c
+++ b/mali_kbase/mali_kbase_event.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2016,2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016,2018-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -44,7 +44,7 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru
KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom);
katom->status = KBASE_JD_ATOM_STATE_UNUSED;
-
+ dev_dbg(kbdev->dev, "Atom %p status to unused\n", (void *)katom);
wake_up(&katom->completed);
return data;
@@ -83,10 +83,12 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve
dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
uevent->event_code = atom->event_code;
+
uevent->atom_number = (atom - ctx->jctx.atoms);
if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
kbase_jd_free_external_resources(atom);
+
mutex_lock(&ctx->jctx.lock);
uevent->udata = kbase_event_process(ctx, atom);
mutex_unlock(&ctx->jctx.lock);
@@ -110,6 +112,7 @@ static void kbase_event_process_noreport_worker(struct work_struct *data)
if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
kbase_jd_free_external_resources(katom);
+
mutex_lock(&kctx->jctx.lock);
kbase_event_process(kctx, katom);
mutex_unlock(&kctx->jctx.lock);
@@ -162,22 +165,25 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
{
struct kbase_device *kbdev = ctx->kbdev;
+ dev_dbg(kbdev->dev, "Posting event for atom %p\n", (void *)atom);
+
if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
if (atom->event_code == BASE_JD_EVENT_DONE) {
- /* Don't report the event */
+ dev_dbg(kbdev->dev, "Suppressing event (atom done)\n");
kbase_event_process_noreport(ctx, atom);
return;
}
}
if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
- /* Don't report the event */
+ dev_dbg(kbdev->dev, "Suppressing event (never)\n");
kbase_event_process_noreport(ctx, atom);
return;
}
KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, TL_ATOM_STATE_POSTED);
if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
/* Don't report the event until other event(s) have completed */
+ dev_dbg(kbdev->dev, "Deferring event (coalesced)\n");
mutex_lock(&ctx->event_mutex);
list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
++ctx->event_coalesce_count;
@@ -191,6 +197,7 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
list_add_tail(&atom->dep_item[0], &ctx->event_list);
atomic_add(event_count, &ctx->event_count);
mutex_unlock(&ctx->event_mutex);
+ dev_dbg(kbdev->dev, "Reporting %d events\n", event_count);
kbase_event_wakeup(ctx);
}
@@ -212,9 +219,7 @@ int kbase_event_init(struct kbase_context *kctx)
INIT_LIST_HEAD(&kctx->event_list);
INIT_LIST_HEAD(&kctx->event_coalesce_list);
mutex_init(&kctx->event_mutex);
- atomic_set(&kctx->event_count, 0);
kctx->event_coalesce_count = 0;
- atomic_set(&kctx->event_closed, false);
kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
if (NULL == kctx->event_workq)
diff --git a/mali_kbase/mali_kbase_fence.c b/mali_kbase/mali_kbase_fence.c
index 96a6ab9..7a715b3 100644
--- a/mali_kbase/mali_kbase_fence.c
+++ b/mali_kbase/mali_kbase_fence.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -87,6 +87,7 @@ const struct dma_fence_ops kbase_fence_ops = {
.fence_value_str = kbase_fence_fence_value_str
};
+
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
struct fence *
kbase_fence_out_new(struct kbase_jd_atom *katom)
@@ -210,3 +211,4 @@ kbase_fence_add_callback(struct kbase_jd_atom *katom,
return err;
}
+
diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h
index d7a65e0..8e7024e 100644
--- a/mali_kbase/mali_kbase_fence.h
+++ b/mali_kbase/mali_kbase_fence.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -87,6 +87,7 @@ struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
} while (0)
#endif
+
/**
* kbase_fence_out_remove() - Removes the output fence from atom
* @katom: Atom to remove output fence for
@@ -268,6 +269,7 @@ bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom);
*/
#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence)
+
/**
* kbase_fence_put() - Releases a reference to a fence
* @fence: Fence to release reference for.
diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c
index d5495a1..f1f188f 100644
--- a/mali_kbase/mali_kbase_gpuprops.c
+++ b/mali_kbase/mali_kbase_gpuprops.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -51,7 +51,8 @@
#define KBASE_UBFX32(value, offset, size) \
(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
-static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+static void kbase_gpuprops_construct_coherent_groups(
+ struct base_gpu_props * const props)
{
struct mali_base_gpu_coherent_group *current_group;
u64 group_present;
@@ -120,13 +121,14 @@ static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const prop
/**
* kbase_gpuprops_get_props - Get the GPU configuration
- * @gpu_props: The &base_gpu_props structure
+ * @gpu_props: The &struct base_gpu_props structure
* @kbdev: The &struct kbase_device structure for the device
*
- * Fill the &base_gpu_props structure with values from the GPU configuration
- * registers. Only the raw properties are filled in this function
+ * Fill the &struct base_gpu_props structure with values from the GPU
+ * configuration registers. Only the raw properties are filled in this function.
*/
-static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+static void kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props,
+ struct kbase_device *kbdev)
{
struct kbase_gpuprops_regdump regdump;
int i;
@@ -172,7 +174,8 @@ static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kb
gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc;
}
-void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props)
+void kbase_gpuprops_update_core_props_gpu_id(
+ struct base_gpu_props * const gpu_props)
{
gpu_props->core_props.version_status =
KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
@@ -186,13 +189,14 @@ void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props)
/**
* kbase_gpuprops_calculate_props - Calculate the derived properties
- * @gpu_props: The &base_gpu_props structure
+ * @gpu_props: The &struct base_gpu_props structure
* @kbdev: The &struct kbase_device structure for the device
*
- * Fill the &base_gpu_props structure with values derived from the GPU
+ * Fill the &struct base_gpu_props structure with values derived from the GPU
* configuration registers
*/
-static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+static void kbase_gpuprops_calculate_props(
+ struct base_gpu_props * const gpu_props, struct kbase_device *kbdev)
{
int i;
u32 gpu_id;
@@ -323,7 +327,7 @@ void kbase_gpuprops_set(struct kbase_device *kbdev)
void kbase_gpuprops_set_features(struct kbase_device *kbdev)
{
- base_gpu_props *gpu_props;
+ struct base_gpu_props *gpu_props;
struct kbase_gpuprops_regdump regdump;
gpu_props = &kbdev->gpu_props.props;
@@ -396,7 +400,7 @@ void kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
{
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
struct kbase_gpuprops_regdump regdump;
- base_gpu_props *gpu_props = &kbdev->gpu_props.props;
+ struct base_gpu_props *gpu_props = &kbdev->gpu_props.props;
/* Check for L2 cache size & hash overrides */
if (!kbase_read_l2_config_from_dt(kbdev))
diff --git a/mali_kbase/mali_kbase_gpuprops.h b/mali_kbase/mali_kbase_gpuprops.h
index 4fdb3f9..eeba92f 100644
--- a/mali_kbase/mali_kbase_gpuprops.h
+++ b/mali_kbase/mali_kbase_gpuprops.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2015,2017,2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015, 2017, 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -97,7 +97,7 @@ int kbase_device_populate_max_freq(struct kbase_device *kbdev);
* separate fields (version_status, minor_revision, major_revision, product_id)
* stored in base_gpu_props::core_props.
*/
-void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props);
-
+void kbase_gpuprops_update_core_props_gpu_id(
+ struct base_gpu_props * const gpu_props);
#endif /* _KBASE_GPUPROPS_H_ */
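The kernel-doc just above describes unpacking the raw GPU ID into the separate core_props fields. Below is a minimal standalone sketch of that kind of decode, assuming KBASE_UBFX32() is a plain unsigned shift-and-mask bitfield extract; only the version_status position (offset 0, width 4) appears in the update_core_props_gpu_id() hunk earlier in this patch, everything else here is illustrative.

/* Sketch only (not part of the patch): SKETCH_UBFX32 is an assumed
 * reimplementation of what KBASE_UBFX32 appears to do; only the
 * version_status position (offset 0, width 4) comes from the patch.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_UBFX32(value, off, sz) \
	(((uint32_t)(value) >> (off)) & ((1u << (sz)) - 1u))

int main(void)
{
	uint32_t gpu_id = 0x72120010;	/* hypothetical raw GPU_ID register value */
	uint32_t version_status = SKETCH_UBFX32(gpu_id, 0U, 4);

	printf("version_status = %u\n", version_status);
	return 0;
}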
diff --git a/mali_kbase/mali_kbase_gpuprops_types.h b/mali_kbase/mali_kbase_gpuprops_types.h
index d7877d1..ec6f1c3 100644
--- a/mali_kbase/mali_kbase_gpuprops_types.h
+++ b/mali_kbase/mali_kbase_gpuprops_types.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -89,7 +89,7 @@ struct kbase_gpu_props {
struct kbase_gpu_mmu_props mmu;
/* Properties shared with userspace */
- base_gpu_props props;
+ struct base_gpu_props props;
u32 prop_buffer_size;
void *prop_buffer;
diff --git a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c
index 75a0820..6a47c9d 100644
--- a/mali_kbase/mali_kbase_gwt.c
+++ b/mali_kbase/mali_kbase_gwt.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c
index b5304e8..c3abad4 100644
--- a/mali_kbase/mali_kbase_hw.c
+++ b/mali_kbase/mali_kbase_hw.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -182,6 +182,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0},
{GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1},
{GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1},
+ {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2},
{U32_MAX, NULL} } },
{GPU_ID2_PRODUCT_TNAX,
@@ -195,11 +196,13 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{GPU_ID2_PRODUCT_LBEX,
{{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0},
+ {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tBEx_r1p1},
{U32_MAX, NULL} } },
{GPU_ID2_PRODUCT_TBEX,
{{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0},
{GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0},
+ {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p0},
{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0},
{U32_MAX, NULL} } },
diff --git a/mali_kbase/mali_kbase_hwaccess_instr.h b/mali_kbase/mali_kbase_hwaccess_instr.h
index d5b9099..be85491 100644
--- a/mali_kbase/mali_kbase_hwaccess_instr.h
+++ b/mali_kbase/mali_kbase_hwaccess_instr.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2015, 2017-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2017-2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -139,4 +139,13 @@ int kbase_instr_backend_init(struct kbase_device *kbdev);
*/
void kbase_instr_backend_term(struct kbase_device *kbdev);
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+/**
+ * kbase_instr_backend_debugfs_init() - Add a debugfs entry for the
+ * hardware counter set.
+ * @kbdev: kbase device
+ */
+void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev);
+#endif
+
#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/mali_kbase/mali_kbase_hwaccess_jm.h b/mali_kbase/mali_kbase_hwaccess_jm.h
index 4972893..3d5934e 100644
--- a/mali_kbase/mali_kbase_hwaccess_jm.h
+++ b/mali_kbase/mali_kbase_hwaccess_jm.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/mali_kbase_hwcnt.c b/mali_kbase/mali_kbase_hwcnt.c
index 265fc21..14ec5cb 100644
--- a/mali_kbase/mali_kbase_hwcnt.c
+++ b/mali_kbase/mali_kbase_hwcnt.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -683,21 +683,14 @@ bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx)
if (!WARN_ON(hctx->disable_count == SIZE_MAX)) {
/*
- * If disable count is non-zero or no counters are enabled, we
- * can just bump the disable count.
+ * If disable count is non-zero, we can just bump the disable
+ * count.
*
* Otherwise, we can't disable in an atomic context.
*/
if (hctx->disable_count != 0) {
hctx->disable_count++;
atomic_disabled = true;
- } else {
- WARN_ON(!hctx->accum_inited);
- if (!hctx->accum.enable_map_any_enabled) {
- hctx->disable_count++;
- hctx->accum.state = ACCUM_STATE_DISABLED;
- atomic_disabled = true;
- }
}
}
diff --git a/mali_kbase/mali_kbase_ioctl.h b/mali_kbase/mali_kbase_ioctl.h
index c041829..977b194 100644
--- a/mali_kbase/mali_kbase_ioctl.h
+++ b/mali_kbase/mali_kbase_ioctl.h
@@ -30,64 +30,9 @@ extern "C" {
#include <asm-generic/ioctl.h>
#include <linux/types.h>
-#define KBASE_IOCTL_TYPE 0x80
+#include "jm/mali_kbase_jm_ioctl.h"
-/*
- * 11.1:
- * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags
- * 11.2:
- * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED,
- * which some user-side clients prior to 11.2 might fault if they received
- * them
- * 11.3:
- * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and
- * KBASE_IOCTL_STICKY_RESOURCE_UNMAP
- * 11.4:
- * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET
- * 11.5:
- * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD)
- * 11.6:
- * - Added flags field to base_jit_alloc_info structure, which can be used to
- * specify pseudo chunked tiler alignment for JIT allocations.
- * 11.7:
- * - Removed UMP support
- * 11.8:
- * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags
- * 11.9:
- * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY
- * under base_mem_alloc_flags
- * 11.10:
- * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for
- * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations
- * with one softjob.
- * 11.11:
- * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
- * 11.12:
- * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS
- * 11.13:
- * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT
- * 11.14:
- * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set
- * under base_mem_alloc_flags
- * 11.15:
- * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags.
- * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be
- * passed to mmap().
- * 11.16:
- * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf.
- * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for
- * dma-buf. Now, buffers are mapped on GPU when first imported, no longer
- * requiring external resource or sticky resource tracking. UNLESS,
- * CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled.
- * 11.17:
- * - Added BASE_JD_REQ_JOB_SLOT.
- * - Reused padding field in base_jd_atom_v2 to pass job slot number.
- * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO
- * 11.18:
- * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags
- */
-#define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 18
+#define KBASE_IOCTL_TYPE 0x80
/**
* struct kbase_ioctl_version_check - Check version compatibility with kernel
@@ -116,22 +61,6 @@ struct kbase_ioctl_set_flags {
_IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags)
/**
- * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
- *
- * @addr: Memory address of an array of struct base_jd_atom_v2
- * @nr_atoms: Number of entries in the array
- * @stride: sizeof(struct base_jd_atom_v2)
- */
-struct kbase_ioctl_job_submit {
- __u64 addr;
- __u32 nr_atoms;
- __u32 stride;
-};
-
-#define KBASE_IOCTL_JOB_SUBMIT \
- _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
-
-/**
* struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel
*
* @buffer: Pointer to the buffer to store properties into
@@ -166,9 +95,6 @@ struct kbase_ioctl_get_gpuprops {
#define KBASE_IOCTL_GET_GPUPROPS \
_IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
-#define KBASE_IOCTL_POST_TERM \
- _IO(KBASE_IOCTL_TYPE, 4)
-
/**
* union kbase_ioctl_mem_alloc - Allocate memory on the GPU
*
@@ -332,8 +258,9 @@ struct kbase_ioctl_get_ddk_version {
_IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
/**
- * struct kbase_ioctl_mem_jit_init_old - Initialise the JIT memory allocator
- *
+ * struct kbase_ioctl_mem_jit_init_10_2 - Initialize the just-in-time memory
+ * allocator (between kernel driver
+ *                                        versions 10.2 and 11.4)
* @va_pages: Number of VA pages to reserve for JIT
*
* Note that depending on the VA size of the application and GPU, the value
@@ -342,16 +269,17 @@ struct kbase_ioctl_get_ddk_version {
 * New code should use KBASE_IOCTL_MEM_JIT_INIT instead; this is kept for
 * backwards compatibility.
*/
-struct kbase_ioctl_mem_jit_init_old {
+struct kbase_ioctl_mem_jit_init_10_2 {
__u64 va_pages;
};
-#define KBASE_IOCTL_MEM_JIT_INIT_OLD \
- _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_old)
+#define KBASE_IOCTL_MEM_JIT_INIT_10_2 \
+ _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_10_2)
/**
- * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator
- *
+ * struct kbase_ioctl_mem_jit_init_11_5 - Initialize the just-in-time memory
+ * allocator (between kernel driver
+ *                                        versions 11.5 and 11.19)
* @va_pages: Number of VA pages to reserve for JIT
* @max_allocations: Maximum number of concurrent allocations
* @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
@@ -360,6 +288,34 @@ struct kbase_ioctl_mem_jit_init_old {
*
* Note that depending on the VA size of the application and GPU, the value
* specified in @va_pages may be ignored.
+ *
+ * New code should use KBASE_IOCTL_MEM_JIT_INIT instead; this is kept for
+ * backwards compatibility.
+ */
+struct kbase_ioctl_mem_jit_init_11_5 {
+ __u64 va_pages;
+ __u8 max_allocations;
+ __u8 trim_level;
+ __u8 group_id;
+ __u8 padding[5];
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT_11_5 \
+ _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_11_5)
+
+/**
+ * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory
+ * allocator
+ * @va_pages: Number of GPU virtual address pages to reserve for just-in-time
+ * memory allocations
+ * @max_allocations: Maximum number of concurrent allocations
+ * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @group_id: Group ID to be used for physical allocations
+ * @padding: Currently unused, must be zero
+ * @phys_pages: Maximum number of physical pages to allocate just-in-time
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
*/
struct kbase_ioctl_mem_jit_init {
__u64 va_pages;
@@ -367,6 +323,7 @@ struct kbase_ioctl_mem_jit_init {
__u8 trim_level;
__u8 group_id;
__u8 padding[5];
+ __u64 phys_pages;
};
#define KBASE_IOCTL_MEM_JIT_INIT \
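For orientation only, a hedged sketch of how userspace might fill the extended argument structure above; the file descriptor, the chosen values and the include path are assumptions, while the field names and the KBASE_IOCTL_MEM_JIT_INIT command come from this header.

/* Sketch only (not part of the patch): values and include path are
 * hypothetical; only the structure layout and command name come from the
 * header changes above.
 */
#include <string.h>
#include <sys/ioctl.h>
#include "mali_kbase_ioctl.h"

static int jit_init_example(int mali_fd)
{
	struct kbase_ioctl_mem_jit_init args;

	memset(&args, 0, sizeof(args));	/* keeps padding[] zeroed */
	args.va_pages = 65536;		/* VA reservation; may be clamped by the kernel */
	args.max_allocations = 16;
	args.trim_level = 0;		/* no trimming on free */
	args.group_id = 0;
	args.phys_pages = 32768;	/* new field: cap on physical pages */

	return ioctl(mali_fd, KBASE_IOCTL_MEM_JIT_INIT, &args);
}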
@@ -585,21 +542,6 @@ struct kbase_ioctl_mem_profile_add {
_IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
/**
- * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
- * @event: GPU address of the event which has been updated
- * @new_status: The new status to set
- * @flags: Flags for future expansion
- */
-struct kbase_ioctl_soft_event_update {
- __u64 event;
- __u32 new_status;
- __u32 flags;
-};
-
-#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
- _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
-
-/**
* struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
* @count: Number of resources
* @address: Array of u64 GPU addresses of the external resources to map
@@ -695,7 +637,6 @@ union kbase_ioctl_cinstr_gwt_dump {
#define KBASE_IOCTL_CINSTR_GWT_DUMP \
_IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
-
/**
* struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone
*
@@ -708,7 +649,6 @@ struct kbase_ioctl_mem_exec_init {
#define KBASE_IOCTL_MEM_EXEC_INIT \
_IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
-
/**
* union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of
* cpu/gpu time (counter values)
@@ -742,7 +682,6 @@ union kbase_ioctl_get_cpu_gpu_timeinfo {
#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
_IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo)
-
/***************
* test ioctls *
***************/
@@ -784,38 +723,7 @@ struct kbase_ioctl_tlstream_stats {
#define KBASE_IOCTL_TLSTREAM_STATS \
_IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
-/**
- * struct kbase_ioctl_cs_event_memory_write - Write an event memory address
- * @cpu_addr: Memory address to write
- * @value: Value to write
- * @padding: Currently unused, must be zero
- */
-struct kbase_ioctl_cs_event_memory_write {
- __u64 cpu_addr;
- __u8 value;
- __u8 padding[7];
-};
-
-/**
- * union kbase_ioctl_cs_event_memory_read - Read an event memory address
- * @cpu_addr: Memory address to read
- * @value: Value read
- * @padding: Currently unused, must be zero
- *
- * @in: Input parameters
- * @out: Output parameters
- */
-union kbase_ioctl_cs_event_memory_read {
- struct {
- __u64 cpu_addr;
- } in;
- struct {
- __u8 value;
- __u8 padding[7];
- } out;
-};
-
-#endif
+#endif /* MALI_UNIT_TEST */
/* Customer extension range */
#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2)
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index 88ab962..b4ae3ba 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -36,6 +36,7 @@
#include <tl/mali_kbase_tracepoints.h>
#include "mali_kbase_dma_fence.h"
+#include <mali_kbase_cs_experimental.h>
#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
@@ -49,6 +50,12 @@
#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \
((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \
BASE_JD_REQ_DEP)))
+
+/* Minimum API version that supports the just-in-time memory allocation pressure
+ * limit feature.
+ */
+#define MIN_API_VERSION_WITH_JPL KBASE_API_VERSION(11, 20)
+
/*
* This is the kernel side of the API. Only entry points are:
* - kbase_jd_submit(): Called from userspace to submit a single bag
@@ -76,36 +83,45 @@ get_compat_pointer(struct kbase_context *kctx, const u64 p)
* Note that the caller must also check the atom status and
* if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
*/
-static int jd_run_atom(struct kbase_jd_atom *katom)
+static bool jd_run_atom(struct kbase_jd_atom *katom)
{
struct kbase_context *kctx = katom->kctx;
+ dev_dbg(kctx->kbdev->dev, "JD run atom %p in kctx %p\n",
+ (void *)katom, (void *)kctx);
+
KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
/* Dependency only atom */
katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n",
+ (void *)katom);
return 0;
} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
/* Soft-job */
if (katom->will_fail_event_code) {
kbase_finish_soft_job(katom);
katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ dev_dbg(kctx->kbdev->dev,
+ "Atom %p status to completed\n", (void *)katom);
return 0;
}
if (kbase_process_soft_job(katom) == 0) {
kbase_finish_soft_job(katom);
katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ dev_dbg(kctx->kbdev->dev,
+ "Atom %p status to completed\n", (void *)katom);
}
return 0;
}
katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+ dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", (void *)katom);
/* Queue an action about whether we should try scheduling a context */
return kbasep_js_add_job(kctx, katom);
}
-#if defined(CONFIG_MALI_DMA_FENCE)
void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
{
struct kbase_device *kbdev;
@@ -136,7 +152,6 @@ void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
kbase_js_sched_all(kbdev);
}
}
-#endif
void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
{
@@ -533,6 +548,124 @@ static void jd_try_submitting_deps(struct list_head *out_list,
}
}
+#if MALI_JIT_PRESSURE_LIMIT
+/**
+ * jd_update_jit_usage - Update just-in-time physical memory usage for an atom.
+ *
+ * @katom: An atom that has just finished.
+ *
+ * Read back actual just-in-time memory region usage from atoms that provide
+ * this information, and update the current physical page pressure.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+static void jd_update_jit_usage(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct kbase_va_region *reg;
+ struct kbase_vmap_struct mapping;
+ u64 *ptr;
+ u64 used_pages;
+ unsigned int idx;
+
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ /* If this atom wrote to JIT memory, find out how much it has written
+ * and update the usage information in the region.
+ */
+ for (idx = 0;
+ idx < ARRAY_SIZE(katom->jit_ids) && katom->jit_ids[idx];
+ idx++) {
+ size_t size_to_read;
+ u64 read_val;
+
+ reg = kctx->jit_alloc[katom->jit_ids[idx]];
+
+ if (!reg) {
+ dev_warn(kctx->kbdev->dev,
+ "%s: JIT id[%u]=%u has no region\n",
+ __func__, idx, katom->jit_ids[idx]);
+ continue;
+ }
+
+ if (reg == KBASE_RESERVED_REG_JIT_ALLOC) {
+ dev_warn(kctx->kbdev->dev,
+ "%s: JIT id[%u]=%u has failed to allocate a region\n",
+ __func__, idx, katom->jit_ids[idx]);
+ continue;
+ }
+
+ if (!reg->heap_info_gpu_addr)
+ continue;
+
+ size_to_read = sizeof(*ptr);
+ if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)
+ size_to_read = sizeof(u32);
+
+ ptr = kbase_vmap(kctx, reg->heap_info_gpu_addr, size_to_read,
+ &mapping);
+
+ if (!ptr) {
+ dev_warn(kctx->kbdev->dev,
+ "%s: JIT id[%u]=%u start=0x%llx unable to map end marker %llx\n",
+ __func__, idx, katom->jit_ids[idx],
+ reg->start_pfn << PAGE_SHIFT,
+ reg->heap_info_gpu_addr);
+ continue;
+ }
+
+ if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) {
+ read_val = READ_ONCE(*(u32 *)ptr);
+ used_pages = PFN_UP(read_val);
+ } else {
+ u64 addr_end = read_val = READ_ONCE(*ptr);
+
+ if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
+ unsigned long extent_bytes = reg->extent <<
+ PAGE_SHIFT;
+ /* kbase_check_alloc_sizes() already satisfies
+ * this, but here to avoid future maintenance
+ * hazards
+ */
+ WARN_ON(!is_power_of_2(extent_bytes));
+
+ addr_end = ALIGN(read_val, extent_bytes);
+ }
+ used_pages = PFN_UP(addr_end) - reg->start_pfn;
+ }
+
+ trace_mali_jit_report(katom, reg, idx, read_val, used_pages);
+ kbase_trace_jit_report_gpu_mem(kctx, reg, 0u);
+
+ /* We can never have used more pages than the VA size of the
+ * region
+ */
+ if (used_pages > reg->nr_pages) {
+ dev_warn(kctx->kbdev->dev,
+ "%s: JIT id[%u]=%u start=0x%llx used_pages %llx > %zx (read 0x%llx as %s%s)\n",
+ __func__, idx, katom->jit_ids[idx],
+ reg->start_pfn << PAGE_SHIFT,
+ used_pages, reg->nr_pages, read_val,
+ (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) ?
+ "size" : "addr",
+ (reg->flags & KBASE_REG_TILER_ALIGN_TOP) ?
+ " with align" : "");
+ used_pages = reg->nr_pages;
+ }
+ /* Note: one real use case has an atom correctly reporting 0
+	 * pages in use. This happens in normal use cases but may only
+ * happen for a few of the application's frames.
+ */
+
+ kbase_vunmap(kctx, &mapping);
+
+ kbase_jit_report_update_pressure(kctx, reg, used_pages, 0u);
+ }
+
+ kbase_jit_retry_pending_alloc(kctx);
+}
+#endif /* MALI_JIT_PRESSURE_LIMIT */
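A worked example of the page accounting in jd_update_jit_usage() above, using hypothetical numbers and assuming 4 KiB pages; SK_ALIGN/SK_PFN_UP stand in for the kernel's ALIGN() and PFN_UP() helpers.

/* Sketch only (not part of the patch): numbers are hypothetical and the
 * helpers mimic ALIGN()/PFN_UP() under an assumed PAGE_SHIFT of 12.
 */
#include <stdint.h>
#include <stdio.h>

#define SK_PAGE_SHIFT	12
#define SK_PAGE_SIZE	(1ull << SK_PAGE_SHIFT)
#define SK_PFN_UP(x)	(((x) + SK_PAGE_SIZE - 1) >> SK_PAGE_SHIFT)
#define SK_ALIGN(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t start_pfn = 0x10;	/* region starts at GPU VA 0x10000 */
	uint64_t read_val = 0x10a40;	/* end address written back by the GPU */
	uint64_t extent_bytes = 0x8000;	/* power-of-two extent (TILER_ALIGN_TOP case) */
	uint64_t addr_end = SK_ALIGN(read_val, extent_bytes);	/* 0x18000 */
	uint64_t used_pages = SK_PFN_UP(addr_end) - start_pfn;	/* 0x18 - 0x10 = 8 */

	printf("used_pages = %llu\n", (unsigned long long)used_pages);
	return 0;
}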
+
/*
* Perform the necessary handling of an atom that has finished running
* on the GPU.
@@ -556,6 +689,10 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+#if MALI_JIT_PRESSURE_LIMIT
+ jd_update_jit_usage(katom);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
	/* This is needed in case an atom is failed due to being invalid; this
	 * can happen *before* the jobs that the atom depends on have completed */
for (i = 0; i < 2; i++) {
@@ -566,6 +703,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
}
katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n",
+ (void *)katom);
list_add_tail(&katom->jd_item, &completed_jobs);
while (!list_empty(&completed_jobs)) {
@@ -588,7 +727,12 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
list_del(runnable_jobs.next);
node->in_jd_list = false;
+ dev_dbg(kctx->kbdev->dev, "List node %p has status %d\n",
+ node, node->status);
+
KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+ if (node->status == KBASE_JD_ATOM_STATE_IN_JS)
+ continue;
if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
!kbase_ctx_flag(kctx, KCTX_DYING)) {
@@ -692,16 +836,20 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
}
#endif
-bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom)
+static bool jd_submit_atom(struct kbase_context *const kctx,
+ const struct base_jd_atom_v2 *const user_atom,
+ const struct base_jd_fragment *const user_jc_incr,
+ struct kbase_jd_atom *const katom)
{
struct kbase_device *kbdev = kctx->kbdev;
struct kbase_jd_context *jctx = &kctx->jctx;
int queued = 0;
int i;
int sched_prio;
- bool ret;
bool will_fail = false;
+ dev_dbg(kbdev->dev, "User did JD submit atom %p\n", (void *)katom);
+
/* Update the TOTAL number of jobs. This includes those not tracked by
* the scheduler: 'not ready to run' and 'dependency-only' jobs. */
jctx->job_nr++;
@@ -729,6 +877,22 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
katom->softjob_data = NULL;
+#if MALI_JIT_PRESSURE_LIMIT
+ /* Older API version atoms might have random values where jit_id now
+ * lives, but we must maintain backwards compatibility - handle the
+ * issue.
+ */
+ if (kctx->api_version < MIN_API_VERSION_WITH_JPL) {
+ katom->jit_ids[0] = 0;
+ katom->jit_ids[1] = 0;
+ } else {
+ katom->jit_ids[0] = user_atom->jit_id[0];
+ katom->jit_ids[1] = user_atom->jit_id[1];
+ }
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
+ katom->renderpass_id = user_atom->renderpass_id;
+
/* Implicitly sets katom->protected_state.enter as well. */
katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
@@ -754,6 +918,9 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ dev_dbg(kbdev->dev,
+ "Atom %p status to completed\n",
+ (void *)katom);
/* Wrong dependency setup. Atom will be sent
* back to user space. Do not record any
@@ -770,8 +937,7 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
katom,
TL_ATOM_STATE_IDLE);
- ret = jd_done_nolock(katom, NULL);
- goto out;
+ return jd_done_nolock(katom, NULL);
}
}
}
@@ -805,6 +971,8 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
/* Atom has completed, propagate the error code if any */
katom->event_code = dep_atom->event_code;
katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+ dev_dbg(kbdev->dev, "Atom %p status to queued\n",
+ (void *)katom);
/* This atom will be sent back to user space.
* Do not record any dependencies.
@@ -840,37 +1008,33 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
kbase_finish_soft_job(katom);
}
- ret = jd_done_nolock(katom, NULL);
-
- goto out;
- } else {
+ return jd_done_nolock(katom, NULL);
+ }
- if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
- /* This softjob has failed due to a previous
- * dependency, however we should still run the
- * prepare & finish functions
- */
- if (kbase_prepare_soft_job(katom) != 0) {
- katom->event_code =
- BASE_JD_EVENT_JOB_INVALID;
- ret = jd_done_nolock(katom, NULL);
- goto out;
- }
+ if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+ /* This softjob has failed due to a previous
+		 * dependency; however, we should still run the
+ * prepare & finish functions
+ */
+ if (kbase_prepare_soft_job(katom) != 0) {
+ katom->event_code =
+ BASE_JD_EVENT_JOB_INVALID;
+ return jd_done_nolock(katom, NULL);
}
-
- katom->will_fail_event_code = katom->event_code;
- ret = false;
-
- goto out;
}
- } else {
- /* These must occur after the above loop to ensure that an atom
- * that depends on a previous atom with the same number behaves
- * as expected */
- katom->event_code = BASE_JD_EVENT_DONE;
- katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+ katom->will_fail_event_code = katom->event_code;
+ return false;
}
+ /* These must occur after the above loop to ensure that an atom
+ * that depends on a previous atom with the same number behaves
+ * as expected
+ */
+ katom->event_code = BASE_JD_EVENT_DONE;
+ katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+ dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)katom);
+
/* For invalid priority, be most lenient and choose the default */
sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
@@ -886,34 +1050,49 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, katom->sched_priority);
KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx);
- /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
- if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
- dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+#if !MALI_INCREMENTAL_RENDERING
+ /* Reject atoms for incremental rendering if not supported */
+ if (katom->core_req &
+ (BASE_JD_REQ_START_RENDERPASS|BASE_JD_REQ_END_RENDERPASS)) {
+ dev_err(kctx->kbdev->dev,
+ "Rejecting atom with unsupported core_req 0x%x\n",
+ katom->core_req);
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- ret = jd_done_nolock(katom, NULL);
- goto out;
+ return jd_done_nolock(katom, NULL);
+ }
+#endif /* !MALI_INCREMENTAL_RENDERING */
+
+ if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) {
+ WARN_ON(katom->jc != 0);
+ katom->jc_fragment = *user_jc_incr;
+ } else if (!katom->jc &&
+ (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+ /* Reject atoms with job chain = NULL, as these cause issues
+ * with soft-stop
+ */
+ dev_err(kctx->kbdev->dev, "Rejecting atom with jc = NULL\n");
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ return jd_done_nolock(katom, NULL);
}
/* Reject atoms with an invalid device_nr */
if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
(katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
- dev_warn(kctx->kbdev->dev,
- "Rejecting atom with invalid device_nr %d",
+ dev_err(kctx->kbdev->dev,
+ "Rejecting atom with invalid device_nr %d\n",
katom->device_nr);
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- ret = jd_done_nolock(katom, NULL);
- goto out;
+ return jd_done_nolock(katom, NULL);
}
/* Reject atoms with invalid core requirements */
if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
(katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) {
- dev_warn(kctx->kbdev->dev,
- "Rejecting atom with invalid core requirements");
+ dev_err(kctx->kbdev->dev,
+ "Rejecting atom with invalid core requirements\n");
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
- ret = jd_done_nolock(katom, NULL);
- goto out;
+ return jd_done_nolock(katom, NULL);
}
/* Reject soft-job atom of certain types from accessing external resources */
@@ -921,11 +1100,10 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
(((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) ||
((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) ||
((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) {
- dev_warn(kctx->kbdev->dev,
- "Rejecting soft-job atom accessing external resources");
+ dev_err(kctx->kbdev->dev,
+ "Rejecting soft-job atom accessing external resources\n");
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- ret = jd_done_nolock(katom, NULL);
- goto out;
+ return jd_done_nolock(katom, NULL);
}
if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
@@ -933,11 +1111,21 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
/* setup failed (no access, bad resource, unknown resource types, etc.) */
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- ret = jd_done_nolock(katom, NULL);
- goto out;
+ return jd_done_nolock(katom, NULL);
}
}
+#if !MALI_JIT_PRESSURE_LIMIT
+ if ((kctx->api_version >= MIN_API_VERSION_WITH_JPL) &&
+ (user_atom->jit_id[0] || user_atom->jit_id[1])) {
+ /* JIT pressure limit is disabled, but we are receiving non-0
+ * JIT IDs - atom is invalid.
+ */
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ return jd_done_nolock(katom, NULL);
+ }
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
/* Validate the atom. Function will return error if the atom is
* malformed.
*
@@ -948,15 +1136,13 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- ret = jd_done_nolock(katom, NULL);
- goto out;
+ return jd_done_nolock(katom, NULL);
}
} else {
/* Soft-job */
if (kbase_prepare_soft_job(katom) != 0) {
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- ret = jd_done_nolock(katom, NULL);
- goto out;
+ return jd_done_nolock(katom, NULL);
}
}
@@ -966,39 +1152,38 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
kbasep_map_core_reqs_to_string(katom->core_req));
#endif
- if (queued && !IS_GPU_ATOM(katom)) {
- ret = false;
- goto out;
- }
+ if (queued && !IS_GPU_ATOM(katom))
+ return false;
#ifdef CONFIG_MALI_DMA_FENCE
- if (kbase_fence_dep_count_read(katom) != -1) {
- ret = false;
- goto out;
- }
+ if (kbase_fence_dep_count_read(katom) != -1)
+ return false;
+
#endif /* CONFIG_MALI_DMA_FENCE */
if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
if (kbase_process_soft_job(katom) == 0) {
kbase_finish_soft_job(katom);
- ret = jd_done_nolock(katom, NULL);
- goto out;
+ return jd_done_nolock(katom, NULL);
}
+ return false;
+ }
+
+ if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+ bool need_to_try_schedule_context;
- ret = false;
- } else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
katom->status = KBASE_JD_ATOM_STATE_IN_JS;
- ret = kbasep_js_add_job(kctx, katom);
+ dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n",
+ (void *)katom);
+
+ need_to_try_schedule_context = kbasep_js_add_job(kctx, katom);
/* If job was cancelled then resolve immediately */
- if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
- ret = jd_done_nolock(katom, NULL);
- } else {
- /* This is a pure dependency. Resolve it immediately */
- ret = jd_done_nolock(katom, NULL);
+ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+ return need_to_try_schedule_context;
}
- out:
- return ret;
+ /* This is a pure dependency. Resolve it immediately */
+ return jd_done_nolock(katom, NULL);
}
int kbase_jd_submit(struct kbase_context *kctx,
@@ -1021,12 +1206,15 @@ int kbase_jd_submit(struct kbase_context *kctx,
beenthere(kctx, "%s", "Enter");
if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
- dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+ dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it\n");
return -EINVAL;
}
- if (stride != sizeof(base_jd_atom_v2)) {
- dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+ if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) &&
+ stride != sizeof(struct base_jd_atom_v2)) {
+ dev_err(kbdev->dev,
+ "Stride %u passed to job_submit isn't supported by the kernel\n",
+ stride);
return -EINVAL;
}
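The stride check above accepts exactly two layouts of the userspace atom. Below is a hedged sketch of how a submitter would pick the stride; the include path and wrapper name are assumptions, while the two accepted sizes come from the check itself.

/* Sketch only (not part of the patch): the include path is assumed to be
 * wherever struct base_jd_atom_v2 is defined for userspace.
 */
#include <stddef.h>
#include "mali_base_jm_kernel.h"	/* assumed location of base_jd_atom_v2 */

static size_t jd_atom_stride(int kernel_accepts_renderpass_id)
{
	/* Pre-renderpass atoms stop just before the renderpass_id field... */
	if (!kernel_accepts_renderpass_id)
		return offsetof(struct base_jd_atom_v2, renderpass_id);

	/* ...newer atoms pass the full structure, with renderpass_id set and
	 * the trailing padding[] zeroed (the kernel rejects non-zero padding).
	 */
	return sizeof(struct base_jd_atom_v2);
}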
@@ -1035,14 +1223,58 @@ int kbase_jd_submit(struct kbase_context *kctx,
for (i = 0; i < nr_atoms; i++) {
struct base_jd_atom_v2 user_atom;
+ struct base_jd_fragment user_jc_incr;
struct kbase_jd_atom *katom;
- if (copy_from_user(&user_atom, user_addr,
- sizeof(user_atom)) != 0) {
- err = -EINVAL;
+ if (copy_from_user(&user_atom, user_addr, stride) != 0) {
+ dev_err(kbdev->dev,
+ "Invalid atom address %p passed to job_submit\n",
+ user_addr);
+ err = -EFAULT;
break;
}
+ if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) {
+ dev_dbg(kbdev->dev, "No renderpass ID: use 0\n");
+ user_atom.renderpass_id = 0;
+ } else {
+ /* Ensure all padding bytes are 0 for potential future
+ * extension
+ */
+ size_t j;
+
+ dev_dbg(kbdev->dev, "Renderpass ID is %d\n",
+ user_atom.renderpass_id);
+ for (j = 0; j < sizeof(user_atom.padding); j++) {
+ if (user_atom.padding[j]) {
+ dev_err(kbdev->dev,
+ "Bad padding byte %zu: %d\n",
+ j, user_atom.padding[j]);
+ err = -EINVAL;
+ break;
+ }
+ }
+ if (err)
+ break;
+ }
+
+ /* In this case 'jc' is the CPU address of a struct
+ * instead of a GPU address of a job chain.
+ */
+ if (user_atom.core_req & BASE_JD_REQ_END_RENDERPASS) {
+ if (copy_from_user(&user_jc_incr,
+ u64_to_user_ptr(user_atom.jc),
+ sizeof(user_jc_incr))) {
+ dev_err(kbdev->dev,
+ "Invalid jc address 0x%llx passed to job_submit\n",
+ user_atom.jc);
+ err = -EFAULT;
+ break;
+ }
+ dev_dbg(kbdev->dev, "Copied IR jobchain addresses\n");
+ user_atom.jc = 0;
+ }
+
user_addr = (void __user *)((uintptr_t) user_addr + stride);
mutex_lock(&jctx->lock);
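For the BASE_JD_REQ_END_RENDERPASS case handled above, the jc field carries the CPU address of a struct base_jd_fragment rather than a GPU job-chain address, and the kernel zeroes jc after copying it. A hedged userspace-side sketch follows; the fragment's own fields are not shown in this diff, so it is left opaque, and the include path is an assumption.

/* Sketch only (not part of the patch): only core_req, renderpass_id and jc
 * usage comes from the hunk above; the fragment contents and include path
 * are assumptions.
 */
#include <stdint.h>
#include "mali_base_jm_kernel.h"	/* assumed location of the atom structures */

static void fill_end_rp_atom(struct base_jd_atom_v2 *atom,
			     const struct base_jd_fragment *frag, uint8_t rp_id)
{
	atom->core_req |= BASE_JD_REQ_END_RENDERPASS;
	atom->renderpass_id = rp_id;
	/* The kernel copies the fragment from this CPU address and then
	 * clears jc before queuing the atom (see the hunk above).
	 */
	atom->jc = (uint64_t)(uintptr_t)frag;
}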
@@ -1092,8 +1324,8 @@ while (false)
mutex_lock(&jctx->lock);
}
- need_to_try_schedule_context |=
- jd_submit_atom(kctx, &user_atom, katom);
+ need_to_try_schedule_context |= jd_submit_atom(kctx, &user_atom,
+ &user_jc_incr, katom);
/* Register a completed job as a disjoint event when the GPU is in a disjoint state
* (ie. being reset).
@@ -1133,6 +1365,9 @@ void kbase_jd_done_worker(struct work_struct *data)
js_kctx_info = &kctx->jctx.sched_info;
js_devdata = &kbdev->js_data;
+ dev_dbg(kbdev->dev, "Enter atom %p done worker for kctx %p\n",
+ (void *)katom, (void *)kctx);
+
KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
kbase_backend_complete_wq(kbdev, katom);
@@ -1152,15 +1387,18 @@ void kbase_jd_done_worker(struct work_struct *data)
KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
if (katom->event_code == BASE_JD_EVENT_STOPPED) {
- /* Atom has been promoted to stopped */
unsigned long flags;
+ dev_dbg(kbdev->dev, "Atom %p has been promoted to stopped\n",
+ (void *)katom);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
mutex_unlock(&js_devdata->queue_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+ dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n",
+ (void *)katom);
kbase_js_unpull(kctx, katom);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -1271,6 +1509,9 @@ void kbase_jd_done_worker(struct work_struct *data)
kbase_pm_context_idle(kbdev);
KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+
+ dev_dbg(kbdev->dev, "Leave atom %p done worker for kctx %p\n",
+ (void *)katom, (void *)kctx);
}
/**
@@ -1398,6 +1639,7 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
kctx = katom->kctx;
KBASE_DEBUG_ASSERT(NULL != kctx);
+ dev_dbg(kbdev->dev, "JD: cancelling atom %p\n", (void *)katom);
KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
/* This should only be done from a context that is not scheduled */
@@ -1494,6 +1736,9 @@ int kbase_jd_init(struct kbase_context *kctx)
#endif
}
+ for (i = 0; i < BASE_JD_RP_COUNT; i++)
+ kctx->jctx.renderpasses[i].state = KBASE_JD_RP_COMPLETE;
+
mutex_init(&kctx->jctx.lock);
init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
diff --git a/mali_kbase/mali_kbase_jm.c b/mali_kbase/mali_kbase_jm.c
index b91a706..3f17dd7 100644
--- a/mali_kbase/mali_kbase_jm.c
+++ b/mali_kbase/mali_kbase_jm.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -45,6 +45,9 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
int i;
kctx = kbdev->hwaccess.active_kctx[js];
+ dev_dbg(kbdev->dev,
+ "Trying to run the next %d jobs in kctx %p (s:%d)\n",
+ nr_jobs_to_submit, (void *)kctx, js);
if (!kctx)
return true;
@@ -58,7 +61,8 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
kbase_backend_run_atom(kbdev, katom);
}
- return false; /* Slot ringbuffer should now be full */
+ dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%d)\n", js);
+ return false;
}
u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
@@ -66,6 +70,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
u32 ret_mask = 0;
lockdep_assert_held(&kbdev->hwaccess_lock);
+ dev_dbg(kbdev->dev, "JM kick slot mask 0x%x\n", js_mask);
while (js_mask) {
int js = ffs(js_mask) - 1;
@@ -77,6 +82,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
js_mask &= ~(1 << js);
}
+ dev_dbg(kbdev->dev, "Can still submit to mask 0x%x\n", ret_mask);
return ret_mask;
}
@@ -111,8 +117,11 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
lockdep_assert_held(&kbdev->hwaccess_lock);
for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
- if (kbdev->hwaccess.active_kctx[js] == kctx)
+ if (kbdev->hwaccess.active_kctx[js] == kctx) {
+ dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n",
+ (void *)kctx, js);
kbdev->hwaccess.active_kctx[js] = NULL;
+ }
}
}
@@ -121,6 +130,9 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
{
lockdep_assert_held(&kbdev->hwaccess_lock);
+ dev_dbg(kbdev->dev, "Atom %p is returning with event code 0x%x\n",
+ (void *)katom, katom->event_code);
+
if (katom->event_code != BASE_JD_EVENT_STOPPED &&
katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
return kbase_js_complete_atom(katom, NULL);
diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c
index 7ab25d1..b3ae604 100644
--- a/mali_kbase/mali_kbase_js.c
+++ b/mali_kbase/mali_kbase_js.c
@@ -37,6 +37,7 @@
#include "mali_kbase_jm.h"
#include "mali_kbase_hwaccess_jm.h"
+
/*
* Private types
*/
@@ -138,31 +139,6 @@ static void kbase_js_sync_timers(struct kbase_device *kbdev)
mutex_unlock(&kbdev->js_data.runpool_mutex);
}
-/* Hold the mmu_hw_mutex and hwaccess_lock for this */
-bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
- struct kbase_context *kctx)
-{
- bool result = false;
- int as_nr;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- KBASE_DEBUG_ASSERT(kctx != NULL);
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- as_nr = kctx->as_nr;
- if (atomic_read(&kctx->refcount) > 0) {
- KBASE_DEBUG_ASSERT(as_nr >= 0);
-
- kbase_ctx_sched_retain_ctx_refcount(kctx);
- KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
- NULL, 0u, atomic_read(&kctx->refcount));
- result = true;
- }
-
- return result;
-}
-
/**
* jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
* @kctx: Pointer to kbase context with ring buffer.
@@ -179,11 +155,18 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
static inline bool
jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
{
+ bool none_to_pull;
struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- return RB_EMPTY_ROOT(&rb->runnable_tree);
+ none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree);
+
+ dev_dbg(kctx->kbdev->dev,
+ "Slot %d (prio %d) is %spullable in kctx %p\n",
+ js, prio, none_to_pull ? "not " : "", kctx);
+
+ return none_to_pull;
}
/**
@@ -245,13 +228,37 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
rb_erase(node, &queue->runnable_tree);
callback(kctx->kbdev, entry);
+
+ /* Runnable end-of-renderpass atoms can also be in the linked
+ * list of atoms blocked on cross-slot dependencies. Remove them
+ * to avoid calling the callback twice.
+ */
+ if (entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) {
+ WARN_ON(!(entry->core_req &
+ BASE_JD_REQ_END_RENDERPASS));
+ dev_dbg(kctx->kbdev->dev,
+ "Del runnable atom %p from X_DEP list\n",
+ (void *)entry);
+
+ list_del(&entry->queue);
+ entry->atom_flags &=
+ ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+ }
}
while (!list_empty(&queue->x_dep_head)) {
struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next,
struct kbase_jd_atom, queue);
+ WARN_ON(!(entry->atom_flags &
+ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
+ dev_dbg(kctx->kbdev->dev,
+ "Del blocked atom %p from X_DEP list\n",
+ (void *)entry);
+
list_del(queue->x_dep_head.next);
+ entry->atom_flags &=
+ ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
callback(kctx->kbdev, entry);
}
@@ -296,10 +303,15 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
struct rb_node *node;
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+ dev_dbg(kctx->kbdev->dev,
+ "Peeking runnable tree of kctx %p for prio %d (s:%d)\n",
+ (void *)kctx, prio, js);
node = rb_first(&rb->runnable_tree);
- if (!node)
+ if (!node) {
+ dev_dbg(kctx->kbdev->dev, "Tree is empty\n");
return NULL;
+ }
return rb_entry(node, struct kbase_jd_atom, runnable_tree_node);
}
@@ -354,6 +366,9 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+ dev_dbg(kctx->kbdev->dev, "Erasing atom %p from runnable tree of kctx %p\n",
+ (void *)katom, (void *)kctx);
+
/* Atoms must be pulled in the correct order. */
WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
@@ -373,6 +388,9 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+ dev_dbg(kbdev->dev, "Adding atom %p to runnable tree of kctx %p (s:%d)\n",
+ (void *)katom, (void *)kctx, js);
+
while (*new) {
struct kbase_jd_atom *entry = container_of(*new,
struct kbase_jd_atom, runnable_tree_node);
@@ -573,6 +591,7 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx)
js_kctx_info = &kctx->jctx.sched_info;
+ kctx->slots_pullable = 0;
js_kctx_info->ctx.nr_jobs = 0;
kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
kbase_ctx_flag_clear(kctx, KCTX_DYING);
@@ -663,6 +682,8 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
bool ret = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
+ dev_dbg(kbdev->dev, "Add pullable tail kctx %p (s:%d)\n",
+ (void *)kctx, js);
if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
@@ -703,6 +724,8 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock(
bool ret = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
+ dev_dbg(kbdev->dev, "Add pullable head kctx %p (s:%d)\n",
+ (void *)kctx, js);
if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
@@ -777,6 +800,8 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
bool ret = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
+ dev_dbg(kbdev->dev, "Add unpullable tail kctx %p (s:%d)\n",
+ (void *)kctx, js);
list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
&kbdev->js_data.ctx_list_unpullable[js][kctx->priority]);
@@ -867,7 +892,9 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
jctx.sched_info.ctx.ctx_list_entry[js]);
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
-
+ dev_dbg(kbdev->dev,
+ "Popped %p from the pullable queue (s:%d)\n",
+ (void *)kctx, js);
return kctx;
}
return NULL;
@@ -912,32 +939,57 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
{
struct kbasep_js_device_data *js_devdata;
struct kbase_jd_atom *katom;
+ struct kbase_device *kbdev = kctx->kbdev;
- lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+ lockdep_assert_held(&kbdev->hwaccess_lock);
- js_devdata = &kctx->kbdev->js_data;
+ js_devdata = &kbdev->js_data;
if (is_scheduled) {
- if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+ dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n",
+ (void *)kctx);
return false;
+ }
}
katom = jsctx_rb_peek(kctx, js);
- if (!katom)
+ if (!katom) {
+ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n",
+ (void *)kctx, js);
return false; /* No pullable atoms */
- if (kctx->blocked_js[js][katom->sched_priority])
+ }
+ if (kctx->blocked_js[js][katom->sched_priority]) {
+ dev_dbg(kbdev->dev,
+ "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n",
+ (void *)kctx, katom->sched_priority, js);
return false;
- if (atomic_read(&katom->blocked))
+ }
+ if (atomic_read(&katom->blocked)) {
+ dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_ctx_pullable\n",
+ (void *)katom);
return false; /* next atom blocked */
- if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+ }
+ if (kbase_js_atom_blocked_on_x_dep(katom)) {
if (katom->x_pre_dep->gpu_rb_state ==
- KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
- katom->x_pre_dep->will_fail_event_code)
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+ katom->x_pre_dep->will_fail_event_code) {
+ dev_dbg(kbdev->dev,
+ "JS: X pre-dep %p is not present in slot FIFO or will fail\n",
+ (void *)katom->x_pre_dep);
return false;
+ }
if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
- kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+ kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) {
+ dev_dbg(kbdev->dev,
+ "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n",
+ (void *)katom, js);
return false;
+ }
}
+ dev_dbg(kbdev->dev, "JS: Atom %p is pullable in kctx %p (s:%d)\n",
+ (void *)katom, (void *)kctx, js);
+
return true;
}
@@ -958,9 +1010,15 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
int dep_js = kbase_js_get_slot(kbdev, dep_atom);
int dep_prio = dep_atom->sched_priority;
+ dev_dbg(kbdev->dev,
+ "Checking dep %d of atom %p (s:%d) on %p (s:%d)\n",
+ i, (void *)katom, js, (void *)dep_atom, dep_js);
+
/* Dependent atom must already have been submitted */
if (!(dep_atom->atom_flags &
KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
+ dev_dbg(kbdev->dev,
+ "Blocker not submitted yet\n");
ret = false;
break;
}
@@ -968,6 +1026,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
/* Dependencies with different priorities can't
be represented in the ringbuffer */
if (prio != dep_prio) {
+ dev_dbg(kbdev->dev,
+ "Different atom priorities\n");
ret = false;
break;
}
@@ -976,12 +1036,16 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
/* Only one same-slot dependency can be
* represented in the ringbuffer */
if (has_dep) {
+ dev_dbg(kbdev->dev,
+ "Too many same-slot deps\n");
ret = false;
break;
}
/* Each dependee atom can only have one
* same-slot dependency */
if (dep_atom->post_dep) {
+ dev_dbg(kbdev->dev,
+ "Too many same-slot successors\n");
ret = false;
break;
}
@@ -990,12 +1054,16 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
/* Only one cross-slot dependency can be
* represented in the ringbuffer */
if (has_x_dep) {
+ dev_dbg(kbdev->dev,
+ "Too many cross-slot deps\n");
ret = false;
break;
}
/* Each dependee atom can only have one
* cross-slot dependency */
if (dep_atom->x_post_dep) {
+ dev_dbg(kbdev->dev,
+ "Too many cross-slot successors\n");
ret = false;
break;
}
@@ -1003,6 +1071,9 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
* HW access ringbuffer */
if (dep_atom->gpu_rb_state !=
KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+ dev_dbg(kbdev->dev,
+ "Blocker already in ringbuffer (state:%d)\n",
+ dep_atom->gpu_rb_state);
ret = false;
break;
}
@@ -1010,6 +1081,9 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
* completed */
if (dep_atom->status !=
KBASE_JD_ATOM_STATE_IN_JS) {
+ dev_dbg(kbdev->dev,
+ "Blocker already completed (status:%d)\n",
+ dep_atom->status);
ret = false;
break;
}
@@ -1030,6 +1104,11 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
if (dep_atom) {
int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+ dev_dbg(kbdev->dev,
+ "Clearing dep %d of atom %p (s:%d) on %p (s:%d)\n",
+ i, (void *)katom, js, (void *)dep_atom,
+ dep_js);
+
if ((js != dep_js) &&
(dep_atom->status !=
KBASE_JD_ATOM_STATE_COMPLETED)
@@ -1040,6 +1119,10 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
katom->atom_flags |=
KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+ dev_dbg(kbdev->dev, "Set X_DEP flag on atom %p\n",
+ (void *)katom);
+
katom->x_pre_dep = dep_atom;
dep_atom->x_post_dep = katom;
if (kbase_jd_katom_dep_type(
@@ -1059,6 +1142,10 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
kbase_jd_katom_dep_clear(&katom->dep[i]);
}
}
+ } else {
+ dev_dbg(kbdev->dev,
+ "Deps of atom %p (s:%d) could not be represented\n",
+ (void *)katom, js);
}
return ret;
@@ -1110,6 +1197,101 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx)
kbase_js_set_ctx_priority(kctx, new_priority);
}
+/**
+ * js_add_start_rp() - Add an atom that starts a renderpass to the job scheduler
+ * @start_katom: Pointer to the atom to be added.
+ * Return: 0 if successful or a negative value on failure.
+ */
+static int js_add_start_rp(struct kbase_jd_atom *const start_katom)
+{
+ struct kbase_context *const kctx = start_katom->kctx;
+ struct kbase_jd_renderpass *rp;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ unsigned long flags;
+
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS)))
+ return -EINVAL;
+
+ if (start_katom->core_req & BASE_JD_REQ_END_RENDERPASS)
+ return -EINVAL;
+
+ if (start_katom->renderpass_id >= ARRAY_SIZE(kctx->jctx.renderpasses))
+ return -EINVAL;
+
+ rp = &kctx->jctx.renderpasses[start_katom->renderpass_id];
+
+ if (rp->state != KBASE_JD_RP_COMPLETE)
+ return -EINVAL;
+
+ dev_dbg(kctx->kbdev->dev, "JS add start atom %p of RP %d\n",
+ (void *)start_katom, start_katom->renderpass_id);
+
+ /* The following members are read when updating the job slot
+	 * ringbuffer/fifo and therefore require additional locking.
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ rp->state = KBASE_JD_RP_START;
+ rp->start_katom = start_katom;
+ rp->end_katom = NULL;
+ INIT_LIST_HEAD(&rp->oom_reg_list);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return 0;
+}
+
+/**
+ * js_add_end_rp() - Add an atom that ends a renderpass to the job scheduler
+ * @end_katom: Pointer to the atom to be added.
+ * Return: 0 if successful or a negative value on failure.
+ */
+static int js_add_end_rp(struct kbase_jd_atom *const end_katom)
+{
+ struct kbase_context *const kctx = end_katom->kctx;
+ struct kbase_jd_renderpass *rp;
+ struct kbase_device *const kbdev = kctx->kbdev;
+
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS)))
+ return -EINVAL;
+
+ if (end_katom->core_req & BASE_JD_REQ_START_RENDERPASS)
+ return -EINVAL;
+
+ if (end_katom->renderpass_id >= ARRAY_SIZE(kctx->jctx.renderpasses))
+ return -EINVAL;
+
+ rp = &kctx->jctx.renderpasses[end_katom->renderpass_id];
+
+ dev_dbg(kbdev->dev, "JS add end atom %p in state %d of RP %d\n",
+ (void *)end_katom, (int)rp->state, end_katom->renderpass_id);
+
+ if (rp->state == KBASE_JD_RP_COMPLETE)
+ return -EINVAL;
+
+ if (rp->end_katom == NULL) {
+ /* We can't be in a retry state until the fragment job chain
+ * has completed.
+ */
+ unsigned long flags;
+
+ WARN_ON(rp->state == KBASE_JD_RP_RETRY);
+ WARN_ON(rp->state == KBASE_JD_RP_RETRY_PEND_OOM);
+ WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ rp->end_katom = end_katom;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ } else
+ WARN_ON(rp->end_katom != end_katom);
+
+ return 0;
+}
+
bool kbasep_js_add_job(struct kbase_context *kctx,
struct kbase_jd_atom *atom)
{
@@ -1117,6 +1299,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
struct kbasep_js_kctx_info *js_kctx_info;
struct kbase_device *kbdev;
struct kbasep_js_device_data *js_devdata;
+ int err = 0;
bool enqueue_required = false;
bool timer_sync = false;
@@ -1132,6 +1315,17 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
mutex_lock(&js_devdata->queue_mutex);
mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ if (atom->core_req & BASE_JD_REQ_START_RENDERPASS)
+ err = js_add_start_rp(atom);
+ else if (atom->core_req & BASE_JD_REQ_END_RENDERPASS)
+ err = js_add_end_rp(atom);
+
+ if (err < 0) {
+ atom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ atom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ goto out_unlock;
+ }
+
/*
* Begin Runpool transaction
*/
@@ -1140,6 +1334,8 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
/* Refcount ctx.nr_jobs */
KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
++(js_kctx_info->ctx.nr_jobs);
+ dev_dbg(kbdev->dev, "Add atom %p to kctx %p; now %d in ctx\n",
+ (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs);
/* Lock for state available during IRQ */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -1150,10 +1346,14 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
if (!kbase_js_dep_validate(kctx, atom)) {
/* Dependencies could not be represented */
--(js_kctx_info->ctx.nr_jobs);
+ dev_dbg(kbdev->dev,
+ "Remove atom %p from kctx %p; now %d in ctx\n",
+ (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs);
/* Setting atom status back to queued as it still has unresolved
* dependencies */
atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+ dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)atom);
/* Undo the count, as the atom will get added again later but
* leave the context priority adjusted or boosted, in case if
@@ -1221,6 +1421,9 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
}
}
out_unlock:
+ dev_dbg(kbdev->dev, "Enqueue of kctx %p is %srequired\n",
+ kctx, enqueue_required ? "" : "not ");
+
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
mutex_unlock(&js_devdata->queue_mutex);
@@ -1246,6 +1449,9 @@ void kbasep_js_remove_job(struct kbase_device *kbdev,
/* De-refcount ctx.nr_jobs */
KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
--(js_kctx_info->ctx.nr_jobs);
+ dev_dbg(kbdev->dev,
+ "Remove atom %p from kctx %p; now %d in ctx\n",
+ (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (--kctx->atoms_count[atom->sched_priority] == 0)
@@ -1282,44 +1488,6 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
return attr_state_changed;
}
-bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
- struct kbase_context *kctx)
-{
- unsigned long flags;
- bool result;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
-
- mutex_lock(&kbdev->mmu_hw_mutex);
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- mutex_unlock(&kbdev->mmu_hw_mutex);
-
- return result;
-}
-
-struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
- int as_nr)
-{
- unsigned long flags;
- struct kbase_context *found_kctx = NULL;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
- found_kctx = kbdev->as_to_kctx[as_nr];
-
- if (found_kctx != NULL)
- kbase_ctx_sched_retain_ctx_refcount(found_kctx);
-
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- return found_kctx;
-}
-
/**
* kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after
* releasing a context and/or atom
@@ -1476,8 +1644,11 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
kbase_backend_release_ctx_irq(kbdev, kctx);
for (slot = 0; slot < num_slots; slot++) {
- if (kbdev->hwaccess.active_kctx[slot] == kctx)
+ if (kbdev->hwaccess.active_kctx[slot] == kctx) {
+ dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n",
+ (void *)kctx, slot);
kbdev->hwaccess.active_kctx[slot] = NULL;
+ }
}
/* Ctx Attribute handling
@@ -1679,6 +1850,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
bool kctx_suspended = false;
int as_nr;
+ dev_dbg(kbdev->dev, "Scheduling kctx %p (s:%d)\n", kctx, js);
+
js_devdata = &kbdev->js_data;
js_kctx_info = &kctx->jctx.sched_info;
@@ -1812,7 +1985,11 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev,
if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
kbase_backend_use_ctx_sched(kbdev, kctx, js)) {
- /* Context already has ASID - mark as active */
+
+ dev_dbg(kbdev->dev,
+ "kctx %p already has ASID - mark as active (s:%d)\n",
+ (void *)kctx, js);
+
if (kbdev->hwaccess.active_kctx[js] != kctx) {
kbdev->hwaccess.active_kctx[js] = kctx;
kbase_ctx_flag_clear(kctx,
@@ -2043,7 +2220,7 @@ static int kbase_js_get_slot(struct kbase_device *kbdev,
bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
struct kbase_jd_atom *katom)
{
- bool enqueue_required;
+ bool enqueue_required, add_required = true;
katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
@@ -2057,6 +2234,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
} else {
enqueue_required = false;
}
+
if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
(katom->pre_dep && (katom->pre_dep->atom_flags &
KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
@@ -2064,10 +2242,21 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
int js = katom->slot_nr;
struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+ dev_dbg(kctx->kbdev->dev, "Add atom %p to X_DEP list (s:%d)\n",
+ (void *)katom, js);
+
list_add_tail(&katom->queue, &queue->x_dep_head);
katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
- enqueue_required = false;
+ if (kbase_js_atom_blocked_on_x_dep(katom)) {
+ enqueue_required = false;
+ add_required = false;
+ }
} else {
+ dev_dbg(kctx->kbdev->dev, "Atom %p not added to X_DEP list\n",
+ (void *)katom);
+ }
+
+ if (add_required) {
/* Check if there are lower priority jobs to soft stop */
kbase_job_slot_ctx_priority_check_locked(kctx, katom);
@@ -2076,6 +2265,10 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
}
+ dev_dbg(kctx->kbdev->dev,
+ "Enqueue of kctx %p is %srequired to submit atom %p\n",
+ kctx, enqueue_required ? "" : "not ", katom);
+
return enqueue_required;
}
@@ -2090,19 +2283,36 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
*/
static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
{
- lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock);
+ struct kbase_context *const kctx = katom->kctx;
+
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
while (katom) {
WARN_ON(!(katom->atom_flags &
KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
- if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+ if (!kbase_js_atom_blocked_on_x_dep(katom)) {
+ dev_dbg(kctx->kbdev->dev,
+ "Del atom %p from X_DEP list in js_move_to_tree\n",
+ (void *)katom);
+
list_del(&katom->queue);
katom->atom_flags &=
~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
- jsctx_tree_add(katom->kctx, katom);
- katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+ /* For incremental rendering, an end-of-renderpass atom
+ * may have had its dependency on start-of-renderpass
+ * ignored and may therefore already be in the tree.
+ */
+ if (!(katom->atom_flags &
+ KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
+ jsctx_tree_add(kctx, katom);
+ katom->atom_flags |=
+ KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+ }
} else {
+ dev_dbg(kctx->kbdev->dev,
+ "Atom %p blocked on x-dep in js_move_to_tree\n",
+ (void *)katom);
break;
}
@@ -2145,6 +2355,9 @@ static void kbase_js_evict_deps(struct kbase_context *kctx,
/* Remove dependency.*/
x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+ dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %p\n",
+ (void *)x_dep);
+
/* Fail if it had a data dependency. */
if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
x_dep->will_fail_event_code = katom->event_code;
@@ -2164,22 +2377,37 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
KBASE_DEBUG_ASSERT(kctx);
kbdev = kctx->kbdev;
+ dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %p (s:%d)\n",
+ (void *)kctx, js);
js_devdata = &kbdev->js_data;
lockdep_assert_held(&kbdev->hwaccess_lock);
- if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+ dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n",
+ (void *)kctx);
return NULL;
+ }
if (kbase_pm_is_suspending(kbdev))
return NULL;
katom = jsctx_rb_peek(kctx, js);
- if (!katom)
+ if (!katom) {
+ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n",
+ (void *)kctx, js);
return NULL;
- if (kctx->blocked_js[js][katom->sched_priority])
+ }
+ if (kctx->blocked_js[js][katom->sched_priority]) {
+ dev_dbg(kbdev->dev,
+ "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n",
+ (void *)kctx, katom->sched_priority, js);
return NULL;
- if (atomic_read(&katom->blocked))
+ }
+ if (atomic_read(&katom->blocked)) {
+ dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_pull\n",
+ (void *)katom);
return NULL;
+ }
/* Due to ordering restrictions when unpulling atoms on failure, we do
* not allow multiple runs of fail-dep atoms from the same context to be
@@ -2192,14 +2420,22 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
return NULL;
}
- if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+ if (kbase_js_atom_blocked_on_x_dep(katom)) {
if (katom->x_pre_dep->gpu_rb_state ==
- KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
- katom->x_pre_dep->will_fail_event_code)
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+ katom->x_pre_dep->will_fail_event_code) {
+ dev_dbg(kbdev->dev,
+ "JS: X pre-dep %p is not present in slot FIFO or will fail\n",
+ (void *)katom->x_pre_dep);
return NULL;
+ }
if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
- kbase_backend_nr_atoms_on_slot(kbdev, js))
+ kbase_backend_nr_atoms_on_slot(kbdev, js)) {
+ dev_dbg(kbdev->dev,
+ "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n",
+ (void *)katom, js);
return NULL;
+ }
}
kbase_ctx_flag_set(kctx, KCTX_PULLED);
@@ -2221,9 +2457,214 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
katom->ticks = 0;
+ dev_dbg(kbdev->dev, "JS: successfully pulled atom %p from kctx %p (s:%d)\n",
+ (void *)katom, (void *)kctx, js);
+
return katom;
}
+/**
+ * js_return_of_start_rp() - Handle soft-stop of an atom that starts a
+ * renderpass
+ * @start_katom: Pointer to the start-of-renderpass atom that was soft-stopped
+ *
+ * This function is called to switch to incremental rendering if the tiler job
+ * chain at the start of a renderpass has used too much memory. It prevents the
+ * tiler job being pulled for execution in the job scheduler again until the
+ * next phase of incremental rendering is complete.
+ *
+ * If the end-of-renderpass atom is already in the job scheduler (because a
+ * previous attempt at tiling used too much memory during the same renderpass)
+ * then it is unblocked; otherwise, it is run by handing it to the scheduler.
+ */
+static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom)
+{
+ struct kbase_context *const kctx = start_katom->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ struct kbase_jd_renderpass *rp;
+ struct kbase_jd_atom *end_katom;
+ unsigned long flags;
+
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS)))
+ return;
+
+ if (WARN_ON(start_katom->renderpass_id >=
+ ARRAY_SIZE(kctx->jctx.renderpasses)))
+ return;
+
+ rp = &kctx->jctx.renderpasses[start_katom->renderpass_id];
+
+ if (WARN_ON(rp->start_katom != start_katom))
+ return;
+
+ dev_dbg(kctx->kbdev->dev,
+ "JS return start atom %p in state %d of RP %d\n",
+ (void *)start_katom, (int)rp->state,
+ start_katom->renderpass_id);
+
+ if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE))
+ return;
+
+ /* The tiler job might have been soft-stopped for some reason other
+ * than running out of memory.
+ */
+ if (rp->state == KBASE_JD_RP_START || rp->state == KBASE_JD_RP_RETRY) {
+ dev_dbg(kctx->kbdev->dev,
+ "JS return isn't OOM in state %d of RP %d\n",
+ (int)rp->state, start_katom->renderpass_id);
+ return;
+ }
+
+ dev_dbg(kctx->kbdev->dev,
+ "JS return confirm OOM in state %d of RP %d\n",
+ (int)rp->state, start_katom->renderpass_id);
+
+ if (WARN_ON(rp->state != KBASE_JD_RP_PEND_OOM &&
+ rp->state != KBASE_JD_RP_RETRY_PEND_OOM))
+ return;
+
+ /* Prevent the tiler job being pulled for execution in the
+ * job scheduler again.
+ */
+ dev_dbg(kbdev->dev, "Blocking start atom %p\n",
+ (void *)start_katom);
+ atomic_inc(&start_katom->blocked);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ rp->state = (rp->state == KBASE_JD_RP_PEND_OOM) ?
+ KBASE_JD_RP_OOM : KBASE_JD_RP_RETRY_OOM;
+
+ /* Was the fragment job chain submitted to kbase yet? */
+ end_katom = rp->end_katom;
+ if (end_katom) {
+ dev_dbg(kctx->kbdev->dev, "JS return add end atom %p\n",
+ (void *)end_katom);
+
+ if (rp->state == KBASE_JD_RP_RETRY_OOM) {
+ /* Allow the end of the renderpass to be pulled for
+ * execution again to continue incremental rendering.
+ */
+ dev_dbg(kbdev->dev, "Unblocking end atom %p\n",
+ (void *)end_katom);
+ atomic_dec(&end_katom->blocked);
+ WARN_ON(!(end_katom->atom_flags &
+ KBASE_KATOM_FLAG_JSCTX_IN_TREE));
+ WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_IN_JS);
+
+ kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
+ end_katom->slot_nr);
+
+ /* Expect the fragment job chain to be scheduled without
+ * further action because this function is called when
+ * returning an atom to the job scheduler ringbuffer.
+ */
+ end_katom = NULL;
+ } else {
+ WARN_ON(end_katom->status !=
+ KBASE_JD_ATOM_STATE_QUEUED &&
+ end_katom->status != KBASE_JD_ATOM_STATE_IN_JS);
+ }
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (end_katom)
+ kbase_jd_dep_clear_locked(end_katom);
+}
+
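/*
 * Illustrative sketch, not part of this patch: the transition performed by
 * js_return_of_start_rp() above, written out as an explicit switch to make
 * the incremental-rendering state machine easier to follow. The helper name
 * is hypothetical; the KBASE_JD_RP_* values are those referenced in this
 * patch.
 */
static int js_confirm_oom_state(int state)
{
	switch (state) {
	case KBASE_JD_RP_PEND_OOM:
		/* First out-of-memory soft-stop in this renderpass */
		return KBASE_JD_RP_OOM;
	case KBASE_JD_RP_RETRY_PEND_OOM:
		/* Out-of-memory soft-stop of a retried tiler job chain */
		return KBASE_JD_RP_RETRY_OOM;
	default:
		/* Any other state means the soft-stop was not caused by
		 * running out of memory, so no transition is made.
		 */
		return state;
	}
}
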
+/**
+ * js_return_of_end_rp() - Handle completion of an atom that ends a renderpass
+ * @end_katom: Pointer to the end-of-renderpass atom that was completed
+ *
+ * This function is called to continue incremental rendering if the tiler job
+ * chain at the start of a renderpass used too much memory. It resets the
+ * mechanism for detecting excessive memory usage, then allows the soft-stopped
+ * tiler job chain to be pulled for execution again.
+ *
+ * The start-of-renderpass atom must already have been submitted to kbase.
+ */
+static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom)
+{
+ struct kbase_context *const kctx = end_katom->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ struct kbase_jd_renderpass *rp;
+ struct kbase_jd_atom *start_katom;
+ unsigned long flags;
+
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS)))
+ return;
+
+ if (WARN_ON(end_katom->renderpass_id >=
+ ARRAY_SIZE(kctx->jctx.renderpasses)))
+ return;
+
+ rp = &kctx->jctx.renderpasses[end_katom->renderpass_id];
+
+ if (WARN_ON(rp->end_katom != end_katom))
+ return;
+
+ dev_dbg(kctx->kbdev->dev,
+ "JS return end atom %p in state %d of RP %d\n",
+ (void *)end_katom, (int)rp->state, end_katom->renderpass_id);
+
+ if (WARN_ON(rp->state != KBASE_JD_RP_OOM &&
+ rp->state != KBASE_JD_RP_RETRY_OOM))
+ return;
+
+ /* Reduce the number of mapped pages in the memory regions that
+ * triggered out-of-memory last time so that we can detect excessive
+ * memory usage again.
+ */
+ kbase_gpu_vm_lock(kctx);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ while (!list_empty(&rp->oom_reg_list)) {
+ struct kbase_va_region *reg =
+ list_first_entry(&rp->oom_reg_list,
+ struct kbase_va_region, link);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ dev_dbg(kbdev->dev,
+ "Reset backing to %zu pages for region %p\n",
+ reg->threshold_pages, (void *)reg);
+
+ if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED))
+ kbase_mem_shrink(kctx, reg, reg->threshold_pages);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ dev_dbg(kbdev->dev, "Deleting region %p from list\n",
+ (void *)reg);
+ list_del_init(&reg->link);
+ kbase_va_region_alloc_put(kctx, reg);
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ kbase_gpu_vm_unlock(kctx);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ rp->state = KBASE_JD_RP_RETRY;
+ dev_dbg(kbdev->dev, "Changed state to %d for retry\n", rp->state);
+
+ /* Allow the start of the renderpass to be pulled for execution again
+ * to begin/continue incremental rendering.
+ */
+ start_katom = rp->start_katom;
+ if (!WARN_ON(!start_katom)) {
+ dev_dbg(kbdev->dev, "Unblocking start atom %p\n",
+ (void *)start_katom);
+ atomic_dec(&start_katom->blocked);
+ (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx,
+ start_katom->slot_nr);
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
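
/*
 * Illustrative sketch, not part of this patch: how the 'blocked' refcount
 * used by the two handlers above interacts with atom pulling. The start
 * atom is blocked in js_return_of_start_rp() and unblocked again in
 * js_return_of_end_rp(), and kbase_js_pull() earlier in this file refuses
 * to return a blocked atom. The helper name is hypothetical.
 */
static bool js_atom_is_pull_blocked(struct kbase_jd_atom *katom)
{
	/* Non-zero while a renderpass is waiting for its end-of-renderpass
	 * (fragment) phase to flush memory before the tiler is retried.
	 */
	return atomic_read(&katom->blocked) != 0;
}
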
static void js_return_worker(struct work_struct *data)
{
@@ -2241,7 +2682,11 @@ static void js_return_worker(struct work_struct *data)
unsigned long flags;
base_jd_core_req core_req = katom->core_req;
- KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom);
+ dev_dbg(kbdev->dev, "%s for atom %p with event code 0x%x\n",
+ __func__, (void *)katom, katom->event_code);
+
+ if (katom->event_code != BASE_JD_EVENT_END_RP_DONE)
+ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom);
kbase_backend_complete_wq(kbdev, katom);
@@ -2253,7 +2698,8 @@ static void js_return_worker(struct work_struct *data)
atomic_dec(&kctx->atoms_pulled);
atomic_dec(&kctx->atoms_pulled_slot[js]);
- atomic_dec(&katom->blocked);
+ if (katom->event_code != BASE_JD_EVENT_END_RP_DONE)
+ atomic_dec(&katom->blocked);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -2278,7 +2724,17 @@ static void js_return_worker(struct work_struct *data)
}
if (!atomic_read(&kctx->atoms_pulled)) {
+ dev_dbg(kbdev->dev,
+ "No atoms currently pulled from context %p\n",
+ (void *)kctx);
+
if (!kctx->slots_pullable) {
+ dev_dbg(kbdev->dev,
+ "Context %p %s counted as runnable\n",
+ (void *)kctx,
+ kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ?
+ "is" : "isn't");
+
WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
@@ -2309,6 +2765,11 @@ static void js_return_worker(struct work_struct *data)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (context_idle) {
+ dev_dbg(kbdev->dev,
+ "Context %p %s counted as active\n",
+ (void *)kctx,
+ kbase_ctx_flag(kctx, KCTX_ACTIVE) ?
+ "is" : "isn't");
WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
kbase_pm_context_idle(kbdev);
@@ -2320,7 +2781,21 @@ static void js_return_worker(struct work_struct *data)
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
mutex_unlock(&js_devdata->queue_mutex);
+ if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) {
+ mutex_lock(&kctx->jctx.lock);
+ js_return_of_start_rp(katom);
+ mutex_unlock(&kctx->jctx.lock);
+ } else if (katom->event_code == BASE_JD_EVENT_END_RP_DONE) {
+ mutex_lock(&kctx->jctx.lock);
+ js_return_of_end_rp(katom);
+ mutex_unlock(&kctx->jctx.lock);
+ }
+
katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+ dev_dbg(kbdev->dev, "JS: retained state %s finished",
+ kbasep_js_has_atom_finished(&retained_state) ?
+ "has" : "hasn't");
+
WARN_ON(kbasep_js_has_atom_finished(&retained_state));
kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
@@ -2329,10 +2804,16 @@ static void js_return_worker(struct work_struct *data)
kbase_js_sched_all(kbdev);
kbase_backend_complete_wq_post_sched(kbdev, core_req);
+
+ dev_dbg(kbdev->dev, "Leaving %s for atom %p\n",
+ __func__, (void *)katom);
}
void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
+ dev_dbg(kctx->kbdev->dev, "Unpulling atom %p in kctx %p\n",
+ (void *)katom, (void *)kctx);
+
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
jsctx_rb_unpull(kctx, katom);
@@ -2348,6 +2829,157 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
queue_work(kctx->jctx.job_done_wq, &katom->work);
}
+/**
+ * js_complete_start_rp() - Handle completion of atom that starts a renderpass
+ * @kctx: Context pointer
+ * @start_katom: Pointer to the atom that completed
+ *
+ * Put any references to virtual memory regions that might have been added by
+ * kbase_job_slot_softstop_start_rp() because the tiler job chain completed
+ * despite any pending soft-stop request.
+ *
+ * If the atom that just completed was soft-stopped during a previous attempt to
+ * run it then there should be a blocked end-of-renderpass atom waiting for it,
+ * which we must unblock to process the output of the tiler job chain.
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool js_complete_start_rp(struct kbase_context *kctx,
+ struct kbase_jd_atom *const start_katom)
+{
+ struct kbase_device *const kbdev = kctx->kbdev;
+ struct kbase_jd_renderpass *rp;
+ bool timer_sync = false;
+
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS)))
+ return false;
+
+ if (WARN_ON(start_katom->renderpass_id >=
+ ARRAY_SIZE(kctx->jctx.renderpasses)))
+ return false;
+
+ rp = &kctx->jctx.renderpasses[start_katom->renderpass_id];
+
+ if (WARN_ON(rp->start_katom != start_katom))
+ return false;
+
+ dev_dbg(kctx->kbdev->dev,
+ "Start atom %p is done in state %d of RP %d\n",
+ (void *)start_katom, (int)rp->state,
+ start_katom->renderpass_id);
+
+ if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE))
+ return false;
+
+ if (rp->state == KBASE_JD_RP_PEND_OOM ||
+ rp->state == KBASE_JD_RP_RETRY_PEND_OOM) {
+ unsigned long flags;
+
+ dev_dbg(kctx->kbdev->dev,
+ "Start atom %p completed before soft-stop\n",
+ (void *)start_katom);
+
+ kbase_gpu_vm_lock(kctx);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ while (!list_empty(&rp->oom_reg_list)) {
+ struct kbase_va_region *reg =
+ list_first_entry(&rp->oom_reg_list,
+ struct kbase_va_region, link);
+
+ WARN_ON(reg->flags & KBASE_REG_VA_FREED);
+ dev_dbg(kctx->kbdev->dev, "Deleting region %p from list\n",
+ (void *)reg);
+ list_del_init(&reg->link);
+ kbase_va_region_alloc_put(kctx, reg);
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ kbase_gpu_vm_unlock(kctx);
+ } else {
+ dev_dbg(kctx->kbdev->dev,
+ "Start atom %p did not exceed memory threshold\n",
+ (void *)start_katom);
+
+ WARN_ON(rp->state != KBASE_JD_RP_START &&
+ rp->state != KBASE_JD_RP_RETRY);
+ }
+
+ if (rp->state == KBASE_JD_RP_RETRY ||
+ rp->state == KBASE_JD_RP_RETRY_PEND_OOM) {
+ struct kbase_jd_atom *const end_katom = rp->end_katom;
+
+ if (!WARN_ON(!end_katom)) {
+ unsigned long flags;
+
+ /* Allow the end of the renderpass to be pulled for
+ * execution again to continue incremental rendering.
+ */
+ dev_dbg(kbdev->dev, "Unblocking end atom %p!\n",
+ (void *)end_katom);
+ atomic_dec(&end_katom->blocked);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ timer_sync = kbase_js_ctx_list_add_pullable_nolock(
+ kbdev, kctx, end_katom->slot_nr);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+ }
+
+ return timer_sync;
+}
+
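/*
 * Illustrative lock-ordering sketch, not part of this patch: the nesting
 * used by js_complete_start_rp() above and js_return_of_end_rp() earlier
 * when walking rp->oom_reg_list. Only the locks these two functions take,
 * in the order they take them, are shown; the function is hypothetical.
 */
static void js_walk_oom_reg_list_example(struct kbase_context *kctx)
{
	struct kbase_device *kbdev = kctx->kbdev;
	unsigned long flags;

	lockdep_assert_held(&kctx->jctx.lock);		/* outermost */

	kbase_gpu_vm_lock(kctx);			/* VA regions */
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* the list itself */

	/* ...walk rp->oom_reg_list here... */

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
	kbase_gpu_vm_unlock(kctx);
}
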
+/**
+ * js_complete_end_rp() - Handle final completion of atom that ends a renderpass
+ * @kctx: Context pointer
+ * @end_katom: Pointer to the atom that completed for the last time
+ *
+ * This function must only be called if the renderpass actually completed
+ * without the tiler job chain at the start using too much memory; otherwise
+ * completion of the end-of-renderpass atom is handled similarly to a soft-stop.
+ */
+static void js_complete_end_rp(struct kbase_context *kctx,
+ struct kbase_jd_atom *const end_katom)
+{
+ struct kbase_device *const kbdev = kctx->kbdev;
+ unsigned long flags;
+ struct kbase_jd_renderpass *rp;
+
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS)))
+ return;
+
+ if (WARN_ON(end_katom->renderpass_id >=
+ ARRAY_SIZE(kctx->jctx.renderpasses)))
+ return;
+
+ rp = &kctx->jctx.renderpasses[end_katom->renderpass_id];
+
+ if (WARN_ON(rp->end_katom != end_katom))
+ return;
+
+ dev_dbg(kbdev->dev, "End atom %p is done in state %d of RP %d\n",
+ (void *)end_katom, (int)rp->state, end_katom->renderpass_id);
+
+ if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) ||
+ WARN_ON(rp->state == KBASE_JD_RP_OOM) ||
+ WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM))
+ return;
+
+ /* Rendering completed without running out of memory.
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON(!list_empty(&rp->oom_reg_list));
+ rp->state = KBASE_JD_RP_COMPLETE;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ dev_dbg(kbdev->dev, "Renderpass %d is complete\n",
+ end_katom->renderpass_id);
+}
+
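/*
 * Illustrative summary, not part of this patch: how the KBASE_JD_RP_* states
 * referenced in this file move between the handlers above. Transitions made
 * outside this section (for example entering the *_PEND_OOM states when the
 * tiler reports out-of-memory) are assumptions here.
 *
 *   PEND_OOM / RETRY_PEND_OOM  --js_return_of_start_rp()-->  OOM / RETRY_OOM
 *   OOM / RETRY_OOM            --js_return_of_end_rp()---->  RETRY
 *   any state except OOM/RETRY_OOM --js_complete_end_rp()->  COMPLETE
 *
 * A hypothetical helper for the check that recurs below ("is this renderpass
 * in a confirmed out-of-memory state?"):
 */
static bool js_rp_state_is_oom(int state)
{
	return state == KBASE_JD_RP_OOM || state == KBASE_JD_RP_RETRY_OOM;
}
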
bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
struct kbase_jd_atom *katom)
{
@@ -2363,6 +2995,16 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
kbdev = kctx->kbdev;
atom_slot = katom->slot_nr;
+ dev_dbg(kbdev->dev, "%s for atom %p (s:%d)\n",
+ __func__, (void *)katom, atom_slot);
+
+ /* Update the incremental rendering state machine.
+ */
+ if (katom->core_req & BASE_JD_REQ_START_RENDERPASS)
+ timer_sync |= js_complete_start_rp(kctx, katom);
+ else if (katom->core_req & BASE_JD_REQ_END_RENDERPASS)
+ js_complete_end_rp(kctx, katom);
+
js_kctx_info = &kctx->jctx.sched_info;
js_devdata = &kbdev->js_data;
@@ -2372,6 +3014,9 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
+ dev_dbg(kbdev->dev, "Atom %p is in runnable_tree\n",
+ (void *)katom);
+
context_idle = !atomic_dec_return(&kctx->atoms_pulled);
atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
kctx->atoms_pulled_slot_pri[atom_slot][prio]--;
@@ -2388,6 +3033,10 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
* all atoms have now been processed, then unblock the slot */
if (!kctx->atoms_pulled_slot_pri[atom_slot][prio]
&& kctx->blocked_js[atom_slot][prio]) {
+ dev_dbg(kbdev->dev,
+ "kctx %p is no longer blocked from submitting on slot %d at priority %d\n",
+ (void *)kctx, atom_slot, prio);
+
kctx->blocked_js[atom_slot][prio] = false;
if (kbase_js_ctx_pullable(kctx, atom_slot, true))
timer_sync |=
@@ -2438,17 +3087,79 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
/* Mark context as inactive. The pm reference will be dropped later in
* jd_done_worker().
*/
- if (context_idle)
+ if (context_idle) {
+ dev_dbg(kbdev->dev, "kctx %p is no longer active\n",
+ (void *)kctx);
kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+ }
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (timer_sync)
kbase_backend_ctx_count_changed(kbdev);
mutex_unlock(&js_devdata->runpool_mutex);
+ dev_dbg(kbdev->dev, "Leaving %s\n", __func__);
return context_idle;
}
+/**
+ * js_end_rp_is_complete() - Check whether an atom that ends a renderpass has
+ * completed for the last time.
+ *
+ * @end_katom: Pointer to the atom that completed on the hardware.
+ *
+ * An atom that ends a renderpass may be run on the hardware several times
+ * before notifying userspace or allowing dependent atoms to be executed.
+ *
+ * This function is used to decide whether or not to allow end-of-renderpass
+ * atom completion. It only returns false if the atom at the start of the
+ * renderpass was soft-stopped because it used too much memory during the most
+ * recent attempt at tiling.
+ *
+ * Return: True if the atom completed for the last time.
+ */
+static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom)
+{
+ struct kbase_context *const kctx = end_katom->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ struct kbase_jd_renderpass *rp;
+
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+ if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS)))
+ return true;
+
+ if (WARN_ON(end_katom->renderpass_id >=
+ ARRAY_SIZE(kctx->jctx.renderpasses)))
+ return true;
+
+ rp = &kctx->jctx.renderpasses[end_katom->renderpass_id];
+
+ if (WARN_ON(rp->end_katom != end_katom))
+ return true;
+
+ dev_dbg(kbdev->dev,
+ "JS complete end atom %p in state %d of RP %d\n",
+ (void *)end_katom, (int)rp->state,
+ end_katom->renderpass_id);
+
+ if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE))
+ return true;
+
+ /* A failed end-of-renderpass atom must not cause a return to the
+ * start of the renderpass.
+ */
+ if (end_katom->event_code != BASE_JD_EVENT_DONE)
+ return true;
+
+ if (rp->state != KBASE_JD_RP_OOM &&
+ rp->state != KBASE_JD_RP_RETRY_OOM)
+ return true;
+
+ dev_dbg(kbdev->dev, "Suppressing end atom completion\n");
+ return false;
+}
+
struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
ktime_t *end_timestamp)
{
@@ -2457,14 +3168,23 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
struct kbase_jd_atom *x_dep = katom->x_post_dep;
kbdev = kctx->kbdev;
-
+ dev_dbg(kbdev->dev, "Atom %p complete in kctx %p (post-dep %p)\n",
+ (void *)katom, (void *)kctx, (void *)x_dep);
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+ if ((katom->core_req & BASE_JD_REQ_END_RENDERPASS) &&
+ !js_end_rp_is_complete(katom)) {
+ katom->event_code = BASE_JD_EVENT_END_RP_DONE;
+ kbase_js_unpull(kctx, katom);
+ return NULL;
+ }
+
if (katom->will_fail_event_code)
katom->event_code = katom->will_fail_event_code;
katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+ dev_dbg(kbdev->dev, "Atom %p status to HW completed\n", (void *)katom);
if (katom->event_code != BASE_JD_EVENT_DONE) {
kbase_js_evict_deps(kctx, katom, katom->slot_nr,
@@ -2478,24 +3198,103 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
/* Unblock cross dependency if present */
if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
- !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) &&
- (x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+ !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) &&
+ (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)) {
bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
false);
x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+ dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %p\n",
+ (void *)x_dep);
+
kbase_js_move_to_tree(x_dep);
+
if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
false))
kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
x_dep->slot_nr);
- if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)
+ if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
+ dev_dbg(kbdev->dev, "Atom %p is in runnable tree\n",
+ (void *)x_dep);
return x_dep;
+ }
+ } else {
+ dev_dbg(kbdev->dev,
+ "No cross-slot dep to unblock for atom %p\n",
+ (void *)katom);
}
return NULL;
}
+/**
+ * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot
+ * dependency
+ * @katom: Pointer to an atom in the slot ringbuffer
+ *
+ * A cross-slot dependency is ignored if necessary to unblock incremental
+ * rendering. If the atom at the start of a renderpass used too much memory
+ * and was soft-stopped, then the atom at the end of the renderpass is submitted
+ * to hardware regardless of its dependency on the start-of-renderpass atom.
+ * This can happen multiple times for the same pair of atoms.
+ *
+ * Return: true to block the atom or false to allow it to be submitted to
+ * hardware
+ */
+bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom)
+{
+ struct kbase_context *const kctx = katom->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_jd_renderpass *rp;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (!(katom->atom_flags &
+ KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+ dev_dbg(kbdev->dev, "Atom %p is not blocked on a cross-slot dependency",
+ (void *)katom);
+ return false;
+ }
+
+ if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) {
+ dev_dbg(kbdev->dev, "Atom %p is blocked on a cross-slot dependency",
+ (void *)katom);
+ return true;
+ }
+
+ if (WARN_ON(katom->renderpass_id >=
+ ARRAY_SIZE(kctx->jctx.renderpasses)))
+ return true;
+
+ rp = &kctx->jctx.renderpasses[katom->renderpass_id];
+ /* We can read a subset of renderpass state without holding
+ * higher-level locks (but not end_katom, for example).
+ */
+
+ WARN_ON(rp->state == KBASE_JD_RP_COMPLETE);
+
+ dev_dbg(kbdev->dev, "End atom has cross-slot dep in state %d\n",
+ (int)rp->state);
+
+ if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM)
+ return true;
+
+ /* Tiler ran out of memory so allow the fragment job chain to run
+ * if it only depends on the tiler job chain.
+ */
+ if (katom->x_pre_dep != rp->start_katom) {
+ dev_dbg(kbdev->dev, "Dependency is on %p not start atom %p\n",
+ (void *)katom->x_pre_dep, (void *)rp->start_katom);
+ return true;
+ }
+
+ dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %p\n",
+ (void *)katom->x_pre_dep);
+
+ return false;
+}
+
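/*
 * Illustrative decision table, not part of this patch, restating
 * kbase_js_atom_blocked_on_x_dep() above. This is the predicate used by
 * kbase_js_dep_resolved_submit(), kbase_js_move_to_tree() and
 * kbase_js_pull() earlier in this file.
 *
 *   X_DEP_BLOCKED flag clear                          -> not blocked
 *   flag set, atom is not end-of-renderpass           -> blocked
 *   end-of-renderpass, state not OOM/RETRY_OOM        -> blocked
 *   end-of-renderpass, OOM state, dependency is not
 *   on the start-of-renderpass atom                   -> blocked
 *   end-of-renderpass, OOM state, dependency is on
 *   the start-of-renderpass atom                      -> dependency ignored
 *
 * A hypothetical caller sketch:
 */
static bool js_try_submit_atom_example(struct kbase_jd_atom *katom)
{
	lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock);

	if (kbase_js_atom_blocked_on_x_dep(katom))
		return false;	/* leave it queued until the dependency clears */

	/* ...hand the atom to the backend here... */
	return true;
}
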
void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
{
struct kbasep_js_device_data *js_devdata;
@@ -2504,6 +3303,9 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
bool ctx_waiting[BASE_JM_MAX_NR_SLOTS];
int js;
+ dev_dbg(kbdev->dev, "%s kbdev %p mask 0x%x\n",
+ __func__, (void *)kbdev, (unsigned int)js_mask);
+
js_devdata = &kbdev->js_data;
down(&js_devdata->schedule_sem);
@@ -2526,15 +3328,24 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
if (!kctx) {
js_mask &= ~(1 << js);
- break; /* No contexts on pullable list */
+ dev_dbg(kbdev->dev,
+ "No kctx on pullable list (s:%d)\n",
+ js);
+ break;
}
if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) {
context_idle = true;
+ dev_dbg(kbdev->dev,
+ "kctx %p is not active (s:%d)\n",
+ (void *)kctx, js);
+
if (kbase_pm_context_active_handle_suspend(
kbdev,
KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+ dev_dbg(kbdev->dev,
+ "Suspend pending (s:%d)\n", js);
/* Suspend pending - return context to
* queue and stop scheduling */
mutex_lock(
@@ -2554,7 +3365,11 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
if (!kbase_js_use_ctx(kbdev, kctx, js)) {
mutex_lock(
&kctx->jctx.sched_info.ctx.jsctx_mutex);
- /* Context can not be used at this time */
+
+ dev_dbg(kbdev->dev,
+ "kctx %p cannot be used at this time\n",
+ kctx);
+
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (kbase_js_ctx_pullable(kctx, js, false)
|| kbase_ctx_flag(kctx, KCTX_PRIVILEGED))
@@ -2585,10 +3400,18 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
kbase_ctx_flag_clear(kctx, KCTX_PULLED);
- if (!kbase_jm_kick(kbdev, 1 << js))
- /* No more jobs can be submitted on this slot */
+ if (!kbase_jm_kick(kbdev, 1 << js)) {
+ dev_dbg(kbdev->dev,
+ "No more jobs can be submitted (s:%d)\n",
+ js);
 js_mask &= ~(1 << js);
+ }
if (!kbase_ctx_flag(kctx, KCTX_PULLED)) {
- bool pullable = kbase_js_ctx_pullable(kctx, js,
+ bool pullable;
+
+ dev_dbg(kbdev->dev,
+ "No atoms pulled from kctx %p (s:%d)\n",
+ (void *)kctx, js);
+
+ pullable = kbase_js_ctx_pullable(kctx, js,
true);
/* Failed to pull jobs - push to head of list.
@@ -2645,7 +3468,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
break; /* Could not run atoms on this slot */
}
- /* Push to back of list */
+ dev_dbg(kbdev->dev, "Push kctx %p to back of list\n",
+ (void *)kctx);
if (kbase_js_ctx_pullable(kctx, js, true))
timer_sync |=
kbase_js_ctx_list_add_pullable_nolock(
@@ -2665,8 +3489,11 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
if (kbdev->hwaccess.active_kctx[js] == last_active[js] &&
- ctx_waiting[js])
+ ctx_waiting[js]) {
+ dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n",
+ (void *)last_active[js], js);
kbdev->hwaccess.active_kctx[js] = NULL;
+ }
}
mutex_unlock(&js_devdata->queue_mutex);
@@ -2867,3 +3694,81 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
+
+
+/* Hold the mmu_hw_mutex and hwaccess_lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ bool result = false;
+ int as_nr;
+
+ if (WARN_ON(kbdev == NULL))
+ return result;
+
+ if (WARN_ON(kctx == NULL))
+ return result;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ as_nr = kctx->as_nr;
+ if (atomic_read(&kctx->refcount) > 0) {
+ KBASE_DEBUG_ASSERT(as_nr >= 0);
+
+ kbase_ctx_sched_retain_ctx_refcount(kctx);
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+ NULL, 0u, atomic_read(&kctx->refcount));
+ result = true;
+ }
+
+ return result;
+}
+
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ unsigned long flags;
+ bool result = false;
+
+ if (WARN_ON(kbdev == NULL))
+ return result;
+
+ if (WARN_ON(kctx == NULL))
+ return result;
+
+ mutex_lock(&kbdev->mmu_hw_mutex);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+
+ return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+ int as_nr)
+{
+ unsigned long flags;
+ struct kbase_context *found_kctx = NULL;
+
+ if (WARN_ON(kbdev == NULL))
+ return NULL;
+
+ if (WARN_ON(as_nr < 0))
+ return NULL;
+
+ if (WARN_ON(as_nr >= BASE_MAX_NR_AS))
+ return NULL;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ found_kctx = kbdev->as_to_kctx[as_nr];
+
+ if (found_kctx != NULL)
+ kbase_ctx_sched_retain_ctx_refcount(found_kctx);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return found_kctx;
+}
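
/*
 * Illustrative usage sketch, not part of this patch: a hypothetical caller
 * that resolves an address space number to its context and keeps it
 * scheduled in while working on it. The reference taken by
 * kbasep_js_runpool_lookup_ctx() must be dropped again with
 * kbasep_js_runpool_release_ctx(), which cannot be called from IRQ context.
 */
static void js_use_ctx_for_as_example(struct kbase_device *kbdev, int as_nr)
{
	struct kbase_context *kctx;

	kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_nr);
	if (!kctx)
		return;	/* no context currently assigned to this AS */

	/* ...operate on kctx while it is guaranteed to stay scheduled... */

	kbasep_js_runpool_release_ctx(kbdev, kctx);
}
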
diff --git a/mali_kbase/mali_kbase_js.h b/mali_kbase/mali_kbase_js.h
index e4bd4a2..51ab023 100644
--- a/mali_kbase/mali_kbase_js.h
+++ b/mali_kbase/mali_kbase_js.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,737 +34,85 @@
#include "context/mali_kbase_context.h"
#include "mali_kbase_defs.h"
#include "mali_kbase_debug.h"
+#include <mali_kbase_ctx_sched.h>
-#include "mali_kbase_js_ctx_attr.h"
+#include <jm/mali_kbase_jm_js.h>
/**
- * @addtogroup base_api
- * @{
- */
-
-/**
- * @addtogroup base_kbase_api
- * @{
- */
-
-/**
- * @addtogroup kbase_js Job Scheduler Internal APIs
- * @{
- *
- * These APIs are Internal to KBase.
- */
-
-/**
- * @brief Initialize the Job Scheduler
- *
- * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
- * initialized before passing to the kbasep_js_devdata_init() function. This is
- * to give efficient error path code.
- */
-int kbasep_js_devdata_init(struct kbase_device * const kbdev);
-
-/**
- * @brief Halt the Job Scheduler.
- *
- * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
- * sub-structure was never initialized/failed initialization, to give efficient
- * error-path code.
- *
- * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
- * be zero initialized before passing to the kbasep_js_devdata_init()
- * function. This is to give efficient error path code.
- *
- * It is a Programming Error to call this whilst there are still kbase_context
- * structures registered with this scheduler.
- *
- */
-void kbasep_js_devdata_halt(struct kbase_device *kbdev);
-
-/**
- * @brief Terminate the Job Scheduler
- *
- * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
- * sub-structure was never initialized/failed initialization, to give efficient
- * error-path code.
- *
- * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
- * be zero initialized before passing to the kbasep_js_devdata_init()
- * function. This is to give efficient error path code.
- *
- * It is a Programming Error to call this whilst there are still kbase_context
- * structures registered with this scheduler.
- */
-void kbasep_js_devdata_term(struct kbase_device *kbdev);
-
-/**
- * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
- *
- * This effectively registers a struct kbase_context with a Job Scheduler.
- *
- * It does not register any jobs owned by the struct kbase_context with the scheduler.
- * Those must be separately registered by kbasep_js_add_job().
- *
- * The struct kbase_context must be zero intitialized before passing to the
- * kbase_js_init() function. This is to give efficient error path code.
- */
-int kbasep_js_kctx_init(struct kbase_context * const kctx);
-
-/**
- * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
- *
- * This effectively de-registers a struct kbase_context from its Job Scheduler
- *
- * It is safe to call this on a struct kbase_context that has never had or failed
- * initialization of its jctx.sched_info member, to give efficient error-path
- * code.
- *
- * For this to work, the struct kbase_context must be zero intitialized before passing
- * to the kbase_js_init() function.
- *
- * It is a Programming Error to call this whilst there are still jobs
- * registered with this context.
- */
-void kbasep_js_kctx_term(struct kbase_context *kctx);
-
-/**
- * @brief Add a job chain to the Job Scheduler, and take necessary actions to
- * schedule the context/run the job.
- *
- * This atomically does the following:
- * - Update the numbers of jobs information
- * - Add the job to the run pool if necessary (part of init_job)
- *
- * Once this is done, then an appropriate action is taken:
- * - If the ctx is scheduled, it attempts to start the next job (which might be
- * this added job)
- * - Otherwise, and if this is the first job on the context, it enqueues it on
- * the Policy Queue
- *
- * The Policy's Queue can be updated by this in the following ways:
- * - In the above case that this is the first job on the context
- * - If the context is high priority and the context is not scheduled, then it
- * could cause the Policy to schedule out a low-priority context, allowing
- * this context to be scheduled in.
- *
- * If the context is already scheduled on the RunPool, then adding a job to it
- * is guarenteed not to update the Policy Queue. And so, the caller is
- * guarenteed to not need to try scheduling a context from the Run Pool - it
- * can safely assert that the result is false.
- *
- * It is a programming error to have more than U32_MAX jobs in flight at a time.
- *
- * The following locking conditions are made on the caller:
- * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
- * - it must \em not hold hwaccess_lock (as this will be obtained internally)
- * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
- * obtained internally)
- * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
- *
- * @return true indicates that the Policy Queue was updated, and so the
- * caller will need to try scheduling a context onto the Run Pool.
- * @return false indicates that no updates were made to the Policy Queue,
- * so no further action is required from the caller. This is \b always returned
- * when the context is currently scheduled.
- */
-bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
-
-/**
- * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
- *
- * Completely removing a job requires several calls:
- * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
- * the atom
- * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
- * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
- * remaining state held as part of the job having been run.
- *
- * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
- *
- * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
- *
- * It is a programming error to call this when:
- * - \a atom is not a job belonging to kctx.
- * - \a atom has already been removed from the Job Scheduler.
- * - \a atom is still in the runpool
- *
- * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
- * kbasep_js_remove_cancelled_job() instead.
- *
- * The following locking conditions are made on the caller:
- * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
- *
- */
-void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
-
-/**
- * @brief Completely remove a job chain from the Job Scheduler, in the case
- * where the job chain was cancelled.
+ * kbasep_js_runpool_retain_ctx_nolock - Refcount a context as being busy,
+ * preventing it from being scheduled
+ * out.
*
- * This is a variant of kbasep_js_remove_job() that takes care of removing all
- * of the retained state too. This is generally useful for cancelled atoms,
- * which need not be handled in an optimal way.
+ * This function can safely be called from IRQ context.
*
- * It is a programming error to call this when:
- * - \a atom is not a job belonging to kctx.
- * - \a atom has already been removed from the Job Scheduler.
- * - \a atom is still in the runpool:
- * - it is not being killed with kbasep_jd_cancel()
- *
- * The following locking conditions are made on the caller:
- * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
- * - it must \em not hold the hwaccess_lock, (as this will be obtained
- * internally)
- * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
- * obtained internally)
+ * The following locks must be held by the caller:
+ * * mmu_hw_mutex, hwaccess_lock
*
- * @return true indicates that ctx attributes have changed and the caller
- * should call kbase_js_sched_all() to try to run more jobs
- * @return false otherwise
+ * Return: true if the retain succeeded and the context will not be
+ * scheduled out, or false if the retain failed (because the context
+ * is being/has been scheduled out).
*/
-bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- struct kbase_jd_atom *katom);
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
/**
- * @brief Refcount a context as being busy, preventing it from being scheduled
- * out.
+ * kbasep_js_runpool_retain_ctx - Refcount a context as being busy, preventing
+ * it from being scheduled out.
*
- * @note This function can safely be called from IRQ context.
+ * This function can safely be called from IRQ context.
*
* The following locking conditions are made on the caller:
- * - it must \em not hold mmu_hw_mutex and hwaccess_lock, because they will be
+ * * it must not hold mmu_hw_mutex and hwaccess_lock, because they will be
* used internally.
*
- * @return value != false if the retain succeeded, and the context will not be scheduled out.
- * @return false if the retain failed (because the context is being/has been scheduled out).
+ * Return: true if the retain succeeded and the context will not be
+ * scheduled out, or false if the retain failed (because the context
+ * is being/has been scheduled out).
*/
-bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
/**
- * @brief Refcount a context as being busy, preventing it from being scheduled
- * out.
- *
- * @note This function can safely be called from IRQ context.
- *
- * The following locks must be held by the caller:
- * - mmu_hw_mutex, hwaccess_lock
- *
- * @return value != false if the retain succeeded, and the context will not be scheduled out.
- * @return false if the retain failed (because the context is being/has been scheduled out).
- */
-bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
-
-/**
- * @brief Lookup a context in the Run Pool based upon its current address space
- * and ensure that is stays scheduled in.
+ * kbasep_js_runpool_lookup_ctx - Lookup a context in the Run Pool based upon
+ * its current address space and ensure that
+ * it stays scheduled in.
*
* The context is refcounted as being busy to prevent it from scheduling
* out. It must be released with kbasep_js_runpool_release_ctx() when it is no
* longer required to stay scheduled in.
*
- * @note This function can safely be called from IRQ context.
+ * This function can safely be called from IRQ context.
*
* The following locking conditions are made on the caller:
- * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * * it must not hold the hwaccess_lock, because it will be used internally.
* If the hwaccess_lock is already held, then the caller should use
* kbasep_js_runpool_lookup_ctx_nolock() instead.
*
- * @return a valid struct kbase_context on success, which has been refcounted as being busy.
- * @return NULL on failure, indicating that no context was found in \a as_nr
- */
-struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
-
-/**
- * @brief Handling the requeuing/killing of a context that was evicted from the
- * policy queue or runpool.
- *
- * This should be used whenever handing off a context that has been evicted
- * from the policy queue or the runpool:
- * - If the context is not dying and has jobs, it gets re-added to the policy
- * queue
- * - Otherwise, it is not added
- *
- * In addition, if the context is dying the jobs are killed asynchronously.
- *
- * In all cases, the Power Manager active reference is released
- * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. \a
- * has_pm_ref must be set to false whenever the context was not previously in
- * the runpool and does not hold a Power Manager active refcount. Note that
- * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
- * active refcount even though they weren't in the runpool.
- *
- * The following locking conditions are made on the caller:
- * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
- * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
- * obtained internally)
- */
-void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
-
-/**
- * @brief Release a refcount of a context being busy, allowing it to be
- * scheduled out.
- *
- * When the refcount reaches zero and the context \em might be scheduled out
- * (depending on whether the Scheudling Policy has deemed it so, or if it has run
- * out of jobs).
- *
- * If the context does get scheduled out, then The following actions will be
- * taken as part of deschduling a context:
- * - For the context being descheduled:
- * - If the context is in the processing of dying (all the jobs are being
- * removed from it), then descheduling also kills off any jobs remaining in the
- * context.
- * - If the context is not dying, and any jobs remain after descheduling the
- * context then it is re-enqueued to the Policy's Queue.
- * - Otherwise, the context is still known to the scheduler, but remains absent
- * from the Policy Queue until a job is next added to it.
- * - In all descheduling cases, the Power Manager active reference (obtained
- * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
- *
- * Whilst the context is being descheduled, this also handles actions that
- * cause more atoms to be run:
- * - Attempt submitting atoms when the Context Attributes on the Runpool have
- * changed. This is because the context being scheduled out could mean that
- * there are more opportunities to run atoms.
- * - Attempt submitting to a slot that was previously blocked due to affinity
- * restrictions. This is usually only necessary when releasing a context
- * happens as part of completing a previous job, but is harmless nonetheless.
- * - Attempt scheduling in a new context (if one is available), and if necessary,
- * running a job from that new context.
- *
- * Unlike retaining a context in the runpool, this function \b cannot be called
- * from IRQ context.
- *
- * It is a programming error to call this on a \a kctx that is not currently
- * scheduled, or that already has a zero refcount.
- *
- * The following locking conditions are made on the caller:
- * - it must \em not hold the hwaccess_lock, because it will be used internally.
- * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
- * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
- * obtained internally)
- * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
- * obtained internally)
- * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
- * obtained internally)
- *
- */
-void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
-
-/**
- * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
- * actions from completing an atom.
- *
- * This is usually called as part of completing an atom and releasing the
- * refcount on the context held by the atom.
- *
- * Therefore, the extra actions carried out are part of handling actions queued
- * on a completed atom, namely:
- * - Releasing the atom's context attributes
- * - Retrying the submission on a particular slot, because we couldn't submit
- * on that slot from an IRQ handler.
- *
- * The locking conditions of this function are the same as those for
- * kbasep_js_runpool_release_ctx()
- */
-void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
-
-/**
- * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
- * kbasep_js_device_data::runpool_mutex and
- * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
- * attempt to schedule new contexts.
- */
-void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
- struct kbase_context *kctx);
-
-/**
- * @brief Schedule in a privileged context
- *
- * This schedules a context in regardless of the context priority.
- * If the runpool is full, a context will be forced out of the runpool and the function will wait
- * for the new context to be scheduled in.
- * The context will be kept scheduled in (and the corresponding address space reserved) until
- * kbasep_js_release_privileged_ctx is called).
- *
- * The following locking conditions are made on the caller:
- * - it must \em not hold the hwaccess_lock, because it will be used internally.
- * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
- * obtained internally)
- * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
- * obtained internally)
- * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
- * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
- * be used internally.
- *
- */
-void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
-
-/**
- * @brief Release a privileged context, allowing it to be scheduled out.
- *
- * See kbasep_js_runpool_release_ctx for potential side effects.
- *
- * The following locking conditions are made on the caller:
- * - it must \em not hold the hwaccess_lock, because it will be used internally.
- * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
- * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
- * obtained internally)
- * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
- * obtained internally)
- *
+ * Return: a valid struct kbase_context on success, which has been refcounted
+ * as being busy, or NULL on failure, indicating that no context was found
+ * in as_nr.
*/
-void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+ int as_nr);
/**
- * @brief Try to submit the next job on each slot
- *
- * The following locks may be used:
- * - kbasep_js_device_data::runpool_mutex
- * - hwaccess_lock
- */
-void kbase_js_try_run_jobs(struct kbase_device *kbdev);
-
-/**
- * @brief Suspend the job scheduler during a Power Management Suspend event.
- *
- * Causes all contexts to be removed from the runpool, and prevents any
- * contexts from (re)entering the runpool.
- *
- * This does not handle suspending the one privileged context: the caller must
- * instead do this by by suspending the GPU HW Counter Instrumentation.
- *
- * This will eventually cause all Power Management active references held by
- * contexts on the runpool to be released, without running any more atoms.
- *
- * The caller must then wait for all Power Mangement active refcount to become
- * zero before completing the suspend.
- *
- * The emptying mechanism may take some time to complete, since it can wait for
- * jobs to complete naturally instead of forcing them to end quickly. However,
- * this is bounded by the Job Scheduler's Job Timeouts. Hence, this
- * function is guaranteed to complete in a finite time.
- */
-void kbasep_js_suspend(struct kbase_device *kbdev);
-
-/**
- * @brief Resume the Job Scheduler after a Power Management Resume event.
- *
- * This restores the actions from kbasep_js_suspend():
- * - Schedules contexts back into the runpool
- * - Resumes running atoms on the GPU
- */
-void kbasep_js_resume(struct kbase_device *kbdev);
-
-/**
- * @brief Submit an atom to the job scheduler.
- *
- * The atom is enqueued on the context's ringbuffer. The caller must have
- * ensured that all dependencies can be represented in the ringbuffer.
- *
- * Caller must hold jctx->lock
- *
- * @param[in] kctx Context pointer
- * @param[in] atom Pointer to the atom to submit
- *
- * @return Whether the context requires to be enqueued. */
-bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
- struct kbase_jd_atom *katom);
-
-/**
- * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
- * @kctx: Context Pointer
- * @prio: Priority (specifies the queue together with js).
- * @js: Job slot (specifies the queue together with prio).
- *
- * Pushes all possible atoms from the linked list to the ringbuffer.
- * Number of atoms are limited to free space in the ringbuffer and
- * number of available atoms in the linked list.
- *
- */
-void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
-/**
- * @brief Pull an atom from a context in the job scheduler for execution.
- *
- * The atom will not be removed from the ringbuffer at this stage.
- *
- * The HW access lock must be held when calling this function.
- *
- * @param[in] kctx Context to pull from
- * @param[in] js Job slot to pull from
- * @return Pointer to an atom, or NULL if there are no atoms for this
- * slot that can be currently run.
- */
-struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
-
-/**
- * @brief Return an atom to the job scheduler ringbuffer.
- *
- * An atom is 'unpulled' if execution is stopped but intended to be returned to
- * later. The most common reason for this is that the atom has been
- * soft-stopped.
- *
- * Note that if multiple atoms are to be 'unpulled', they must be returned in
- * the reverse order to which they were originally pulled. It is a programming
- * error to return atoms in any other order.
- *
- * The HW access lock must be held when calling this function.
- *
- * @param[in] kctx Context pointer
- * @param[in] atom Pointer to the atom to unpull
- */
-void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
-
-/**
- * @brief Complete an atom from jd_done_worker(), removing it from the job
- * scheduler ringbuffer.
- *
- * If the atom failed then all dependee atoms marked for failure propagation
- * will also fail.
- *
- * @param[in] kctx Context pointer
- * @param[in] katom Pointer to the atom to complete
- * @return true if the context is now idle (no jobs pulled)
- * false otherwise
- */
-bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
- struct kbase_jd_atom *katom);
-
-/**
- * @brief Complete an atom.
- *
- * Most of the work required to complete an atom will be performed by
- * jd_done_worker().
- *
- * The HW access lock must be held when calling this function.
- *
- * @param[in] katom Pointer to the atom to complete
- * @param[in] end_timestamp The time that the atom completed (may be NULL)
- *
- * Return: Atom that has now been unblocked and can now be run, or NULL if none
- */
-struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
- ktime_t *end_timestamp);
-
-
-/**
- * @brief Submit atoms from all available contexts.
- *
- * This will attempt to submit as many jobs as possible to the provided job
- * slots. It will exit when either all job slots are full, or all contexts have
- * been used.
- *
- * @param[in] kbdev Device pointer
- * @param[in] js_mask Mask of job slots to submit to
- */
-void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
-
-/**
- * kbase_jd_zap_context - Attempt to deschedule a context that is being
- * destroyed
- * @kctx: Context pointer
- *
- * This will attempt to remove a context from any internal job scheduler queues
- * and perform any other actions to ensure a context will not be submitted
- * from.
- *
- * If the context is currently scheduled, then the caller must wait for all
- * pending jobs to complete before taking any further action.
- */
-void kbase_js_zap_context(struct kbase_context *kctx);
-
-/**
- * @brief Validate an atom
- *
- * This will determine whether the atom can be scheduled onto the GPU. Atoms
- * with invalid combinations of core requirements will be rejected.
- *
- * @param[in] kbdev Device pointer
- * @param[in] katom Atom to validate
- * @return true if atom is valid
- * false otherwise
- */
-bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
- struct kbase_jd_atom *katom);
-
-/**
- * kbase_js_set_timeouts - update all JS timeouts with user specified data
- * @kbdev: Device pointer
- *
- * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
- * set to a positive number then that becomes the new value used, if a timeout
- * is negative then the default is set.
- */
-void kbase_js_set_timeouts(struct kbase_device *kbdev);
-
-/**
- * kbase_js_set_ctx_priority - set the context priority
- * @kctx: Context pointer
- * @new_priority: New priority value for the Context
- *
- * The context priority is set to a new value and it is moved to the
- * pullable/unpullable list as per the new priority.
- */
-void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority);
-
-
-/**
- * kbase_js_update_ctx_priority - update the context priority
- * @kctx: Context pointer
- *
- * The context priority gets updated as per the priority of atoms currently in
- * use for that context, but only if system priority mode for context scheduling
- * is being used.
- */
-void kbase_js_update_ctx_priority(struct kbase_context *kctx);
-
-/*
- * Helpers follow
- */
-
-/**
- * @brief Check that a context is allowed to submit jobs on this policy
- *
- * The purpose of this abstraction is to hide the underlying data size, and wrap up
- * the long repeated line of code.
- *
- * As with any bool, never test the return value with true.
- *
- * The caller must hold hwaccess_lock.
- */
-static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
-{
- u16 test_bit;
-
- /* Ensure context really is scheduled in */
- KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
- KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
-
- test_bit = (u16) (1u << kctx->as_nr);
-
- return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
-}
-
-/**
- * @brief Allow a context to submit jobs on this policy
- *
- * The purpose of this abstraction is to hide the underlying data size, and wrap up
- * the long repeated line of code.
- *
- * The caller must hold hwaccess_lock.
- */
-static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
-{
- u16 set_bit;
-
- /* Ensure context really is scheduled in */
- KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
- KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
-
- set_bit = (u16) (1u << kctx->as_nr);
-
- dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)",
- kctx, kctx->as_nr);
-
- js_devdata->runpool_irq.submit_allowed |= set_bit;
-}
-
-/**
- * @brief Prevent a context from submitting more jobs on this policy
- *
- * The purpose of this abstraction is to hide the underlying data size, and wrap up
- * the long repeated line of code.
- *
- * The caller must hold hwaccess_lock.
- */
-static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
-{
- u16 clear_bit;
- u16 clear_mask;
-
- /* Ensure context really is scheduled in */
- KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
- KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
-
- clear_bit = (u16) (1u << kctx->as_nr);
- clear_mask = ~clear_bit;
-
- dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)",
- kctx, kctx->as_nr);
-
- js_devdata->runpool_irq.submit_allowed &= clear_mask;
-}
-
-/**
- * Create an initial 'invalid' atom retained state, that requires no
- * atom-related work to be done on releasing with
- * kbasep_js_runpool_release_ctx_and_katom_retained_state()
- */
-static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
-{
- retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
- retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
-}
-
-/**
- * Copy atom state that can be made available after jd_done_nolock() is called
- * on that atom.
- */
-static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
-{
- retained_state->event_code = katom->event_code;
- retained_state->core_req = katom->core_req;
- retained_state->sched_priority = katom->sched_priority;
- retained_state->device_nr = katom->device_nr;
-}
-
-/**
- * @brief Determine whether an atom has finished (given its retained state),
- * and so should be given back to userspace/removed from the system.
- *
- * Reasons for an atom not finishing include:
- * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
- *
- * @param[in] katom_retained_state the retained state of the atom to check
- * @return false if the atom has not finished
- * @return !=false if the atom has finished
- */
-static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
-{
- return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
-}
-
-/**
- * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
- *
- * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
- * code should just ignore it.
- *
- * @param[in] katom_retained_state the atom's retained state to check
- * @return false if the retained state is invalid, and can be ignored
- * @return !=false if the retained state is valid
- */
-static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
-{
- return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
-}
-
-/**
- * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * kbasep_js_runpool_lookup_ctx_noretain - Variant of
+ * kbasep_js_runpool_lookup_ctx() that can be used when the
* context is guaranteed to be already previously retained.
*
- * It is a programming error to supply the \a as_nr of a context that has not
+ * It is a programming error to supply the as_nr of a context that has not
* been previously retained/has a busy refcount of zero. The only exception is
- * when there is no ctx in \a as_nr (NULL returned).
+ * when there is no ctx in as_nr (NULL returned).
*
* The following locking conditions are made on the caller:
- * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * * it must not hold the hwaccess_lock, because it will be used internally.
*
- * @return a valid struct kbase_context on success, with a refcount that is guaranteed
- * to be non-zero and unmodified by this function.
- * @return NULL on failure, indicating that no context was found in \a as_nr
+ * Return: a valid struct kbase_context on success, with a refcount that is
+ * guaranteed to be non-zero and unmodified by this function or
+ * return NULL on failure, indicating that no context was found in as_nr.
*/
-static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(
+ struct kbase_device *kbdev, int as_nr)
{
struct kbase_context *found_kctx;
@@ -778,136 +126,4 @@ static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct
return found_kctx;
}
-/*
- * The following locking conditions are made on the caller:
- * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
- * - The caller must hold the kbasep_js_device_data::runpool_mutex
- */
-static inline void kbase_js_runpool_inc_context_count(
- struct kbase_device *kbdev,
- struct kbase_context *kctx)
-{
- struct kbasep_js_device_data *js_devdata;
- struct kbasep_js_kctx_info *js_kctx_info;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- KBASE_DEBUG_ASSERT(kctx != NULL);
-
- js_devdata = &kbdev->js_data;
- js_kctx_info = &kctx->jctx.sched_info;
-
- lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
- lockdep_assert_held(&js_devdata->runpool_mutex);
-
- /* Track total contexts */
- KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
- ++(js_devdata->nr_all_contexts_running);
-
- if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
- /* Track contexts that can submit jobs */
- KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
- S8_MAX);
- ++(js_devdata->nr_user_contexts_running);
- }
-}
-
-/*
- * The following locking conditions are made on the caller:
- * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
- * - The caller must hold the kbasep_js_device_data::runpool_mutex
- */
-static inline void kbase_js_runpool_dec_context_count(
- struct kbase_device *kbdev,
- struct kbase_context *kctx)
-{
- struct kbasep_js_device_data *js_devdata;
- struct kbasep_js_kctx_info *js_kctx_info;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- KBASE_DEBUG_ASSERT(kctx != NULL);
-
- js_devdata = &kbdev->js_data;
- js_kctx_info = &kctx->jctx.sched_info;
-
- lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
- lockdep_assert_held(&js_devdata->runpool_mutex);
-
- /* Track total contexts */
- --(js_devdata->nr_all_contexts_running);
- KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
-
- if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
- /* Track contexts that can submit jobs */
- --(js_devdata->nr_user_contexts_running);
- KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
- }
-}
-
-
-/**
- * @brief Submit atoms from all available contexts to all job slots.
- *
- * This will attempt to submit as many jobs as possible. It will exit when
- * either all job slots are full, or all contexts have been used.
- *
- * @param[in] kbdev Device pointer
- */
-static inline void kbase_js_sched_all(struct kbase_device *kbdev)
-{
- kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
-}
-
-extern const int
-kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
-
-extern const base_jd_prio
-kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
-
-/**
- * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
- * to relative ordering
- * @atom_prio: Priority ID to translate.
- *
- * Atom priority values for @ref base_jd_prio cannot be compared directly to
- * find out which are higher or lower.
- *
- * This function will convert base_jd_prio values for successively lower
- * priorities into a monotonically increasing sequence. That is, the lower the
- * base_jd_prio priority, the higher the value produced by this function. This
- * is in accordance with how the rest of the kernel treats priority.
- *
- * The mapping is 1:1 and the size of the valid input range is the same as the
- * size of the valid output range, i.e.
- * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
- *
- * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
- *
- * Return: On success: a value in the inclusive range
- * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
- * KBASE_JS_ATOM_SCHED_PRIO_INVALID
- */
-static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
-{
- if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
- return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
-
- return kbasep_js_atom_priority_to_relative[atom_prio];
-}
-
-static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
-{
- unsigned int prio_idx;
-
- KBASE_DEBUG_ASSERT(0 <= sched_prio
- && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
-
- prio_idx = (unsigned int)sched_prio;
-
- return kbasep_js_relative_priority_to_atom[prio_idx];
-}
-
- /** @} *//* end group kbase_js */
- /** @} *//* end group base_kbase_api */
- /** @} *//* end group base_api */
-
-#endif /* _KBASE_JS_H_ */
+#endif /* _KBASE_JS_H_ */
diff --git a/mali_kbase/mali_kbase_js_defs.h b/mali_kbase/mali_kbase_js_defs.h
index 052a0b3..f858687 100644
--- a/mali_kbase/mali_kbase_js_defs.h
+++ b/mali_kbase/mali_kbase_js_defs.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -51,12 +51,6 @@ struct kbase_jd_atom;
typedef u32 kbase_context_flags;
-struct kbasep_atom_req {
- base_jd_core_req core_req;
- kbase_context_flags ctx_req;
- u32 device_nr;
-};
-
/** Callback function run on all of a context's jobs registered with the Job
* Scheduler */
typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
@@ -246,24 +240,6 @@ struct kbasep_js_device_data {
} runpool_irq;
/**
- * Run Pool mutex, for managing contexts within the runpool.
- * Unless otherwise specified, you must hold this lock whilst accessing any
- * members that follow
- *
- * In addition, this is used to access:
- * - the kbasep_js_kctx_info::runpool substructure
- */
- struct mutex runpool_mutex;
-
- /**
- * Queue Lock, used to access the Policy's queue of contexts independently
- * of the Run Pool.
- *
- * Of course, you don't need the Run Pool lock to access this.
- */
- struct mutex queue_mutex;
-
- /**
* Scheduling semaphore. This must be held when calling
* kbase_jm_kick()
*/
@@ -299,9 +275,6 @@ struct kbasep_js_device_data {
u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */
- /**< Value for JS_SOFT_JOB_TIMEOUT */
- atomic_t soft_job_timeout_ms;
-
/** List of suspended soft jobs */
struct list_head suspended_soft_jobs_list;
@@ -321,6 +294,27 @@ struct kbasep_js_device_data {
/* Number of contexts that can either be pulled from or are currently
* running */
atomic_t nr_contexts_runnable;
+
+ /** Value for JS_SOFT_JOB_TIMEOUT */
+ atomic_t soft_job_timeout_ms;
+
+ /**
+ * Queue Lock, used to access the Policy's queue of contexts
+ * independently of the Run Pool.
+ *
+ * Of course, you don't need the Run Pool lock to access this.
+ */
+ struct mutex queue_mutex;
+
+ /**
+ * Run Pool mutex, for managing contexts within the runpool.
+ * Unless otherwise specified, you must hold this lock whilst accessing
+ * any members that follow
+ *
+ * In addition, this is used to access:
+ * * the kbasep_js_kctx_info::runpool substructure
+ */
+ struct mutex runpool_mutex;
};
/**
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index de57024..2362e22 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,8 +20,6 @@
*
*/
-
-
/**
* Base kernel memory APIs
*/
@@ -44,6 +42,30 @@
#include <mali_kbase_native_mgm.h>
#include <mali_kbase_mem_pool_group.h>
#include <mmu/mali_kbase_mmu.h>
+#include <mali_kbase_config_defaults.h>
+
+/*
+ * Alignment of objects allocated by the GPU inside a just-in-time memory
+ * region whose size is given by an end address
+ *
+ * This is the alignment of objects allocated by the GPU, but possibly not
+ * fully written to. When taken into account with
+ * KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES it gives the maximum number of bytes
+ * that the JIT memory report size can exceed the actual backed memory size.
+ */
+#define KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES (128u)
+
+/*
+ * Maximum size of objects allocated by the GPU inside a just-in-time memory
+ * region whose size is given by an end address
+ *
+ * This is the maximum size of objects allocated by the GPU, but possibly not
+ * fully written to. When taken into account with
+ * KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES it gives the maximum number of bytes
+ * that the JIT memory report size can exceed the actual backed memory size.
+ */
+#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u)
+
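/*
 * Illustrative arithmetic (editor's sketch, assuming 4 KiB pages): an
 * end-address based JIT report may legitimately point up to
 * KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES - KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES
 * = 512 - 128 = 384 bytes past the last fully backed byte, i.e. PFN_UP(384) =
 * 1 page of extra slack is tolerated when used_pages is compared against the
 * backed size in kbase_mem_jit_trim_pages_from_region() below.
 */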
/* Forward declarations */
static void free_partial_locked(struct kbase_context *kctx,
@@ -672,9 +694,11 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
void kbase_region_tracker_term(struct kbase_context *kctx)
{
+ kbase_gpu_vm_lock(kctx);
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
+ kbase_gpu_vm_unlock(kctx);
}
void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
@@ -812,15 +836,22 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
#endif
int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
- u8 max_allocations, u8 trim_level, int group_id)
+ int max_allocations, int trim_level, int group_id,
+ u64 phys_pages_limit)
{
int err = 0;
- if (trim_level > 100)
+ if (trim_level < 0 || trim_level > BASE_JIT_MAX_TRIM_LEVEL)
+ return -EINVAL;
+
+ if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)
return -EINVAL;
- if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
- WARN_ON(group_id < 0))
+#if MALI_JIT_PRESSURE_LIMIT
+ if (phys_pages_limit > jit_va_pages)
+#else
+ if (phys_pages_limit != jit_va_pages)
+#endif /* MALI_JIT_PRESSURE_LIMIT */
return -EINVAL;
kbase_gpu_vm_lock(kctx);
@@ -839,6 +870,11 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
kctx->trim_level = trim_level;
kctx->jit_va = true;
kctx->jit_group_id = group_id;
+#if MALI_JIT_PRESSURE_LIMIT
+ kctx->jit_phys_pages_limit = phys_pages_limit;
+ dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n",
+ phys_pages_limit);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
}
kbase_gpu_vm_unlock(kctx);
@@ -940,6 +976,12 @@ int kbase_mem_init(struct kbase_device *kbdev)
/* Initialize memory usage */
atomic_set(&memdev->used_pages, 0);
+#ifdef IR_THRESHOLD
+ atomic_set(&memdev->ir_threshold, IR_THRESHOLD);
+#else
+ atomic_set(&memdev->ir_threshold, DEFAULT_IR_THRESHOLD);
+#endif
+
kbdev->mgm_dev = &kbase_native_mgm_dev;
#ifdef CONFIG_OF
@@ -1055,6 +1097,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
new_reg->nr_pages = nr_pages;
INIT_LIST_HEAD(&new_reg->jit_node);
+ INIT_LIST_HEAD(&new_reg->link);
return new_reg;
}
@@ -1109,6 +1152,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
if (WARN_ON(kbase_is_region_invalid(reg)))
return;
+ dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n",
+ (void *)reg);
mutex_lock(&kctx->jit_evict_lock);
@@ -1633,6 +1678,8 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
KBASE_DEBUG_ASSERT(NULL != kctx);
KBASE_DEBUG_ASSERT(NULL != reg);
+ dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n",
+ __func__, (void *)reg, (void *)kctx);
lockdep_assert_held(&kctx->reg_lock);
if (reg->flags & KBASE_REG_NO_USER_FREE) {
@@ -1688,6 +1735,8 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
struct kbase_va_region *reg;
KBASE_DEBUG_ASSERT(kctx != NULL);
+ dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n",
+ __func__, gpu_addr, (void *)kctx);
if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid");
@@ -3049,6 +3098,153 @@ static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kct
return meet_reqs;
}
+#if MALI_JIT_PRESSURE_LIMIT
+/* This function guarantees that *@freed will not exceed @pages_needed
+ */
+static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx,
+ struct kbase_va_region *reg, size_t pages_needed,
+ size_t *freed)
+{
+ int err = 0;
+ size_t available_pages = 0u;
+ const size_t old_pages = kbase_reg_current_backed_size(reg);
+ size_t new_pages = old_pages;
+ size_t to_free = 0u;
+ size_t max_allowed_pages = old_pages;
+
+ lockdep_assert_held(&kctx->jctx.lock);
+ lockdep_assert_held(&kctx->reg_lock);
+
+ /* Is this a JIT allocation that has been reported on? */
+ if (reg->used_pages == reg->nr_pages)
+ goto out;
+
+ if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) {
+ /* For address based memory usage calculation, the GPU
+ * allocates objects of up to size 's', but aligns every object
+ * to alignment 'a', with a < s.
+ *
+ * It also doesn't have to write to all bytes in an object of
+ * size 's'.
+ *
+ * Hence, we can observe the GPU's address for the end of used
+ * memory being up to (s - a) bytes into the first unallocated
+ * page.
+ *
+ * We allow for this and only warn when it exceeds this bound
+ * (rounded up to page sized units). Note, this is allowed to
+ * exceed reg->nr_pages.
+ */
+ max_allowed_pages += PFN_UP(
+ KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES -
+ KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES);
+ } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
+ /* The GPU could report being ready to write to the next
+ * 'extent' sized chunk, but didn't actually write to it, so we
+ * can report up to 'extent' size pages more than the backed
+ * size.
+ *
+ * Note, this is allowed to exceed reg->nr_pages.
+ */
+ max_allowed_pages += reg->extent;
+
+ /* Also note that in these GPUs, the GPU may make a large (>1
+ * page) initial allocation but not actually write out to all
+ * of it. Hence it might report that a much higher amount of
+ * memory was used than actually was written to. This does not
+ * result in a real warning because on growing this memory we
+ * round up the size of the allocation up to an 'extent' sized
+ * chunk, hence automatically bringing the backed size up to
+ * the reported size.
+ */
+ }
+
+ if (old_pages < reg->used_pages) {
+ /* Prevent overflow on available_pages, but only report the
+ * problem if it's in a scenario where used_pages should have
+ * been consistent with the backed size
+ *
+ * Note: In case of a size-based report, this legitimately
+ * happens in common use-cases: we allow for up to this size of
+ * memory being used, but depending on the content it doesn't
+ * have to use all of it.
+ *
+ * Hence, we're much more quiet about that in the size-based
+ * report case - it's not indicating a real problem, it's just
+ * for information
+ */
+ if (max_allowed_pages < reg->used_pages) {
+ if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE))
+ dev_warn(kctx->kbdev->dev,
+ "%s: current backed pages %zu < reported used pages %zu (allowed to be up to %zu) on JIT 0x%llx vapages %zu\n",
+ __func__,
+ old_pages, reg->used_pages,
+ max_allowed_pages,
+ reg->start_pfn << PAGE_SHIFT,
+ reg->nr_pages);
+ else
+ dev_dbg(kctx->kbdev->dev,
+ "%s: no need to trim, current backed pages %zu < reported used pages %zu on size-report for JIT 0x%llx vapages %zu\n",
+ __func__,
+ old_pages, reg->used_pages,
+ reg->start_pfn << PAGE_SHIFT,
+ reg->nr_pages);
+ }
+ /* In any case, no error condition to report here, caller can
+ * try other regions
+ */
+
+ goto out;
+ }
+ available_pages = old_pages - reg->used_pages;
+ to_free = min(available_pages, pages_needed);
+
+ new_pages -= to_free;
+
+ err = kbase_mem_shrink(kctx, reg, new_pages);
+
+out:
+ trace_mali_jit_trim_from_region(reg, to_free, old_pages,
+ available_pages, new_pages);
+ *freed = to_free;
+ return err;
+}
+
+size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx,
+ size_t pages_needed)
+{
+ struct kbase_va_region *reg, *tmp;
+ size_t total_freed = 0;
+
+ kbase_gpu_vm_lock(kctx);
+ mutex_lock(&kctx->jit_evict_lock);
+ list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) {
+ int err;
+ size_t freed = 0u;
+
+ err = kbase_mem_jit_trim_pages_from_region(kctx, reg,
+ pages_needed, &freed);
+
+ if (err) {
+ /* Failed to trim, try the next region */
+ continue;
+ }
+
+ total_freed += freed;
+ WARN_ON(freed > pages_needed);
+ pages_needed -= freed;
+ if (!pages_needed)
+ break;
+ }
+ mutex_unlock(&kctx->jit_evict_lock);
+ kbase_gpu_vm_unlock(kctx);
+
+ trace_mali_jit_trim(total_freed);
+
+ return total_freed;
+}
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
static int kbase_jit_grow(struct kbase_context *kctx,
struct base_jit_alloc_info *info, struct kbase_va_region *reg)
{
@@ -3208,6 +3404,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
{
struct kbase_va_region *reg = NULL;
+#if MALI_JIT_PRESSURE_LIMIT
+ if (info->va_pages > (kctx->jit_phys_pages_limit -
+ kctx->jit_current_phys_pressure) &&
+ kctx->jit_current_phys_pressure > 0) {
+ dev_dbg(kctx->kbdev->dev,
+ "Max JIT page allocations limit reached: active pages %llu, max pages %llu\n",
+ kctx->jit_current_phys_pressure + info->va_pages,
+ kctx->jit_phys_pages_limit);
+ return NULL;
+ }
+#endif /* MALI_JIT_PRESSURE_LIMIT */
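	/*
	 * Worked example (editor's illustration, values hypothetical): with
	 * jit_phys_pages_limit = 1000 pages and jit_current_phys_pressure =
	 * 800 pages, a request of info->va_pages = 300 fails the check above
	 * (300 > 1000 - 800) and returns NULL, whereas a request of 150 pages
	 * proceeds and later adds its va_pages to the pressure via
	 * kbase_jit_report_update_pressure().
	 */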
if (kctx->jit_current_allocations >= kctx->jit_max_allocations) {
/* Too many current allocations */
dev_dbg(kctx->kbdev->dev,
@@ -3228,6 +3435,33 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
return NULL;
}
+#if MALI_JIT_PRESSURE_LIMIT
+ /* Before allocating a new just-in-time memory region or reusing a
+ * previous one, ensure that the total JIT physical page usage also will
+ * not exceed the pressure limit.
+ *
+ * If there are no reported-on allocations, then we already guarantee
+ * this will be the case - because our current pressure then only comes
+ * from the va_pages of each JIT region, hence JIT physical page usage
+ * is guaranteed to be bounded by this.
+ *
+ * However as soon as JIT allocations become "reported on", the
+ * pressure is lowered to allow new JIT regions to be allocated. It is
+ * after such a point that the total JIT physical page usage could
+ * (either now or in the future on a grow-on-GPU-page-fault) exceed the
+ * pressure limit, but only on newly allocated JIT regions. Hence, trim
+ * any "reported on" regions.
+ *
+ * Any pages freed will go into the pool and be allocated from there in
+ * kbase_mem_alloc().
+ *
+ * In future, GPUCORE-21217: Only do this when physical page usage
+ * could exceed the pressure limit, and only trim as much as is
+ * necessary.
+ */
+ kbase_mem_jit_trim_pages(kctx, SIZE_MAX);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
mutex_lock(&kctx->jit_evict_lock);
/*
@@ -3372,6 +3606,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
mutex_unlock(&kctx->jit_evict_lock);
}
+ trace_mali_jit_alloc(reg, info->id);
+
kctx->jit_current_allocations++;
kctx->jit_current_allocations_per_bin[info->bin_id]++;
@@ -3379,6 +3615,13 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
reg->jit_usage_id = info->usage_id;
reg->jit_bin_id = info->bin_id;
+#if MALI_JIT_PRESSURE_LIMIT
+ if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
+ reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE;
+ reg->heap_info_gpu_addr = info->heap_info_gpu_addr;
+ kbase_jit_report_update_pressure(kctx, reg, info->va_pages,
+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
return reg;
@@ -3394,6 +3637,9 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
{
u64 old_pages;
+ /* JIT id not immediately available here, so use 0u */
+ trace_mali_jit_free(reg, 0u);
+
/* Get current size of JIT region */
old_pages = kbase_reg_current_backed_size(reg);
if (reg->initial_commit < old_pages) {
@@ -3404,19 +3650,16 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
div_u64(old_pages * (100 - kctx->trim_level), 100));
u64 delta = old_pages - new_size;
- if (delta) {
- kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta,
- old_pages);
- kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta,
- old_pages);
-
- kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
- if (reg->cpu_alloc != reg->gpu_alloc)
- kbase_free_phy_pages_helper(reg->gpu_alloc,
- delta);
- }
+ if (delta)
+ kbase_mem_shrink(kctx, reg, old_pages - delta);
}
+#if MALI_JIT_PRESSURE_LIMIT
+ reg->heap_info_gpu_addr = 0;
+ kbase_jit_report_update_pressure(kctx, reg, 0,
+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
kctx->jit_current_allocations--;
kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
@@ -3535,6 +3778,118 @@ void kbase_jit_term(struct kbase_context *kctx)
cancel_work_sync(&kctx->jit_work);
}
+#if MALI_JIT_PRESSURE_LIMIT
+void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
+ struct kbase_va_region *reg, unsigned int flags)
+{
+ /* Offset to the location used for a JIT report within the GPU memory
+ *
+ * This constants only used for this debugging function - not useful
+	 * This constant is only used for this debugging function - not useful
+ */
+ const u64 jit_report_gpu_mem_offset = sizeof(u64)*2;
+
+ u64 addr_start;
+ struct kbase_vmap_struct mapping;
+ u64 *ptr;
+
+ if (reg->heap_info_gpu_addr == 0ull)
+ goto out;
+
+ /* Nothing else to trace in the case the memory just contains the
+ * size. Other tracepoints already record the relevant area of memory.
+ */
+ if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)
+ goto out;
+
+ addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset;
+
+ ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE,
+ &mapping);
+ if (!ptr) {
+ dev_warn(kctx->kbdev->dev,
+ "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n",
+ __func__, reg->start_pfn << PAGE_SHIFT,
+ addr_start);
+ goto out;
+ }
+
+ trace_mali_jit_report_gpu_mem(addr_start, reg->start_pfn << PAGE_SHIFT,
+ ptr, flags);
+
+ kbase_vunmap(kctx, &mapping);
+out:
+ return;
+}
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
+#if MALI_JIT_PRESSURE_LIMIT
+void kbase_jit_report_update_pressure(struct kbase_context *kctx,
+ struct kbase_va_region *reg, u64 new_used_pages,
+ unsigned int flags)
+{
+ u64 diff;
+
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ trace_mali_jit_report_pressure(reg, new_used_pages,
+ kctx->jit_current_phys_pressure + new_used_pages -
+ reg->used_pages,
+ flags);
+
+ if (WARN_ON(new_used_pages > reg->nr_pages))
+ return;
+
+ if (reg->used_pages > new_used_pages) {
+ /* We reduced the number of used pages */
+ diff = reg->used_pages - new_used_pages;
+
+ if (!WARN_ON(diff > kctx->jit_current_phys_pressure))
+ kctx->jit_current_phys_pressure -= diff;
+
+ reg->used_pages = new_used_pages;
+
+ /* In the case of pressure reduced on a free, don't attempt to
+ * trim the region: it will soon be placed on the evict_list
+ * so that if we really were close to running out of memory then
+ * the shrinker can reclaim the memory.
+ */
+ if ((flags & KBASE_JIT_REPORT_ON_ALLOC_OR_FREE) == 0u) {
+ size_t freed;
+ int err;
+
+ kbase_gpu_vm_lock(kctx);
+			/* If this was from an allocation made by a single
+			 * BASE_JD_REQ_SOFT_JIT_ALLOC atom that is allowed to
+			 * breach the pressure limit, then check whether we can
+			 * bring the total JIT physical page usage below (or at
+			 * least nearer to) the pressure limit.
+ *
+ * In future, GPUCORE-21217: Only do this when physical
+ * page usage currently exceeds the pressure limit, and
+ * only trim as much as is necessary.
+ */
+ err = kbase_mem_jit_trim_pages_from_region(kctx, reg,
+ SIZE_MAX, &freed);
+ kbase_gpu_vm_unlock(kctx);
+
+ CSTD_UNUSED(freed);
+ /* Nothing to do if trimming failed */
+ CSTD_UNUSED(err);
+ }
+ } else {
+ /* We increased the number of used pages */
+ diff = new_used_pages - reg->used_pages;
+
+ if (!WARN_ON(diff > U64_MAX - kctx->jit_current_phys_pressure))
+ kctx->jit_current_phys_pressure += diff;
+
+ reg->used_pages = new_used_pages;
+ }
+
+}
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
bool kbase_has_exec_va_zone(struct kbase_context *kctx)
{
bool has_exec_va_zone;
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 0ce3037..3f74492 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -184,6 +184,19 @@ struct kbase_mem_phy_alloc {
*/
#define PINNED_ON_IMPORT (1<<31)
+/**
+ * enum kbase_jit_report_flags - Flags for just-in-time memory allocation
+ * pressure limit functions
+ * @KBASE_JIT_REPORT_ON_ALLOC_OR_FREE: Notifying about an update happening due
+ * to a just-in-time memory allocation or free
+ *
+ * Used to control flow within pressure limit related functions, or to provide
+ * extra debugging information
+ */
+enum kbase_jit_report_flags {
+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0)
+};
+
static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
{
KBASE_DEBUG_ASSERT(alloc);
@@ -236,18 +249,35 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m
/**
* A GPU memory region, and attributes for CPU mappings.
+ *
+ * @rblink: Node in a red-black tree of memory regions within the same zone of
+ * the GPU's virtual address space.
+ * @link: Links to neighboring items in a list of growable memory regions
+ * that triggered incremental rendering by growing too much.
+ * @rbtree: Backlink to the red-black tree of memory regions.
+ * @start_pfn: The Page Frame Number in GPU virtual address space.
+ * @nr_pages: The size of the region in pages.
+ * @initial_commit: Initial commit, for aligning the start address and
+ * correctly growing KBASE_REG_TILER_ALIGN_TOP regions.
+ * @threshold_pages: If non-zero and the amount of memory committed to a region
+ * that can grow on page fault exceeds this number of pages
+ * then the driver switches to incremental rendering.
+ * @extent: Number of pages allocated on page fault.
+ * @cpu_alloc: The physical memory we mmap to the CPU when mapping this region.
+ * @gpu_alloc: The physical memory we mmap to the GPU when mapping this region.
+ * @jit_node: Links to neighboring regions in the just-in-time memory pool.
+ * @jit_usage_id: The last just-in-time memory usage ID for this region.
+ * @jit_bin_id: The just-in-time memory bin this region came from.
+ * @va_refcnt: Number of users of this region. Protected by reg_lock.
*/
struct kbase_va_region {
struct rb_node rblink;
struct list_head link;
-
- struct rb_root *rbtree; /* Backlink to rb tree */
-
- u64 start_pfn; /* The PFN in GPU space */
+ struct rb_root *rbtree;
+ u64 start_pfn;
size_t nr_pages;
- /* Initial commit, for aligning the start address and correctly growing
- * KBASE_REG_TILER_ALIGN_TOP regions */
size_t initial_commit;
+ size_t threshold_pages;
/* Free region */
#define KBASE_REG_FREE (1ul << 0)
@@ -332,6 +362,11 @@ struct kbase_va_region {
*/
#define KBASE_REG_VA_FREED (1ul << 26)
+/* If set, the heap info address points to a u32 holding the used size in bytes;
+ * otherwise it points to a u64 holding the lowest address of unused memory.
+ */
+#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27)
+
#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0)
/* only used with 32-bit clients */
@@ -357,22 +392,47 @@ struct kbase_va_region {
unsigned long flags;
-
- size_t extent; /* nr of pages alloc'd on PF */
-
- struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
- struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
-
- /* List head used to store the region in the JIT allocation pool */
+ size_t extent;
+ struct kbase_mem_phy_alloc *cpu_alloc;
+ struct kbase_mem_phy_alloc *gpu_alloc;
struct list_head jit_node;
- /* The last JIT usage ID for this region */
u16 jit_usage_id;
- /* The JIT bin this allocation came from */
u8 jit_bin_id;
+#if MALI_JIT_PRESSURE_LIMIT
+ /* Pointer to an object in GPU memory defining an end of an allocated
+ * region
+ *
+ * The object can be one of:
+ * - u32 value defining the size of the region
+	 *  - u64 pointer to the first unused byte in the region
+ *
+	 * The interpretation of the object depends on the
+	 * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE flag in jit_info_flags - if it is
+	 * set, the heap info object should be interpreted as a size.
+ */
+ u64 heap_info_gpu_addr;
+
+ /* The current estimate of the number of pages used, which in normal
+ * use is either:
+ * - the initial estimate == va_pages
+ * - the actual pages used, as found by a JIT usage report
+ *
+ * Note that since the value is calculated from GPU memory after a JIT
+ * usage report, at any point in time it is allowed to take a random
+ * value that is no greater than va_pages (e.g. it may be greater than
+ * gpu_alloc->nents)
+ */
+ size_t used_pages;
+#endif /* MALI_JIT_PRESSURE_LIMIT */
- int va_refcnt; /* number of users of this va */
+ int va_refcnt;
};
+/* Special marker for failed JIT allocations that still must be marked as
+ * in-use
+ */
+#define KBASE_RESERVED_REG_JIT_ALLOC ((struct kbase_va_region *)-1)
+
static inline bool kbase_is_region_free(struct kbase_va_region *reg)
{
return (!reg || reg->flags & KBASE_REG_FREE);
@@ -411,6 +471,8 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get(
WARN_ON(!region->va_refcnt);
/* non-atomic as kctx->reg_lock is held */
+ dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %p\n",
+ region->va_refcnt, (void *)region);
region->va_refcnt++;
return region;
@@ -426,6 +488,8 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put(
/* non-atomic as kctx->reg_lock is held */
region->va_refcnt--;
+ dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %p\n",
+ region->va_refcnt, (void *)region);
if (!region->va_refcnt)
kbase_region_refcnt_free(region);
@@ -905,21 +969,27 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool);
int kbase_region_tracker_init(struct kbase_context *kctx);
/**
- * kbase_region_tracker_init_jit - Initialize the JIT region
- * @kctx: kbase context
- * @jit_va_pages: Size of the JIT region in pages
- * @max_allocations: Maximum number of allocations allowed for the JIT region
- * @trim_level: Trim level for the JIT region
- * @group_id: The physical group ID from which to allocate JIT memory.
- * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * kbase_region_tracker_init_jit - Initialize the just-in-time memory
+ * allocation region
+ * @kctx: Kbase context.
+ * @jit_va_pages: Size of the JIT region in pages.
+ * @max_allocations: Maximum number of allocations allowed for the JIT region.
+ * Valid range is 0..%BASE_JIT_ALLOC_COUNT.
+ * @trim_level: Trim level for the JIT region.
+ * Valid range is 0..%BASE_JIT_MAX_TRIM_LEVEL.
+ * @group_id: The physical group ID from which to allocate JIT memory.
+ * Valid range is 0..(%MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @phys_pages_limit: Maximum number of physical pages to use to back the JIT
+ * region. Must not exceed @jit_va_pages.
*
* Return: 0 if success, negative error code otherwise.
*/
int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
- u8 max_allocations, u8 trim_level, int group_id);
+ int max_allocations, int trim_level, int group_id,
+ u64 phys_pages_limit);
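/*
 * Usage sketch (editor's illustration; the numeric values are hypothetical and
 * MALI_JIT_PRESSURE_LIMIT is assumed to be enabled, which allows
 * phys_pages_limit to be smaller than jit_va_pages).
 */
#if 0
/* Hypothetical example caller, not part of the driver. */
static int example_init_jit(struct kbase_context *kctx)
{
	return kbase_region_tracker_init_jit(kctx,
			1 << 20,  /* jit_va_pages */
			255,      /* max_allocations (must not exceed BASE_JIT_ALLOC_COUNT) */
			0,        /* trim_level: 0 => keep all backed pages on free */
			0,        /* group_id */
			1 << 18); /* phys_pages_limit, <= jit_va_pages */
}
#endif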
/**
- * kbase_region_tracker_init_exec - Initialize the EXEC_VA region
+ * kbase_region_tracker_init_exec - Initialize the GPU-executable memory region
* @kctx: kbase context
 * @exec_va_pages: Size of the EXEC_VA region in pages.
* It must not be greater than 4 GB.
@@ -1096,8 +1166,6 @@ int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset);
void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa,
struct tagged_addr gpu_pa, off_t offset, size_t size,
enum kbase_sync_type sync_fn);
-void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
-void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
/* OS specific functions */
int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
@@ -1427,6 +1495,93 @@ bool kbase_jit_evict(struct kbase_context *kctx);
*/
void kbase_jit_term(struct kbase_context *kctx);
+#if MALI_JIT_PRESSURE_LIMIT
+/**
+ * kbase_trace_jit_report_gpu_mem_trace_enabled - variant of
+ * kbase_trace_jit_report_gpu_mem() that should only be called once the
+ * corresponding tracepoint is verified to be enabled
+ * @kctx: kbase context
+ * @reg: Just-in-time memory region to trace
+ * @flags: combination of values from enum kbase_jit_report_flags
+ */
+void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
+ struct kbase_va_region *reg, unsigned int flags);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
+/**
+ * kbase_trace_jit_report_gpu_mem - Trace information about the GPU memory used
+ * to make a JIT report
+ * @kctx: kbase context
+ * @reg: Just-in-time memory region to trace
+ * @flags: combination of values from enum kbase_jit_report_flags
+ *
+ * Information is traced using the trace_mali_jit_report_gpu_mem() tracepoint.
+ *
+ * In case that tracepoint is not enabled, this function should have the same
+ * low overheads as a tracepoint itself (i.e. use of 'jump labels' to avoid
+ * conditional branches)
+ *
+ * This can take the reg_lock on @kctx, do not use in places where this lock is
+ * already held.
+ *
+ * Note: this has to be a macro because at this stage the tracepoints have not
+ * been included. Also gives no opportunity for the compiler to mess up
+ * inlining it.
+ */
+#if MALI_JIT_PRESSURE_LIMIT
+#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \
+ do { \
+ if (trace_mali_jit_report_gpu_mem_enabled()) \
+ kbase_trace_jit_report_gpu_mem_trace_enabled( \
+ (kctx), (reg), (flags)); \
+ } while (0)
+#else
+#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \
+ CSTD_NOP(kctx, reg, flags)
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
+#if MALI_JIT_PRESSURE_LIMIT
+/**
+ * kbase_jit_report_update_pressure - safely update the JIT physical page
+ * pressure and JIT region's estimate of used_pages
+ * @kctx: kbase context, to update the current physical pressure
+ * @reg: Just-in-time memory region to update with @new_used_pages
+ * @new_used_pages: new value of number of pages used in the JIT region
+ * @flags: combination of values from enum kbase_jit_report_flags
+ *
+ * Takes care of:
+ * - correctly updating the pressure given the current reg->used_pages and
+ * new_used_pages
+ * - then updating the %kbase_va_region used_pages member
+ *
+ * Precondition:
+ * - new_used_pages <= reg->nr_pages
+ */
+void kbase_jit_report_update_pressure(struct kbase_context *kctx,
+ struct kbase_va_region *reg, u64 new_used_pages,
+ unsigned int flags);
+
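/*
 * Worked example (editor's illustration): if reg->used_pages is currently 100
 * and a usage report gives new_used_pages = 60, the call decreases
 * kctx->jit_current_phys_pressure by 40 and sets reg->used_pages = 60; a later
 * report of 90 adds 30 back. A reduction reported without
 * KBASE_JIT_REPORT_ON_ALLOC_OR_FREE in @flags additionally trims the region.
 */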
+/**
+ * kbase_mem_jit_trim_pages - Trim JIT regions until sufficient pages have been
+ * freed
+ * @kctx: Pointer to the kbase context whose active JIT allocations will be
+ * checked.
+ * @pages_needed: The maximum number of pages to trim.
+ *
+ * This function checks all active JIT allocations in @kctx for unused pages
+ * at the end, trims the backed memory regions of those allocations down to
+ * the used portion and frees the unused pages into the page pool.
+ *
+ * Specifying @pages_needed allows us to stop early when there's enough
+ * physical memory freed to sufficiently bring down the total JIT physical page
+ * usage (e.g. to below the pressure limit)
+ *
+ * Return: Total number of successfully freed pages
+ */
+size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx,
+ size_t pages_needed);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
/**
* kbase_has_exec_va_zone - EXEC_VA zone predicate
*
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index 57667be..219e0af 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -85,6 +85,8 @@
#define KBASE_MEM_ION_SYNC_WORKAROUND
#endif
+#define IR_THRESHOLD_STEPS (256u)
+
static int kbase_vmap_phy_pages(struct kbase_context *kctx,
struct kbase_va_region *reg, u64 offset_bytes, size_t size,
@@ -94,6 +96,10 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+ struct kbase_va_region *reg,
+ u64 new_pages, u64 old_pages);
+
/* Retrieve the associated region pointer if the GPU address corresponds to
* one of the event memory pages. The enclosing region, if found, shouldn't
* have been marked as free.
@@ -282,7 +288,9 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
if (!(*flags & BASE_MEM_FLAG_MAP_FIXED))
*gpu_va = 0; /* return 0 on failure */
else
- dev_err(dev, "Keeping requested GPU VA of 0x%llx\n", (unsigned long long)*gpu_va);
+ dev_err(dev,
+ "Keeping requested GPU VA of 0x%llx\n",
+ (unsigned long long)*gpu_va);
if (!kbase_check_alloc_flags(*flags)) {
dev_warn(dev,
@@ -355,6 +363,15 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
goto prepare_failed;
}
+ if (*flags & BASE_MEM_GROW_ON_GPF) {
+ unsigned int const ir_threshold = atomic_read(
+ &kctx->kbdev->memdev.ir_threshold);
+
+ reg->threshold_pages = ((va_pages * ir_threshold) +
+ (IR_THRESHOLD_STEPS / 2)) / IR_THRESHOLD_STEPS;
+ } else
+ reg->threshold_pages = 0;
+
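	/*
	 * Worked example (editor's illustration, threshold value hypothetical):
	 * with ir_threshold = 192 out of IR_THRESHOLD_STEPS (256) and
	 * va_pages = 1000, threshold_pages = (1000 * 192 + 128) / 256 = 750,
	 * i.e. the driver considers switching to incremental rendering once
	 * more than ~75% of the growable region has been committed.
	 */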
if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) {
/* kbase_check_alloc_sizes() already checks extent is valid for
* assigning to reg->extent */
@@ -1978,9 +1995,22 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
(old_pages - new_pages)<<PAGE_SHIFT, 1);
}
-int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
- struct kbase_va_region *reg,
- u64 new_pages, u64 old_pages)
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx: Context the region belongs to
+ * @reg: The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA; only the page tables
+ * will be modified by this function.
+ */
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
+ struct kbase_va_region *const reg,
+ u64 const new_pages, u64 const old_pages)
{
u64 delta = old_pages - new_pages;
int ret = 0;
@@ -2089,23 +2119,9 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
goto out_unlock;
}
} else {
- delta = old_pages - new_pages;
-
- /* Update all CPU mapping(s) */
- kbase_mem_shrink_cpu_mapping(kctx, reg,
- new_pages, old_pages);
-
- /* Update the GPU mapping */
- res = kbase_mem_shrink_gpu_mapping(kctx, reg,
- new_pages, old_pages);
- if (res) {
+ res = kbase_mem_shrink(kctx, reg, new_pages);
+ if (res)
res = -ENOMEM;
- goto out_unlock;
- }
-
- kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
- if (reg->cpu_alloc != reg->gpu_alloc)
- kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
}
out_unlock:
@@ -2118,6 +2134,43 @@ out_unlock:
return res;
}
+int kbase_mem_shrink(struct kbase_context *const kctx,
+ struct kbase_va_region *const reg, u64 const new_pages)
+{
+ u64 delta, old_pages;
+ int err;
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (WARN_ON(!kctx))
+ return -EINVAL;
+
+ if (WARN_ON(!reg))
+ return -EINVAL;
+
+ old_pages = kbase_reg_current_backed_size(reg);
+ if (WARN_ON(old_pages < new_pages))
+ return -EINVAL;
+
+ delta = old_pages - new_pages;
+
+ /* Update the GPU mapping */
+ err = kbase_mem_shrink_gpu_mapping(kctx, reg,
+ new_pages, old_pages);
+ if (err >= 0) {
+ /* Update all CPU mapping(s) */
+ kbase_mem_shrink_cpu_mapping(kctx, reg,
+ new_pages, old_pages);
+
+ kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+ if (reg->cpu_alloc != reg->gpu_alloc)
+ kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+ }
+
+ return err;
+}
+
+
static void kbase_cpu_vm_open(struct vm_area_struct *vma)
{
struct kbase_cpu_mapping *map = vma->vm_private_data;
@@ -2880,6 +2933,20 @@ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
}
KBASE_EXPORT_TEST_API(kbase_vunmap);
+static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0))
+ /* To avoid the build breakage due to an unexported kernel symbol
+ * 'mm_trace_rss_stat' from later kernels, i.e. from V5.5.0 onwards,
+ * we inline here the equivalent of 'add_mm_counter()' from linux
+ * kernel V5.4.0~8.
+ */
+ atomic_long_add(value, &mm->rss_stat.count[member]);
+#else
+ add_mm_counter(mm, member, value);
+#endif
+}
+
void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
{
struct mm_struct *mm;
@@ -2889,10 +2956,10 @@ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
if (mm) {
atomic_add(pages, &kctx->nonmapped_pages);
#ifdef SPLIT_RSS_COUNTING
- add_mm_counter(mm, MM_FILEPAGES, pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
#else
spin_lock(&mm->page_table_lock);
- add_mm_counter(mm, MM_FILEPAGES, pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
spin_unlock(&mm->page_table_lock);
#endif
}
@@ -2917,10 +2984,10 @@ static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
pages = atomic_xchg(&kctx->nonmapped_pages, 0);
#ifdef SPLIT_RSS_COUNTING
- add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
#else
spin_lock(&mm->page_table_lock);
- add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
spin_unlock(&mm->page_table_lock);
#endif
}
diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h
index 02f1c3b..cd094b3 100644
--- a/mali_kbase/mali_kbase_mem_linux.h
+++ b/mali_kbase/mali_kbase_mem_linux.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010, 2012-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -129,6 +129,18 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages);
/**
+ * kbase_mem_shrink - Shrink the physical backing size of a region
+ *
+ * @kctx: The kernel context
+ * @reg: The GPU region
+ * @new_pages: Number of physical pages to back the region with
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_shrink(struct kbase_context *kctx,
+ struct kbase_va_region *reg, u64 new_pages);
+
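/*
 * Usage sketch (editor's illustration, hypothetical helper): shrink a region's
 * physical backing to half of its currently backed size. kbase_mem_shrink()
 * asserts kctx->reg_lock via lockdep, which kbase_gpu_vm_lock() takes, as the
 * JIT trim path in this patch does.
 */
#if 0
static int example_shrink_to_half(struct kbase_context *kctx,
		struct kbase_va_region *reg)
{
	u64 backed;
	int err;

	kbase_gpu_vm_lock(kctx);
	backed = kbase_reg_current_backed_size(reg);
	err = kbase_mem_shrink(kctx, reg, backed / 2);
	kbase_gpu_vm_unlock(kctx);

	return err;
}
#endif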
+/**
* kbase_context_mmap - Memory map method, gets invoked when mmap system call is
* issued on device file /dev/malixx.
* @kctx: The kernel context
@@ -334,23 +346,6 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
u64 new_pages, u64 old_pages);
/**
- * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
- * @kctx: Context the region belongs to
- * @reg: The GPU region or NULL if there isn't one
- * @new_pages: The number of pages after the shrink
- * @old_pages: The number of pages before the shrink
- *
- * Return: 0 on success, negative -errno on error
- *
- * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
- * itself is unmodified as we still need to reserve the VA, only the page tables
- * will be modified by this function.
- */
-int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
- struct kbase_va_region *reg,
- u64 new_pages, u64 old_pages);
-
-/**
* kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a
* physical allocation
* @kctx: The kernel base context associated with the mapping
diff --git a/mali_kbase/mali_kbase_mipe_gen_header.h b/mali_kbase/mali_kbase_mipe_gen_header.h
index 99475b6..ec52122 100644
--- a/mali_kbase/mali_kbase_mipe_gen_header.h
+++ b/mali_kbase/mali_kbase_mipe_gen_header.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,101 +20,198 @@
*
*/
+/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py.
+ * DO NOT EDIT.
+ */
+
+/* clang-format off */
+
#include "mali_kbase_mipe_proto.h"
/**
 * This header generates a MIPE tracepoint declaration BLOB at
* compile time.
*
- * Before including this header, the following parameters
- * must be defined:
+ * It is intentional that there is no header guard.
+ * The header can be included multiple times to
+ * compile different blobs.
+ *
+ * Before including this header MIPE_HEADER_* parameters must be
+ * defined. See documentation below:
+ */
+
+/**
+ * The name of the variable where the result BLOB will be stored.
+ */
+#if !defined(MIPE_HEADER_BLOB_VAR_NAME)
+#error "MIPE_HEADER_BLOB_VAR_NAME must be defined!"
+#endif
+
+/**
+ * A compiler attribute for the BLOB variable.
+ *
+ * e.g. __attribute__((section("my_section")))
*
- * MIPE_HEADER_BLOB_VAR_NAME: the name of the variable
- * where the result BLOB will be stored.
+ * Default value is no attribute.
+ */
+#if !defined(MIPE_HEADER_BLOB_VAR_ATTRIBUTE)
+#define MIPE_HEADER_BLOB_VAR_ATTRIBUTE
+#endif
+
+/**
+ * MIPE stream id.
+ *
+ * See enum tl_stream_id.
+ */
+#if !defined(MIPE_HEADER_STREAM_ID)
+#error "MIPE_HEADER_STREAM_ID must be defined!"
+#endif
+
+/**
+ * MIPE packet class.
+ *
+ * See enum tl_packet_class.
+ */
+#if !defined(MIPE_HEADER_PKT_CLASS)
+#error "MIPE_HEADER_PKT_CLASS must be defined!"
+#endif
+
+/**
+ * The list of tracepoints to process.
*
- * MIPE_HEADER_TP_LIST: the list of tracepoints to process.
* It should be defined as follows:
- * #define MIPE_HEADER_TP_LIST \
- * TP_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \
- * TP_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \
+ * #define MIPE_HEADER_TRACEPOINT_LIST \
+ * TRACEPOINT_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \
+ * TRACEPOINT_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \
* etc.
+ *
 * Where the first argument is the tracepoint name, the second
 * argument is a short tracepoint description, the third argument is the
 * argument types (see MIPE documentation), and the fourth argument
 * is the comma-separated argument names.
- *
- * MIPE_HEADER_TP_LIST_COUNT: number of entries in MIPE_HEADER_TP_LIST.
- *
- * MIPE_HEADER_PKT_CLASS: MIPE packet class.
*/
-
-#if !defined(MIPE_HEADER_BLOB_VAR_NAME)
-#error "MIPE_HEADER_BLOB_VAR_NAME must be defined!"
+#if !defined(MIPE_HEADER_TRACEPOINT_LIST)
+#error "MIPE_HEADER_TRACEPOINT_LIST must be defined!"
#endif
-#if !defined(MIPE_HEADER_TP_LIST)
-#error "MIPE_HEADER_TP_LIST must be defined!"
+/**
+ * The number of entries in MIPE_HEADER_TRACEPOINT_LIST.
+ */
+#if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE)
+#error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!"
#endif
-#if !defined(MIPE_HEADER_TP_LIST_COUNT)
-#error "MIPE_HEADER_TP_LIST_COUNT must be defined!"
+/**
+ * The list of enums to process.
+ *
+ * It should be defined as follows:
+ * #define MIPE_HEADER_ENUM_LIST \
+ * ENUM_DESC(enum_arg_name, enum_value) \
+ * ENUM_DESC(enum_arg_name, enum_value) \
+ * etc.
+ *
+ * Where enum_arg_name is the name of a tracepoint argument being used with
+ * this enum. enum_value is a valid C enum value.
+ *
+ * Default value is an empty list.
+ */
+#if defined(MIPE_HEADER_ENUM_LIST)
+
+/**
+ * Tracepoint message ID used for enums declaration.
+ */
+#if !defined(MIPE_HEADER_ENUM_MSG_ID)
+#error "MIPE_HEADER_ENUM_MSG_ID must be defined!"
#endif
-#if !defined(MIPE_HEADER_PKT_CLASS)
-#error "MIPE_HEADER_PKT_CLASS must be defined!"
+#else
+#define MIPE_HEADER_ENUM_LIST
#endif
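
Taken together, a client defines the MIPE_HEADER_* parameters above and then includes this generator header to emit a blob. The sketch below is purely illustrative: the blob variable, section name and tracepoint ID are assumptions, not taken from the driver.

/* Hypothetical blob generation - names below are illustrative only */
enum { KBASE_EXAMPLE_TP = 0 };	/* assumed tracepoint message ID */

#define MIPE_HEADER_BLOB_VAR_NAME        __example_obj_desc_header
#define MIPE_HEADER_BLOB_VAR_ATTRIBUTE   __attribute__((section("_example_section")))
#define MIPE_HEADER_STREAM_ID            TL_STREAM_ID_KERNEL
#define MIPE_HEADER_PKT_CLASS            TL_PACKET_CLASS_OBJ
#define MIPE_HEADER_TRACEPOINT_LIST \
	TRACEPOINT_DESC(KBASE_EXAMPLE_TP, \
		"example tracepoint", \
		"@pI", \
		"object,value")
#define MIPE_HEADER_TRACEPOINT_LIST_SIZE 1

#include "mali_kbase_mipe_gen_header.h"	/* this generator header */

Because every MIPE_HEADER_* macro is #undef'd again at the end of this file, the same pattern can be repeated with different parameters to generate further blobs.
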
-static const struct {
+/**
+ * The MIPE tracepoint declaration BLOB.
+ */
+const struct
+{
u32 _mipe_w0;
u32 _mipe_w1;
u8 _protocol_version;
u8 _pointer_size;
u32 _tp_count;
-#define TP_DESC(name, desc, arg_types, arg_names) \
- struct { \
- u32 _name; \
- u32 _size_string_name; \
- char _string_name[sizeof(#name)]; \
- u32 _size_desc; \
- char _desc[sizeof(desc)]; \
- u32 _size_arg_types; \
- char _arg_types[sizeof(arg_types)]; \
- u32 _size_arg_names; \
- char _arg_names[sizeof(arg_names)]; \
+#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \
+ struct { \
+ u32 _name; \
+ u32 _size_string_name; \
+ char _string_name[sizeof(#name)]; \
+ u32 _size_desc; \
+ char _desc[sizeof(desc)]; \
+ u32 _size_arg_types; \
+ char _arg_types[sizeof(arg_types)]; \
+ u32 _size_arg_names; \
+ char _arg_names[sizeof(arg_names)]; \
} __attribute__ ((__packed__)) __ ## name;
- MIPE_HEADER_TP_LIST
-#undef TP_DESC
+#define ENUM_DESC(arg_name, value) \
+ struct { \
+ u32 _msg_id; \
+ u32 _arg_name_len; \
+ char _arg_name[sizeof(#arg_name)]; \
+ u32 _value; \
+ u32 _value_str_len; \
+ char _value_str[sizeof(#value)]; \
+ } __attribute__ ((__packed__)) __ ## arg_name ## _ ## value;
-} __attribute__ ((__packed__)) MIPE_HEADER_BLOB_VAR_NAME = {
+ MIPE_HEADER_TRACEPOINT_LIST
+ MIPE_HEADER_ENUM_LIST
+#undef TRACEPOINT_DESC
+#undef ENUM_DESC
+} __attribute__((packed)) MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = {
._mipe_w0 = MIPE_PACKET_HEADER_W0(
TL_PACKET_FAMILY_TL,
MIPE_HEADER_PKT_CLASS,
TL_PACKET_TYPE_HEADER,
- 1),
+ MIPE_HEADER_STREAM_ID),
._mipe_w1 = MIPE_PACKET_HEADER_W1(
sizeof(MIPE_HEADER_BLOB_VAR_NAME) - PACKET_HEADER_SIZE,
0),
._protocol_version = SWTRACE_VERSION,
._pointer_size = sizeof(void *),
- ._tp_count = MIPE_HEADER_TP_LIST_COUNT,
-#define TP_DESC(name, desc, arg_types, arg_names) \
- .__ ## name = { \
- ._name = name, \
- ._size_string_name = sizeof(#name), \
- ._string_name = #name, \
- ._size_desc = sizeof(desc), \
- ._desc = desc, \
- ._size_arg_types = sizeof(arg_types), \
- ._arg_types = arg_types, \
- ._size_arg_names = sizeof(arg_names), \
- ._arg_names = arg_names \
+ ._tp_count = MIPE_HEADER_TRACEPOINT_LIST_SIZE,
+#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \
+ .__ ## name = { \
+ ._name = name, \
+ ._size_string_name = sizeof(#name), \
+ ._string_name = #name, \
+ ._size_desc = sizeof(desc), \
+ ._desc = desc, \
+ ._size_arg_types = sizeof(arg_types), \
+ ._arg_types = arg_types, \
+ ._size_arg_names = sizeof(arg_names), \
+ ._arg_names = arg_names \
},
- MIPE_HEADER_TP_LIST
-#undef TP_DESC
+#define ENUM_DESC(arg_name, value) \
+ .__ ## arg_name ## _ ## value = { \
+ ._msg_id = MIPE_HEADER_ENUM_MSG_ID, \
+ ._arg_name_len = sizeof(#arg_name), \
+ ._arg_name = #arg_name, \
+ ._value = value, \
+ ._value_str_len = sizeof(#value), \
+ ._value_str = #value \
+ },
+
+ MIPE_HEADER_TRACEPOINT_LIST
+ MIPE_HEADER_ENUM_LIST
+#undef TRACEPOINT_DESC
+#undef ENUM_DESC
};
#undef MIPE_HEADER_BLOB_VAR_NAME
-#undef MIPE_HEADER_TP_LIST
-#undef MIPE_HEADER_TP_LIST_COUNT
+#undef MIPE_HEADER_BLOB_VAR_ATTRIBUTE
+#undef MIPE_HEADER_STREAM_ID
#undef MIPE_HEADER_PKT_CLASS
+#undef MIPE_HEADER_TRACEPOINT_LIST
+#undef MIPE_HEADER_TRACEPOINT_LIST_SIZE
+#undef MIPE_HEADER_ENUM_LIST
+#undef MIPE_HEADER_ENUM_MSG_ID
+
+/* clang-format on */
diff --git a/mali_kbase/mali_kbase_mipe_proto.h b/mali_kbase/mali_kbase_mipe_proto.h
index 1a0b8b4..54667cf 100644
--- a/mali_kbase/mali_kbase_mipe_proto.h
+++ b/mali_kbase/mali_kbase_mipe_proto.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,6 +20,12 @@
*
*/
+/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py.
+ * DO NOT EDIT.
+ */
+
+/* clang-format off */
+
#if !defined(_KBASE_MIPE_PROTO_H)
#define _KBASE_MIPE_PROTO_H
@@ -109,5 +115,13 @@ enum tl_packet_type {
TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
};
+/* Stream ID types (timeline family). */
+enum tl_stream_id {
+ TL_STREAM_ID_USER = 0, /* User-space driver Timeline stream. */
+ TL_STREAM_ID_KERNEL = 1, /* Kernel-space driver Timeline stream. */
+ TL_STREAM_ID_CSFFW = 2, /* CSF firmware driver Timeline stream. */
+};
+
#endif /* _KBASE_MIPE_PROTO_H */
+/* clang-format on */
diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c
index 2251031..2adbb21 100644
--- a/mali_kbase/mali_kbase_pm.c
+++ b/mali_kbase/mali_kbase_pm.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/mali_kbase_smc.c b/mali_kbase/mali_kbase_smc.c
index 3470f58..b5c7b12 100644
--- a/mali_kbase/mali_kbase_smc.c
+++ b/mali_kbase/mali_kbase_smc.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2015, 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015, 2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index c264d0b..45ce8ad 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -719,6 +719,36 @@ out_cleanup:
return ret;
}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc,
+ unsigned long page_num, struct page **page)
+{
+ struct sg_table *sgt = gpu_alloc->imported.umm.sgt;
+ struct sg_page_iter sg_iter;
+ unsigned long page_index = 0;
+
+ if (WARN_ON(gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+ return NULL;
+
+ if (!sgt)
+ return NULL;
+
+ if (WARN_ON(page_num >= gpu_alloc->nents))
+ return NULL;
+
+ for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0) {
+ if (page_index == page_num) {
+ *page = sg_page_iter_page(&sg_iter);
+
+ return kmap(*page);
+ }
+ page_index++;
+ }
+
+ return NULL;
+}
+#endif
+
int kbase_mem_copy_from_extres(struct kbase_context *kctx,
struct kbase_debug_copy_buffer *buf_data)
{
@@ -779,16 +809,23 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx,
for (i = 0; i < dma_to_copy/PAGE_SIZE &&
target_page_nr < buf_data->nr_pages; i++) {
-
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+ struct page *pg;
+ void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg);
+#else
void *extres_page = dma_buf_kmap(dma_buf, i);
-
+#endif
if (extres_page) {
ret = kbase_mem_copy_to_pinned_user_pages(
pages, extres_page, &to_copy,
buf_data->nr_pages,
&target_page_nr, offset);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+ kunmap(pg);
+#else
dma_buf_kunmap(dma_buf, i, extres_page);
+#endif
if (ret)
goto out_unlock;
}
@@ -831,6 +868,7 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom)
int kbasep_jit_alloc_validate(struct kbase_context *kctx,
struct base_jit_alloc_info *info)
{
+ int j;
/* If the ID is zero, then fail the job */
if (info->id == 0)
return -EINVAL;
@@ -843,46 +881,82 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx,
if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0)
return -EINVAL;
- if (kctx->jit_version == 1) {
- /* Old JIT didn't have usage_id, max_allocations, bin_id
- * or padding, so force them to zero
- */
- info->usage_id = 0;
- info->max_allocations = 0;
- info->bin_id = 0;
- info->flags = 0;
- memset(info->padding, 0, sizeof(info->padding));
- } else {
- int j;
-
- /* Check padding is all zeroed */
- for (j = 0; j < sizeof(info->padding); j++) {
- if (info->padding[j] != 0) {
- return -EINVAL;
- }
- }
+ /* Interface version 2 (introduced with kernel driver version 11.5)
+ * onward has padding and a flags member to validate.
+ *
+ * Note: To support earlier versions the extra bytes will have been set
+ * to 0 by the caller.
+ */
- /* No bit other than TILER_ALIGN_TOP shall be set */
- if (info->flags & ~BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
+ /* Check padding is all zeroed */
+ for (j = 0; j < sizeof(info->padding); j++) {
+ if (info->padding[j] != 0)
return -EINVAL;
- }
}
+ /* Only valid flags shall be set */
+ if (info->flags & ~(BASE_JIT_ALLOC_VALID_FLAGS))
+ return -EINVAL;
+
+#if !MALI_JIT_PRESSURE_LIMIT
+	/* If the just-in-time memory allocation pressure limit feature is
+	 * disabled, heap_info_gpu_addr must be zeroed-out
+ */
+ if (info->heap_info_gpu_addr)
+ return -EINVAL;
+#endif
+
+ /* If BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE is set, heap_info_gpu_addr
+ * cannot be 0
+ */
+ if ((info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) &&
+ !info->heap_info_gpu_addr)
+ return -EINVAL;
+
return 0;
}
+/*
+ * Sizes of user data to copy for each just-in-time memory interface version
+ *
+ * In interface version 2 onwards this is the same as the struct size, allowing
+ * copying of arrays of structures from userspace.
+ *
+ * In interface version 1 the structure size was variable, so arrays of
+ * structures could not easily be supported; they were not a feature of
+ * version 1 anyway.
+ */
+static const size_t jit_info_copy_size_for_jit_version[] = {
+	/* In jit_version 1, the structure did not have any end padding, so
+	 * it could be a different size on 32-bit and 64-bit clients. We
+	 * therefore do not copy past the last member.
+ */
+ [1] = offsetofend(struct base_jit_alloc_info_10_2, id),
+ [2] = sizeof(struct base_jit_alloc_info_11_5),
+ [3] = sizeof(struct base_jit_alloc_info)
+};
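
For the version-1 entry, offsetofend() (a standard kernel helper equal to offsetof() plus the member's size) yields the number of bytes up to and including the last member, which is what must be copied when the trailing padding is ABI-dependent. A rough sketch with a hypothetical struct:

/* Hypothetical padding-free layout, for illustration only */
struct example_info_v1 {
	u64 gpu_alloc_addr;
	u64 va_pages;
	u64 commit_pages;
	u64 extent;
	u8  id;	/* last member; any end padding is compiler/ABI dependent */
};

/* offsetofend(T, m) == offsetof(T, m) + sizeof(((T *)0)->m), so copying
 * offsetofend(struct example_info_v1, id) bytes takes everything up to
 * and including 'id' while never reading the trailing padding.
 */
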
+
static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
{
- __user void *data = (__user void *)(uintptr_t) katom->jc;
+ __user u8 *data = (__user u8 *)(uintptr_t) katom->jc;
struct base_jit_alloc_info *info;
struct kbase_context *kctx = katom->kctx;
struct kbase_device *kbdev = kctx->kbdev;
u32 count;
int ret;
u32 i;
+ size_t jit_info_user_copy_size;
- /* For backwards compatibility */
- if (katom->nr_extres == 0)
+ WARN_ON(kctx->jit_version >=
+ ARRAY_SIZE(jit_info_copy_size_for_jit_version));
+ jit_info_user_copy_size =
+ jit_info_copy_size_for_jit_version[kctx->jit_version];
+ WARN_ON(jit_info_user_copy_size > sizeof(*info));
+
+ /* For backwards compatibility, and to prevent reading more than 1 jit
+ * info struct on jit version 1
+ */
+ if (katom->nr_extres == 0 || kctx->jit_version == 1)
katom->nr_extres = 1;
count = katom->nr_extres;
@@ -899,13 +973,21 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
ret = -ENOMEM;
goto fail;
}
- if (copy_from_user(info, data, sizeof(*info)*count) != 0) {
- ret = -EINVAL;
- goto free_info;
- }
+
katom->softjob_data = info;
- for (i = 0; i < count; i++, info++) {
+ for (i = 0; i < count; i++, info++, data += jit_info_user_copy_size) {
+ if (copy_from_user(info, data, jit_info_user_copy_size) != 0) {
+ ret = -EINVAL;
+ goto free_info;
+ }
+ /* Clear any remaining bytes when user struct is smaller than
+ * kernel struct. For jit version 1, this also clears the
+ * padding bytes
+ */
+ memset(((u8 *)info) + jit_info_user_copy_size, 0,
+ sizeof(*info) - jit_info_user_copy_size);
+
ret = kbasep_jit_alloc_validate(kctx, info);
if (ret)
goto free_info;
@@ -1009,7 +1091,7 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
for (j = 0; j < i; j++, info++) {
kbase_jit_free(kctx, kctx->jit_alloc[info->id]);
kctx->jit_alloc[info->id] =
- (struct kbase_va_region *) -1;
+ KBASE_RESERVED_REG_JIT_ALLOC;
}
katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
@@ -1054,7 +1136,7 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
*/
for (; i < count; i++, info++) {
kctx->jit_alloc[info->id] =
- (struct kbase_va_region *) -1;
+ KBASE_RESERVED_REG_JIT_ALLOC;
}
katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
@@ -1121,6 +1203,9 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
entry_mmu_flags, info->id, info->commit_pages,
info->extent, info->va_pages);
kbase_vunmap(kctx, &mapping);
+
+ kbase_trace_jit_report_gpu_mem(kctx, reg,
+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
}
katom->event_code = BASE_JD_EVENT_DONE;
@@ -1229,7 +1314,7 @@ static void kbase_jit_free_process(struct kbase_jd_atom *katom)
}
}
-static void kbasep_jit_free_finish_worker(struct work_struct *work)
+static void kbasep_jit_finish_worker(struct work_struct *work)
{
struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
work);
@@ -1245,11 +1330,29 @@ static void kbasep_jit_free_finish_worker(struct work_struct *work)
kbase_js_sched_all(kctx->kbdev);
}
-static void kbase_jit_free_finish(struct kbase_jd_atom *katom)
+void kbase_jit_retry_pending_alloc(struct kbase_context *kctx)
{
+ LIST_HEAD(jit_pending_alloc_list);
struct list_head *i, *tmp;
+
+ list_splice_tail_init(&kctx->jit_pending_alloc,
+ &jit_pending_alloc_list);
+
+ list_for_each_safe(i, tmp, &jit_pending_alloc_list) {
+ struct kbase_jd_atom *pending_atom = list_entry(i,
+ struct kbase_jd_atom, queue);
+ if (kbase_jit_allocate_process(pending_atom) == 0) {
+ /* Atom has completed */
+ INIT_WORK(&pending_atom->work,
+ kbasep_jit_finish_worker);
+ queue_work(kctx->jctx.job_done_wq, &pending_atom->work);
+ }
+ }
+}
+
+static void kbase_jit_free_finish(struct kbase_jd_atom *katom)
+{
struct kbase_context *kctx = katom->kctx;
- LIST_HEAD(jit_pending_alloc_list);
u8 *ids;
size_t j;
@@ -1270,7 +1373,8 @@ static void kbase_jit_free_finish(struct kbase_jd_atom *katom)
* still succeed this soft job but don't try and free
* the allocation.
*/
- if (kctx->jit_alloc[ids[j]] != (struct kbase_va_region *) -1) {
+ if (kctx->jit_alloc[ids[j]] !=
+ KBASE_RESERVED_REG_JIT_ALLOC) {
KBASE_TLSTREAM_TL_JIT_USEDPAGES(kctx->kbdev,
kctx->jit_alloc[ids[j]]->
gpu_alloc->nents, ids[j]);
@@ -1282,18 +1386,7 @@ static void kbase_jit_free_finish(struct kbase_jd_atom *katom)
/* Free the list of ids */
kfree(ids);
- list_splice_tail_init(&kctx->jit_pending_alloc, &jit_pending_alloc_list);
-
- list_for_each_safe(i, tmp, &jit_pending_alloc_list) {
- struct kbase_jd_atom *pending_atom = list_entry(i,
- struct kbase_jd_atom, queue);
- if (kbase_jit_allocate_process(pending_atom) == 0) {
- /* Atom has completed */
- INIT_WORK(&pending_atom->work,
- kbasep_jit_free_finish_worker);
- queue_work(kctx->jctx.job_done_wq, &pending_atom->work);
- }
- }
+ kbase_jit_retry_pending_alloc(kctx);
}
static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c
index 5e3b74d..f01291a 100644
--- a/mali_kbase/mali_kbase_vinstr.c
+++ b/mali_kbase/mali_kbase_vinstr.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -574,25 +574,34 @@ int kbase_vinstr_hwcnt_reader_setup(
if (errcode)
goto error;
+ /* Add the new client. No need to reschedule worker, as not periodic */
+ mutex_lock(&vctx->lock);
+
+ vctx->client_count++;
+ list_add(&vcli->node, &vctx->clients);
+
+ mutex_unlock(&vctx->lock);
+
+ /* Expose to user-space only once the client is fully initialized */
errcode = anon_inode_getfd(
"[mali_vinstr_desc]",
&vinstr_client_fops,
vcli,
O_RDONLY | O_CLOEXEC);
if (errcode < 0)
- goto error;
+ goto client_installed_error;
fd = errcode;
- /* Add the new client. No need to reschedule worker, as not periodic */
+ return fd;
+
+client_installed_error:
mutex_lock(&vctx->lock);
- vctx->client_count++;
- list_add(&vcli->node, &vctx->clients);
+ vctx->client_count--;
+ list_del(&vcli->node);
mutex_unlock(&vctx->lock);
-
- return fd;
error:
kbasep_vinstr_client_destroy(vcli);
return errcode;
diff --git a/mali_kbase/mali_linux_trace.h b/mali_kbase/mali_linux_trace.h
index 96296ac..b639764 100644
--- a/mali_kbase/mali_linux_trace.h
+++ b/mali_kbase/mali_linux_trace.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,15 +20,15 @@
*
*/
-#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_MALI_H
-
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mali
-#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
#include <linux/tracepoint.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
#define MALI_JOB_SLOTS_EVENT_CHANGED
/**
@@ -127,12 +127,335 @@ TRACE_EVENT(mali_total_alloc_pages_change,
),
TP_printk("gpu=%u event=%lld", __entry->gpu_id, __entry->event_id)
);
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+/*
+ * MMU subsystem tracepoints
+ */
+
+/* Fault status and exception code helpers
+ *
+ * Must be macros to allow use by user-side tracepoint tools
+ *
+ * Bits 0:1 are masked off the exception code for the symbolic name and used
+ * for the fault level.
+ *
+ * Tracepoint files get included more than once - protect against multiple
+ * definition
+ */
+#ifndef __TRACE_MALI_MMU_HELPERS
+#define __TRACE_MALI_MMU_HELPERS
+/* Complex macros should be enclosed in parentheses.
+ *
+ * We need those parentheses removed for our arrays of symbolic look-ups for
+ * __print_symbolic(), whilst still being able to use them outside trace code.
+ */
+#define _ENSURE_PARENTHESIS(args...) args
+
+#define KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT(code) \
+ (!KBASE_MMU_FAULT_CODE_VALID(code) ? "UNKNOWN,level=" : \
+ __print_symbolic(((code) & ~3u), \
+ KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS))
+#define KBASE_MMU_FAULT_CODE_LEVEL(code) \
+ (((((code) & ~0x3u) == 0xC4) ? 4 : 0) + ((code) & 0x3u))
+
+#define KBASE_MMU_FAULT_STATUS_CODE(status) \
+ ((status) & 0xFFu)
+#define KBASE_MMU_FAULT_STATUS_DECODED_STRING(status) \
+ (((status) & (1u << 10)) ? "DECODER_FAULT" : "SLAVE_FAULT")
+
+#define KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(status) \
+ KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT( \
+ KBASE_MMU_FAULT_STATUS_CODE(status))
+
+#define KBASE_MMU_FAULT_STATUS_LEVEL(status) \
+ KBASE_MMU_FAULT_CODE_LEVEL(KBASE_MMU_FAULT_STATUS_CODE(status))
+
+#define KBASE_MMU_FAULT_STATUS_ACCESS(status) \
+ ((status) & AS_FAULTSTATUS_ACCESS_TYPE_MASK)
+#define KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\
+ {AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC, "ATOMIC" }, \
+ {AS_FAULTSTATUS_ACCESS_TYPE_EX, "EXECUTE"}, \
+ {AS_FAULTSTATUS_ACCESS_TYPE_READ, "READ" }, \
+ {AS_FAULTSTATUS_ACCESS_TYPE_WRITE, "WRITE" })
+#define KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(status) \
+ __print_symbolic(KBASE_MMU_FAULT_STATUS_ACCESS(status), \
+ KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS)
+
+#define KBASE_MMU_FAULT_CODE_VALID(code) \
+ ((code >= 0xC0 && code <= 0xEF) && \
+ (!(code >= 0xC5 && code <= 0xC6)) && \
+ (!(code >= 0xCC && code <= 0xCF)) && \
+ (!(code >= 0xD4 && code <= 0xD7)) && \
+ (!(code >= 0xDC && code <= 0xDF)))
+#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\
+ {0xC0, "TRANSLATION_FAULT_" }, \
+ {0xC4, "TRANSLATION_FAULT(_7==_IDENTITY)_" }, \
+ {0xC8, "PERMISSION_FAULT_" }, \
+ {0xD0, "TRANSTAB_BUS_FAULT_" }, \
+ {0xD8, "ACCESS_FLAG_" }, \
+ {0xE0, "ADDRESS_SIZE_FAULT_IN" }, \
+ {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \
+ {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }, \
+ {0xEC, "MEMORY_ATTRIBUTES_NONCACHEABLE_" })
+#endif /* __TRACE_MALI_MMU_HELPERS */
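
For reference, the sketch below decodes a raw fault status word with plain shifts and masks, mirroring the helper macros above. The bit positions (exception code in bits 0:7, access type in bits 8:9, decoder/slave flag in bit 10, source ID in bits 16:31) are inferred from these macros and the TP_printk format further down, so treat it as illustrative rather than as a hardware reference.

/* Illustrative only: mirrors the helper macros above */
static inline void example_decode_mmu_fault_status(u32 status)
{
	u32 code = status & 0xFFu;                     /* exception code */
	u32 level = (((code & ~0x3u) == 0xC4) ? 4 : 0) + (code & 0x3u);
	u32 access = (status >> 8) & 0x3u;             /* access type */
	bool decoder_fault = status & (1u << 10);      /* else slave fault */
	u32 source_id = status >> 16;

	pr_debug("code=0x%x level=%u access=%u decoder=%d source_id=0x%x\n",
		 code, level, access, decoder_fault, source_id);
}
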
+
+/* trace_mali_mmu_page_fault_grow
+ *
+ * Tracepoint about a successful grow of a region due to a GPU page fault
+ */
+TRACE_EVENT(mali_mmu_page_fault_grow,
+ TP_PROTO(struct kbase_va_region *reg, struct kbase_fault *fault,
+ size_t new_pages),
+ TP_ARGS(reg, fault, new_pages),
+ TP_STRUCT__entry(
+ __field(u64, start_addr)
+ __field(u64, fault_addr)
+ __field(u64, fault_extra_addr)
+ __field(size_t, new_pages)
+ __field(u32, status)
+ ),
+ TP_fast_assign(
+ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT;
+ __entry->fault_addr = fault->addr;
+ __entry->fault_extra_addr = fault->extra_addr;
+ __entry->new_pages = new_pages;
+ __entry->status = fault->status;
+ ),
+ TP_printk("start=0x%llx fault_addr=0x%llx fault_extra_addr=0x%llx new_pages=%zu raw_fault_status=0x%x decoded_faultstatus=%s exception_type=0x%x,%s%u access_type=0x%x,%s source_id=0x%x",
+ __entry->start_addr, __entry->fault_addr,
+ __entry->fault_extra_addr, __entry->new_pages,
+ __entry->status,
+ KBASE_MMU_FAULT_STATUS_DECODED_STRING(__entry->status),
+ KBASE_MMU_FAULT_STATUS_CODE(__entry->status),
+ KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(__entry->status),
+ KBASE_MMU_FAULT_STATUS_LEVEL(__entry->status),
+ KBASE_MMU_FAULT_STATUS_ACCESS(__entry->status) >> 8,
+ KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(__entry->status),
+ __entry->status >> 16)
+);
+
+
+
+
+/*
+ * Just-in-time memory allocation subsystem tracepoints
+ */
+
+/* Just-in-time memory allocation soft-job template. Override the TP_printk
+ * further if need be. jit_id can be 0.
+ */
+DECLARE_EVENT_CLASS(mali_jit_softjob_template,
+ TP_PROTO(struct kbase_va_region *reg, u8 jit_id),
+ TP_ARGS(reg, jit_id),
+ TP_STRUCT__entry(
+ __field(u64, start_addr)
+ __field(size_t, nr_pages)
+ __field(size_t, backed_pages)
+ __field(u8, jit_id)
+ ),
+ TP_fast_assign(
+ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT;
+ __entry->nr_pages = reg->nr_pages;
+ __entry->backed_pages = kbase_reg_current_backed_size(reg);
+ __entry->jit_id = jit_id;
+ ),
+ TP_printk("jit_id=%u start=0x%llx va_pages=0x%zx backed_size=0x%zx",
+ __entry->jit_id, __entry->start_addr, __entry->nr_pages,
+ __entry->backed_pages)
+);
+
+/* trace_mali_jit_alloc()
+ *
+ * Tracepoint about a just-in-time memory allocation soft-job successfully
+ * allocating memory
+ */
+DEFINE_EVENT(mali_jit_softjob_template, mali_jit_alloc,
+ TP_PROTO(struct kbase_va_region *reg, u8 jit_id),
+ TP_ARGS(reg, jit_id));
+
+/* trace_mali_jit_free()
+ *
+ * Tracepoint about memory that was allocated just-in-time being freed
+ * (which may happen either on free soft-job, or during rollback error
+ * paths of an allocation soft-job, etc)
+ *
+ * Free doesn't immediately have the just-in-time memory allocation ID so
+ * it's currently suppressed from the output - set jit_id to 0
+ */
+DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free,
+ TP_PROTO(struct kbase_va_region *reg, u8 jit_id),
+ TP_ARGS(reg, jit_id),
+ TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx",
+ __entry->start_addr, __entry->nr_pages, __entry->backed_pages));
+
+#if MALI_JIT_PRESSURE_LIMIT && !MALI_USE_CSF
+/* trace_mali_jit_report
+ *
+ * Tracepoint about the GPU data structure read to form a just-in-time memory
+ * allocation report, and its calculated physical page usage
+ */
+TRACE_EVENT(mali_jit_report,
+ TP_PROTO(struct kbase_jd_atom *katom, struct kbase_va_region *reg,
+ unsigned int id_idx, u64 read_val, u64 used_pages),
+ TP_ARGS(katom, reg, id_idx, read_val, used_pages),
+ TP_STRUCT__entry(
+ __field(u64, start_addr)
+ __field(u64, read_val)
+ __field(u64, used_pages)
+ __field(unsigned long, flags)
+ __field(u8, id_idx)
+ __field(u8, jit_id)
+ ),
+ TP_fast_assign(
+ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT;
+ __entry->read_val = read_val;
+ __entry->used_pages = used_pages;
+ __entry->flags = reg->flags;
+ __entry->id_idx = id_idx;
+ __entry->jit_id = katom->jit_ids[id_idx];
+ ),
+ TP_printk("start=0x%llx jit_ids[%u]=%u read_type='%s' read_val=0x%llx used_pages=%llu",
+ __entry->start_addr, __entry->id_idx, __entry->jit_id,
+ __print_symbolic(__entry->flags,
+ { 0, "address"},
+ { KBASE_REG_TILER_ALIGN_TOP, "address with align" },
+ { KBASE_REG_HEAP_INFO_IS_SIZE, "size" },
+ { KBASE_REG_HEAP_INFO_IS_SIZE |
+ KBASE_REG_TILER_ALIGN_TOP,
+ "size with align (invalid)" }
+ ),
+ __entry->read_val, __entry->used_pages)
+);
+#endif /* MALI_JIT_PRESSURE_LIMIT && !MALI_USE_CSF */
+
+TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
+
+#if MALI_JIT_PRESSURE_LIMIT
+/* trace_mali_jit_report_pressure
+ *
+ * Tracepoint about change in physical memory pressure, due to the information
+ * about a region changing. Examples include:
+ * - a report on a region that was allocated just-in-time
+ * - just-in-time allocation of a region
+ * - free of a region that was allocated just-in-time
+ */
+TRACE_EVENT(mali_jit_report_pressure,
+ TP_PROTO(struct kbase_va_region *reg, u64 new_used_pages,
+ u64 new_pressure, unsigned int flags),
+ TP_ARGS(reg, new_used_pages, new_pressure, flags),
+ TP_STRUCT__entry(
+ __field(u64, start_addr)
+ __field(u64, used_pages)
+ __field(u64, new_used_pages)
+ __field(u64, new_pressure)
+ __field(unsigned int, flags)
+ ),
+ TP_fast_assign(
+ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT;
+ __entry->used_pages = reg->used_pages;
+ __entry->new_used_pages = new_used_pages;
+ __entry->new_pressure = new_pressure;
+ __entry->flags = flags;
+ ),
+ TP_printk("start=0x%llx old_used_pages=%llu new_used_pages=%llu new_pressure=%llu report_flags=%s",
+ __entry->start_addr, __entry->used_pages,
+ __entry->new_used_pages, __entry->new_pressure,
+ __print_flags(__entry->flags, "|",
+ { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE,
+ "HAPPENED_ON_ALLOC_OR_FREE" }))
+);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
+/* Tracepoint files get included more than once - protect against multiple
+ * definition
+ */
+#undef KBASE_JIT_REPORT_GPU_MEM_SIZE
+
+/* Size in bytes of the memory surrounding the location used for a just-in-time
+ * memory allocation report
+ */
+#define KBASE_JIT_REPORT_GPU_MEM_SIZE (4 * sizeof(u64))
+
+/* trace_mali_jit_report_gpu_mem
+ *
+ * Tracepoint about the GPU memory nearby the location used for a just-in-time
+ * memory allocation report
+ */
+TRACE_EVENT(mali_jit_report_gpu_mem,
+ TP_PROTO(u64 base_addr, u64 reg_addr, u64 *gpu_mem, unsigned int flags),
+ TP_ARGS(base_addr, reg_addr, gpu_mem, flags),
+ TP_STRUCT__entry(
+ __field(u64, base_addr)
+ __field(u64, reg_addr)
+ __array(u64, mem_values,
+ KBASE_JIT_REPORT_GPU_MEM_SIZE / sizeof(u64))
+ __field(unsigned int, flags)
+ ),
+ TP_fast_assign(
+ __entry->base_addr = base_addr;
+ __entry->reg_addr = reg_addr;
+ memcpy(__entry->mem_values, gpu_mem,
+ sizeof(__entry->mem_values));
+ __entry->flags = flags;
+ ),
+ TP_printk("start=0x%llx read GPU memory base=0x%llx values=%s report_flags=%s",
+ __entry->reg_addr, __entry->base_addr,
+ __print_array(__entry->mem_values,
+ ARRAY_SIZE(__entry->mem_values), sizeof(u64)),
+ __print_flags(__entry->flags, "|",
+ { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE,
+ "HAPPENED_ON_ALLOC_OR_FREE" }))
+);
+
+/* trace_mali_jit_trim_from_region
+ *
+ * Tracepoint about trimming physical pages from a region
+ */
+TRACE_EVENT(mali_jit_trim_from_region,
+ TP_PROTO(struct kbase_va_region *reg, size_t freed_pages,
+ size_t old_pages, size_t available_pages, size_t new_pages),
+ TP_ARGS(reg, freed_pages, old_pages, available_pages, new_pages),
+ TP_STRUCT__entry(
+ __field(u64, start_addr)
+ __field(size_t, freed_pages)
+ __field(size_t, old_pages)
+ __field(size_t, available_pages)
+ __field(size_t, new_pages)
+ ),
+ TP_fast_assign(
+ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT;
+ __entry->freed_pages = freed_pages;
+ __entry->old_pages = old_pages;
+ __entry->available_pages = available_pages;
+ __entry->new_pages = new_pages;
+ ),
+ TP_printk("start=0x%llx freed_pages=%zu old_pages=%zu available_pages=%zu new_pages=%zu",
+ __entry->start_addr, __entry->freed_pages, __entry->old_pages,
+ __entry->available_pages, __entry->new_pages)
+);
+
+/* trace_mali_jit_trim
+ *
+ * Tracepoint about total trimmed physical pages
+ */
+TRACE_EVENT(mali_jit_trim,
+ TP_PROTO(size_t freed_pages),
+ TP_ARGS(freed_pages),
+ TP_STRUCT__entry(
+ __field(size_t, freed_pages)
+ ),
+ TP_fast_assign(
+ __entry->freed_pages = freed_pages;
+ ),
+ TP_printk("freed_pages=%zu", __entry->freed_pages)
+);
#endif /* _TRACE_MALI_H */
#undef TRACE_INCLUDE_PATH
-#undef linux
#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_trace
/* This part must be outside protection */
#include <trace/define_trace.h>
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
index fd60e35..46800fe 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,6 +29,7 @@
#include <mali_kbase_hwaccess_jm.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <mali_kbase_as_fault_debugfs.h>
+#include "../mali_kbase_mmu_internal.h"
void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut,
struct kbase_mmu_setup * const setup)
@@ -191,6 +192,10 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
{
lockdep_assert_held(&kbdev->hwaccess_lock);
+ dev_dbg(kbdev->dev,
+ "Entering %s kctx %p, as %p\n",
+ __func__, (void *)kctx, (void *)as);
+
if (!kctx) {
dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n",
kbase_as_has_bus_fault(as, fault) ?
@@ -254,6 +259,10 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault));
atomic_inc(&kbdev->faults_pending);
}
+
+ dev_dbg(kbdev->dev,
+ "Leaving %s kctx %p, as %p\n",
+ __func__, (void *)kctx, (void *)as);
}
static void validate_protected_page_fault(struct kbase_device *kbdev)
@@ -285,12 +294,14 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
const unsigned long as_bit_mask = (1UL << num_as) - 1;
unsigned long flags;
u32 new_mask;
- u32 tmp;
+ u32 tmp, bf_bits, pf_bits;
+ dev_dbg(kbdev->dev, "Entering %s irq_stat %u\n",
+ __func__, irq_stat);
/* bus faults */
- u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+ bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
/* page faults (note: Ignore ASes with both pf and bf) */
- u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+ pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
if (WARN_ON(kbdev == NULL))
return;
@@ -388,4 +399,16 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
new_mask |= tmp;
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask);
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+ dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n",
+ __func__, irq_stat);
+}
+
+int kbase_mmu_switch_to_ir(struct kbase_context *const kctx,
+ struct kbase_va_region *const reg)
+{
+ dev_dbg(kctx->kbdev->dev,
+ "Switching to incremental rendering for region %p\n",
+ (void *)reg);
+ return kbase_job_slot_softstop_start_rp(kctx, reg);
}
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 5392305..c4bea39 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -42,6 +42,7 @@
#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu.h>
#include <mmu/mali_kbase_mmu_internal.h>
+#include <mali_kbase_cs_experimental.h>
#define KBASE_MMU_PAGE_ENTRIES 512
@@ -534,6 +535,8 @@ void page_fault_worker(struct work_struct *data)
bool grow_2mb_pool;
struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
int i;
+ size_t current_backed_size;
+
faulting_as = container_of(data, struct kbase_as, work_pagefault);
fault = &faulting_as->pf_data;
@@ -541,6 +544,9 @@ void page_fault_worker(struct work_struct *data)
as_no = faulting_as->number;
kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+ dev_dbg(kbdev->dev,
+ "Entering %s %p, fault_pfn %lld, as_no %d\n",
+ __func__, (void *)data, fault_pfn, as_no);
/* Grab the context that was already refcounted in kbase_mmu_interrupt()
* Therefore, it cannot be scheduled out of this AS until we explicitly
@@ -684,11 +690,14 @@ page_fault_retry:
*/
fault_rel_pfn = fault_pfn - region->start_pfn;
- if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
- dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+ current_backed_size = kbase_reg_current_backed_size(region);
+
+ if (fault_rel_pfn < current_backed_size) {
+ dev_dbg(kbdev->dev,
+ "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
fault->addr, region->start_pfn,
region->start_pfn +
- kbase_reg_current_backed_size(region));
+ current_backed_size);
mutex_lock(&kbdev->mmu_hw_mutex);
@@ -717,8 +726,9 @@ page_fault_retry:
new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn);
/* cap to max vsize */
- new_pages = min(new_pages, region->nr_pages -
- kbase_reg_current_backed_size(region));
+ new_pages = min(new_pages, region->nr_pages - current_backed_size);
+ dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n",
+ new_pages);
if (new_pages == 0) {
mutex_lock(&kbdev->mmu_hw_mutex);
@@ -750,8 +760,8 @@ page_fault_retry:
u32 op;
/* alloc success */
- KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region)
- <= region->nr_pages);
+ WARN_ON(kbase_reg_current_backed_size(region) >
+ region->nr_pages);
/* set up the new pages */
pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
@@ -783,6 +793,29 @@ page_fault_retry:
}
KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no,
(u64)new_pages);
+ trace_mali_mmu_page_fault_grow(region, fault, new_pages);
+
+#if MALI_INCREMENTAL_RENDERING
+ /* Switch to incremental rendering if we have nearly run out of
+ * memory in a JIT memory allocation.
+ */
+ if (region->threshold_pages &&
+ kbase_reg_current_backed_size(region) >
+ region->threshold_pages) {
+
+ dev_dbg(kctx->kbdev->dev,
+ "%zu pages exceeded IR threshold %zu\n",
+ new_pages + current_backed_size,
+ region->threshold_pages);
+
+ if (kbase_mmu_switch_to_ir(kctx, region) >= 0) {
+ dev_dbg(kctx->kbdev->dev,
+ "Get region %p for IR\n",
+ (void *)region);
+ kbase_va_region_alloc_get(kctx, region);
+ }
+ }
+#endif
/* AS transaction begin */
mutex_lock(&kbdev->mmu_hw_mutex);
@@ -871,6 +904,7 @@ page_fault_retry:
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
"Page allocation failure", fault);
} else {
+ dev_dbg(kbdev->dev, "Try again after pool_grow\n");
goto page_fault_retry;
}
}
@@ -886,6 +920,7 @@ fault_done:
kbasep_js_runpool_release_ctx(kbdev, kctx);
atomic_dec(&kbdev->faults_pending);
+ dev_dbg(kbdev->dev, "Leaving page_fault_worker %p\n", (void *)data);
}
static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
diff --git a/mali_kbase/mmu/mali_kbase_mmu_internal.h b/mali_kbase/mmu/mali_kbase_mmu_internal.h
index 54b0c35..28bd341 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_internal.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_internal.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -46,4 +46,18 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
struct kbase_context *kctx, struct kbase_as *as,
struct kbase_fault *fault);
+/**
+ * kbase_mmu_switch_to_ir() - Switch to incremental rendering if possible
+ * @kctx: The kbase_context for the faulting address space.
+ * @reg:  Reference of a growable GPU memory region in the same context.
+ *        Takes ownership of the reference if successful.
+ *
+ * Used to switch to incremental rendering if we have nearly run out of
+ * virtual address space in a growable memory region.
+ *
+ * Return: 0 if successful, otherwise a negative error code.
+ */
+int kbase_mmu_switch_to_ir(struct kbase_context *kctx,
+ struct kbase_va_region *reg);
+
#endif /* _KBASE_MMU_INTERNAL_H_ */
diff --git a/mali_kbase/tests/kutf/build.bp b/mali_kbase/tests/kutf/build.bp
index f0c7a0c..32eab14 100644
--- a/mali_kbase/tests/kutf/build.bp
+++ b/mali_kbase/tests/kutf/build.bp
@@ -1,13 +1,16 @@
/*
- * Copyright:
- * ----------------------------------------------------------------------------
- * This confidential and proprietary software may be used only as authorized
- * by a licensing agreement from ARM Limited.
- * (C) COPYRIGHT 2018-2019 ARM Limited, ALL RIGHTS RESERVED
- * The entire notice above must be reproduced on all authorized copies and
- * copies may only be made to the extent permitted by a licensing agreement
- * from ARM Limited.
- * ----------------------------------------------------------------------------
+ *
+ * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
*/
bob_kernel_module {
diff --git a/mali_kbase/tests/kutf/kutf_suite.c b/mali_kbase/tests/kutf/kutf_suite.c
index 3307c0e..3f15669 100644
--- a/mali_kbase/tests/kutf/kutf_suite.c
+++ b/mali_kbase/tests/kutf/kutf_suite.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014, 2017-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -634,6 +634,17 @@ static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix)
kfree(test_fix);
}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+/* Adapting to the upstream debugfs_create_x32() change */
+static int ktufp_u32_get(void *data, u64 *val)
+{
+ *val = *(u32 *)data;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(kutfp_fops_x32_ro, ktufp_u32_get, NULL, "0x%08llx\n");
+#endif
+
void kutf_add_test_with_filters_and_data(
struct kutf_suite *suite,
unsigned int id,
@@ -668,8 +679,13 @@ void kutf_add_test_with_filters_and_data(
}
test_func->filters = filters;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+ tmp = debugfs_create_file_unsafe("filters", S_IROTH, test_func->dir,
+ &test_func->filters, &kutfp_fops_x32_ro);
+#else
tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir,
&test_func->filters);
+#endif
if (!tmp) {
pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name);
goto fail_file;
diff --git a/mali_kbase/tests/mali_kutf_irq_test/build.bp b/mali_kbase/tests/mali_kutf_irq_test/build.bp
index 971f092..90efdcf 100644
--- a/mali_kbase/tests/mali_kutf_irq_test/build.bp
+++ b/mali_kbase/tests/mali_kutf_irq_test/build.bp
@@ -1,13 +1,16 @@
/*
- * Copyright:
- * ----------------------------------------------------------------------------
- * This confidential and proprietary software may be used only as authorized
- * by a licensing agreement from ARM Limited.
- * (C) COPYRIGHT 2018-2019 ARM Limited, ALL RIGHTS RESERVED
- * The entire notice above must be reproduced on all authorized copies and
- * copies may only be made to the extent permitted by a licensing agreement
- * from ARM Limited.
- * ----------------------------------------------------------------------------
+ *
+ * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
*/
bob_kernel_module {
diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c
index 201b30e..5d073be 100644
--- a/mali_kbase/tl/mali_kbase_timeline.c
+++ b/mali_kbase/tl/mali_kbase_timeline.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -131,6 +131,7 @@ int kbase_timeline_init(struct kbase_timeline **timeline,
kbasep_timeline_autoflush_timer_callback);
result->is_enabled = timeline_is_enabled;
+
*timeline = result;
return 0;
}
@@ -142,6 +143,7 @@ void kbase_timeline_term(struct kbase_timeline *timeline)
if (!timeline)
return;
+
for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; i++)
kbase_tlstream_term(&timeline->streams[i]);
diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c
index 9a899f2..6e09a17 100644
--- a/mali_kbase/tl/mali_kbase_timeline_io.c
+++ b/mali_kbase/tl/mali_kbase_timeline_io.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -85,6 +85,43 @@ static int kbasep_timeline_io_packet_pending(
}
/**
+ * copy_stream_header() - copy timeline stream header.
+ *
+ * @buffer:   Pointer to the buffer provided by the user.
+ * @size:     Maximum amount of data that can be stored in the buffer.
+ * @copy_len: Pointer to the number of bytes already copied within the
+ *            read system call.
+ * @hdr: Pointer to the stream header.
+ * @hdr_size: Header size.
+ * @hdr_btc: Pointer to the remaining number of bytes to copy.
+ *
+ * Return: 0 on success, -1 otherwise.
+ */
+static inline int copy_stream_header(
+ char __user *buffer, size_t size, ssize_t *copy_len,
+ const char *hdr,
+ size_t hdr_size,
+ size_t *hdr_btc)
+{
+ const size_t offset = hdr_size - *hdr_btc;
+ const size_t copy_size = MIN(size - *copy_len, *hdr_btc);
+
+ if (!*hdr_btc)
+ return 0;
+
+ if (WARN_ON(*hdr_btc > hdr_size))
+ return -1;
+
+ if (copy_to_user(&buffer[*copy_len], &hdr[offset], copy_size))
+ return -1;
+
+ *hdr_btc -= copy_size;
+ *copy_len += copy_size;
+
+ return 0;
+}
+
+/**
* kbasep_timeline_copy_header - copy timeline headers to the user
* @timeline: Timeline instance
* @buffer: Pointer to the buffer provided by user
@@ -93,51 +130,28 @@ static int kbasep_timeline_io_packet_pending(
* within the read system call.
*
* This helper function checks if timeline headers have not been sent
- * to the user, and if so, sends them. @ref copy_len is respectively
+ * to the user, and if so, sends them. copy_len is accordingly
* updated.
*
* Returns: 0 if success, -1 if copy_to_user has failed.
*/
-static inline int kbasep_timeline_copy_header(
+static inline int kbasep_timeline_copy_headers(
struct kbase_timeline *timeline,
char __user *buffer,
size_t size,
ssize_t *copy_len)
{
- if (timeline->obj_header_btc) {
- size_t offset = obj_desc_header_size -
- timeline->obj_header_btc;
-
- size_t header_cp_size = MIN(
- size - *copy_len,
- timeline->obj_header_btc);
-
- if (copy_to_user(
- &buffer[*copy_len],
- &obj_desc_header[offset],
- header_cp_size))
- return -1;
-
- timeline->obj_header_btc -= header_cp_size;
- *copy_len += header_cp_size;
- }
-
- if (timeline->aux_header_btc) {
- size_t offset = aux_desc_header_size -
- timeline->aux_header_btc;
- size_t header_cp_size = MIN(
- size - *copy_len,
- timeline->aux_header_btc);
-
- if (copy_to_user(
- &buffer[*copy_len],
- &aux_desc_header[offset],
- header_cp_size))
- return -1;
-
- timeline->aux_header_btc -= header_cp_size;
- *copy_len += header_cp_size;
- }
+ if (copy_stream_header(buffer, size, copy_len,
+ obj_desc_header,
+ obj_desc_header_size,
+ &timeline->obj_header_btc))
+ return -1;
+
+ if (copy_stream_header(buffer, size, copy_len,
+ aux_desc_header,
+ aux_desc_header_size,
+ &timeline->aux_header_btc))
+ return -1;
return 0;
}
@@ -183,7 +197,7 @@ static ssize_t kbasep_timeline_io_read(
unsigned int rb_idx;
size_t rb_size;
- if (kbasep_timeline_copy_header(
+ if (kbasep_timeline_copy_headers(
timeline, buffer, size, &copy_len)) {
copy_len = -EFAULT;
break;
@@ -305,6 +319,7 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp)
timeline = (struct kbase_timeline *) filp->private_data;
+
/* Stop autoflush timer before releasing access to streams. */
atomic_set(&timeline->autoflush_timer_active, 0);
del_timer_sync(&timeline->autoflush_timer);
diff --git a/mali_kbase/tl/mali_kbase_timeline_priv.h b/mali_kbase/tl/mali_kbase_timeline_priv.h
index d4c4773..73499ce 100644
--- a/mali_kbase/tl/mali_kbase_timeline_priv.h
+++ b/mali_kbase/tl/mali_kbase_timeline_priv.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -26,6 +26,7 @@
#include <mali_kbase.h>
#include "mali_kbase_tlstream.h"
+
#include <linux/timer.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
diff --git a/mali_kbase/tl/mali_kbase_tlstream.c b/mali_kbase/tl/mali_kbase_tlstream.c
index 2a76bc0..bec4be7 100644
--- a/mali_kbase/tl/mali_kbase_tlstream.c
+++ b/mali_kbase/tl/mali_kbase_tlstream.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -106,20 +106,31 @@ void kbase_tlstream_reset(struct kbase_tlstream *stream)
atomic_set(&stream->rbi, 0);
}
-/* Configuration of timeline streams generated by kernel.
- * Kernel emit only streams containing either timeline object events or
- * auxiliary events. All streams have stream id value of 1 (as opposed to user
- * space streams that have value of 0).
- */
+/* Configuration of timeline streams generated by the kernel. */
static const struct {
enum tl_packet_family pkt_family;
enum tl_packet_class pkt_class;
enum tl_packet_type pkt_type;
- unsigned int stream_id;
+ enum tl_stream_id stream_id;
} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
- {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
- {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY, 1},
- {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY, 1}
+ {
+ TL_PACKET_FAMILY_TL,
+ TL_PACKET_CLASS_OBJ,
+ TL_PACKET_TYPE_SUMMARY,
+ TL_STREAM_ID_KERNEL,
+ },
+ {
+ TL_PACKET_FAMILY_TL,
+ TL_PACKET_CLASS_OBJ,
+ TL_PACKET_TYPE_BODY,
+ TL_STREAM_ID_KERNEL,
+ },
+ {
+ TL_PACKET_FAMILY_TL,
+ TL_PACKET_CLASS_AUX,
+ TL_PACKET_TYPE_BODY,
+ TL_STREAM_ID_KERNEL,
+ },
};
void kbase_tlstream_init(
diff --git a/mali_kbase/tl/mali_kbase_tlstream.h b/mali_kbase/tl/mali_kbase_tlstream.h
index 5797738..427bb09 100644
--- a/mali_kbase/tl/mali_kbase_tlstream.h
+++ b/mali_kbase/tl/mali_kbase_tlstream.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -97,7 +97,6 @@ enum tl_stream_type {
TL_STREAM_TYPE_OBJ_SUMMARY = TL_STREAM_TYPE_FIRST,
TL_STREAM_TYPE_OBJ,
TL_STREAM_TYPE_AUX,
-
TL_STREAM_TYPE_COUNT
};
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c
index bae95b4..b028ef8 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.c
+++ b/mali_kbase/tl/mali_kbase_tracepoints.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -118,6 +118,7 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END,
KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER,
+ KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW,
KBASE_OBJ_MSG_COUNT,
};
@@ -136,404 +137,410 @@ enum tl_msg_id_aux {
KBASE_AUX_MSG_COUNT,
};
-#define OBJ_TL_LIST \
- TP_DESC(KBASE_TL_NEW_CTX, \
+#define OBJ_TP_LIST \
+ TRACEPOINT_DESC(KBASE_TL_NEW_CTX, \
"object ctx is created", \
"@pII", \
"ctx,ctx_nr,tgid") \
- TP_DESC(KBASE_TL_NEW_GPU, \
+ TRACEPOINT_DESC(KBASE_TL_NEW_GPU, \
"object gpu is created", \
"@pII", \
"gpu,gpu_id,core_count") \
- TP_DESC(KBASE_TL_NEW_LPU, \
+ TRACEPOINT_DESC(KBASE_TL_NEW_LPU, \
"object lpu is created", \
"@pII", \
"lpu,lpu_nr,lpu_fn") \
- TP_DESC(KBASE_TL_NEW_ATOM, \
+ TRACEPOINT_DESC(KBASE_TL_NEW_ATOM, \
"object atom is created", \
"@pI", \
"atom,atom_nr") \
- TP_DESC(KBASE_TL_NEW_AS, \
+ TRACEPOINT_DESC(KBASE_TL_NEW_AS, \
"address space object is created", \
"@pI", \
"address_space,as_nr") \
- TP_DESC(KBASE_TL_DEL_CTX, \
+ TRACEPOINT_DESC(KBASE_TL_DEL_CTX, \
"context is destroyed", \
"@p", \
"ctx") \
- TP_DESC(KBASE_TL_DEL_ATOM, \
+ TRACEPOINT_DESC(KBASE_TL_DEL_ATOM, \
"atom is destroyed", \
"@p", \
"atom") \
- TP_DESC(KBASE_TL_LIFELINK_LPU_GPU, \
+ TRACEPOINT_DESC(KBASE_TL_LIFELINK_LPU_GPU, \
"lpu is deleted with gpu", \
"@pp", \
"lpu,gpu") \
- TP_DESC(KBASE_TL_LIFELINK_AS_GPU, \
+ TRACEPOINT_DESC(KBASE_TL_LIFELINK_AS_GPU, \
"address space is deleted with gpu", \
"@pp", \
"address_space,gpu") \
- TP_DESC(KBASE_TL_RET_CTX_LPU, \
+ TRACEPOINT_DESC(KBASE_TL_RET_CTX_LPU, \
"context is retained by lpu", \
"@pp", \
"ctx,lpu") \
- TP_DESC(KBASE_TL_RET_ATOM_CTX, \
+ TRACEPOINT_DESC(KBASE_TL_RET_ATOM_CTX, \
"atom is retained by context", \
"@pp", \
"atom,ctx") \
- TP_DESC(KBASE_TL_RET_ATOM_LPU, \
+ TRACEPOINT_DESC(KBASE_TL_RET_ATOM_LPU, \
"atom is retained by lpu", \
"@pps", \
"atom,lpu,attrib_match_list") \
- TP_DESC(KBASE_TL_NRET_CTX_LPU, \
+ TRACEPOINT_DESC(KBASE_TL_NRET_CTX_LPU, \
"context is released by lpu", \
"@pp", \
"ctx,lpu") \
- TP_DESC(KBASE_TL_NRET_ATOM_CTX, \
+ TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_CTX, \
"atom is released by context", \
"@pp", \
"atom,ctx") \
- TP_DESC(KBASE_TL_NRET_ATOM_LPU, \
+ TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_LPU, \
"atom is released by lpu", \
"@pp", \
"atom,lpu") \
- TP_DESC(KBASE_TL_RET_AS_CTX, \
+ TRACEPOINT_DESC(KBASE_TL_RET_AS_CTX, \
"address space is retained by context", \
"@pp", \
"address_space,ctx") \
- TP_DESC(KBASE_TL_NRET_AS_CTX, \
+ TRACEPOINT_DESC(KBASE_TL_NRET_AS_CTX, \
"address space is released by context", \
"@pp", \
"address_space,ctx") \
- TP_DESC(KBASE_TL_RET_ATOM_AS, \
+ TRACEPOINT_DESC(KBASE_TL_RET_ATOM_AS, \
"atom is retained by address space", \
"@pp", \
"atom,address_space") \
- TP_DESC(KBASE_TL_NRET_ATOM_AS, \
+ TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_AS, \
"atom is released by address space", \
"@pp", \
"atom,address_space") \
- TP_DESC(KBASE_TL_ATTRIB_ATOM_CONFIG, \
+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_CONFIG, \
"atom job slot attributes", \
"@pLLI", \
"atom,descriptor,affinity,config") \
- TP_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \
+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \
"atom priority", \
"@pI", \
"atom,prio") \
- TP_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \
+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \
"atom state", \
"@pI", \
"atom,state") \
- TP_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \
+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \
"atom caused priority change", \
"@p", \
"atom") \
- TP_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \
+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \
"jit done for atom", \
"@pLLILILLL", \
"atom,edit_addr,new_addr,jit_flags,mem_flags,j_id,com_pgs,extent,va_pgs") \
- TP_DESC(KBASE_TL_JIT_USEDPAGES, \
+ TRACEPOINT_DESC(KBASE_TL_JIT_USEDPAGES, \
"used pages for jit", \
"@LI", \
"used_pages,j_id") \
- TP_DESC(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, \
+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, \
"Information about JIT allocations", \
"@pLLLIIIII", \
"atom,va_pgs,com_pgs,extent,j_id,bin_id,max_allocs,jit_flags,usg_id") \
- TP_DESC(KBASE_TL_ATTRIB_ATOM_JITFREEINFO, \
+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITFREEINFO, \
"Information about JIT frees", \
"@pI", \
"atom,j_id") \
- TP_DESC(KBASE_TL_ATTRIB_AS_CONFIG, \
+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_AS_CONFIG, \
"address space attributes", \
"@pLLL", \
"address_space,transtab,memattr,transcfg") \
- TP_DESC(KBASE_TL_EVENT_LPU_SOFTSTOP, \
+ TRACEPOINT_DESC(KBASE_TL_EVENT_LPU_SOFTSTOP, \
"softstop event on given lpu", \
"@p", \
"lpu") \
- TP_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, \
+ TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, \
"atom softstopped", \
"@p", \
"atom") \
- TP_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, \
+ TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, \
"atom softstop issued", \
"@p", \
"atom") \
- TP_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_START, \
+ TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_START, \
"atom soft job has started", \
"@p", \
"atom") \
- TP_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_END, \
+ TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_END, \
"atom soft job has completed", \
"@p", \
"atom") \
- TP_DESC(KBASE_JD_GPU_SOFT_RESET, \
+ TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \
"gpu soft reset", \
"@p", \
"gpu") \
- TP_DESC(KBASE_TL_KBASE_NEW_DEVICE, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_DEVICE, \
"New KBase Device", \
"@III", \
"kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs") \
- TP_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \
"CSG is programmed to a slot", \
"@III", \
"kbase_device_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index") \
- TP_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \
"CSG is deprogrammed from a slot", \
"@II", \
"kbase_device_id,kbase_device_csg_slot_index") \
- TP_DESC(KBASE_TL_KBASE_NEW_CTX, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \
"New KBase Context", \
"@II", \
"kernel_ctx_id,kbase_device_id") \
- TP_DESC(KBASE_TL_KBASE_DEL_CTX, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_CTX, \
"Delete KBase Context", \
"@I", \
"kernel_ctx_id") \
- TP_DESC(KBASE_TL_KBASE_NEW_KCPUQUEUE, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_KCPUQUEUE, \
"New KCPU Queue", \
"@pII", \
"kcpu_queue,kernel_ctx_id,kcpuq_num_pending_cmds") \
- TP_DESC(KBASE_TL_KBASE_DEL_KCPUQUEUE, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_KCPUQUEUE, \
"Delete KCPU Queue", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, \
"KCPU Queue enqueues Signal on Fence", \
"@pp", \
"kcpu_queue,fence") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, \
"KCPU Queue enqueues Wait on Fence", \
"@pp", \
"kcpu_queue,fence") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
"Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
"Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object", \
"@pLI", \
"kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
"End array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, \
"Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, \
"Array item of KCPU Queue enqueues Set on Cross Queue Sync Object", \
"@pL", \
"kcpu_queue,cqs_obj_gpu_addr") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, \
"End array of KCPU Queue enqueues Set on Cross Queue Sync Object", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \
"Begin array of KCPU Queue enqueues Debug Copy", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \
"Array item of KCPU Queue enqueues Debug Copy", \
"@pL", \
"kcpu_queue,debugcopy_dst_size") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \
"End array of KCPU Queue enqueues Debug Copy", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \
"KCPU Queue enqueues Map Import", \
"@pL", \
"kcpu_queue,map_import_buf_gpu_addr") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, \
"KCPU Queue enqueues Unmap Import", \
"@pL", \
"kcpu_queue,map_import_buf_gpu_addr") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, \
"KCPU Queue enqueues Unmap Import ignoring reference count", \
"@pL", \
"kcpu_queue,map_import_buf_gpu_addr") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
"Begin array of KCPU Queue enqueues JIT Alloc", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
"Array item of KCPU Queue enqueues JIT Alloc", \
"@pLLLLIIIII", \
"kcpu_queue,jit_alloc_gpu_alloc_addr_dest,jit_alloc_va_pages,jit_alloc_commit_pages,jit_alloc_extent,jit_alloc_jit_id,jit_alloc_bin_id,jit_alloc_max_allocations,jit_alloc_flags,jit_alloc_usage_id") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
"End array of KCPU Queue enqueues JIT Alloc", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, \
"Begin array of KCPU Queue enqueues JIT Free", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, \
"Array item of KCPU Queue enqueues JIT Free", \
"@pI", \
"kcpu_queue,jit_alloc_jit_id") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, \
"End array of KCPU Queue enqueues JIT Free", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \
"KCPU Queue starts a Signal on Fence", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \
"KCPU Queue ends a Signal on Fence", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \
"KCPU Queue starts a Wait on Fence", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \
"KCPU Queue ends a Wait on Fence", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \
"KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \
"KCPU Queue ends a Wait on an array of Cross Queue Sync Objects", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \
"KCPU Queue executes a Set on an array of Cross Queue Sync Objects", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, \
"KCPU Queue starts an array of Debug Copys", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, \
"KCPU Queue ends an array of Debug Copys", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \
"KCPU Queue starts a Map Import", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \
"KCPU Queue ends a Map Import", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \
"KCPU Queue starts an Unmap Import", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \
"KCPU Queue ends an Unmap Import", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \
"KCPU Queue starts an Unmap Import ignoring reference count", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \
"KCPU Queue ends an Unmap Import ignoring reference count", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \
"KCPU Queue starts an array of JIT Allocs", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
"Begin array of KCPU Queue ends an array of JIT Allocs", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
"Array item of KCPU Queue ends an array of JIT Allocs", \
"@pLL", \
"kcpu_queue,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
"End array of KCPU Queue ends an array of JIT Allocs", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, \
"KCPU Queue starts an array of JIT Frees", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
"Begin array of KCPU Queue ends an array of JIT Frees", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
"Array item of KCPU Queue ends an array of JIT Frees", \
"@pL", \
"kcpu_queue,jit_free_pages_used") \
- TP_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
"End array of KCPU Queue ends an array of JIT Frees", \
"@p", \
"kcpu_queue") \
- TP_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, \
"KCPU Queue executes an Error Barrier", \
"@p", \
"kcpu_queue") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \
+ "An overflow has happened with the CSFFW Timeline stream", \
+ "@LL", \
+ "csffw_timestamp,csffw_cycle") \
-#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header
-#define MIPE_HEADER_TP_LIST OBJ_TL_LIST
-#define MIPE_HEADER_TP_LIST_COUNT KBASE_OBJ_MSG_COUNT
-#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ
+#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header
+#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL
+#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ
+#define MIPE_HEADER_TRACEPOINT_LIST OBJ_TP_LIST
+#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_OBJ_MSG_COUNT
#include "mali_kbase_mipe_gen_header.h"
const char *obj_desc_header = (const char *) &__obj_desc_header;
const size_t obj_desc_header_size = sizeof(__obj_desc_header);
-#define AUX_TL_LIST \
- TP_DESC(KBASE_AUX_PM_STATE, \
+#define AUX_TP_LIST \
+ TRACEPOINT_DESC(KBASE_AUX_PM_STATE, \
"PM state", \
"@IL", \
"core_type,core_state_bitset") \
- TP_DESC(KBASE_AUX_PAGEFAULT, \
+ TRACEPOINT_DESC(KBASE_AUX_PAGEFAULT, \
"Page fault", \
"@IIL", \
"ctx_nr,as_nr,page_cnt_change") \
- TP_DESC(KBASE_AUX_PAGESALLOC, \
+ TRACEPOINT_DESC(KBASE_AUX_PAGESALLOC, \
"Total alloc pages change", \
"@IL", \
"ctx_nr,page_cnt") \
- TP_DESC(KBASE_AUX_DEVFREQ_TARGET, \
+ TRACEPOINT_DESC(KBASE_AUX_DEVFREQ_TARGET, \
"New device frequency target", \
"@L", \
"target_freq") \
- TP_DESC(KBASE_AUX_PROTECTED_ENTER_START, \
+ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_START, \
"enter protected mode start", \
"@p", \
"gpu") \
- TP_DESC(KBASE_AUX_PROTECTED_ENTER_END, \
+ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_END, \
"enter protected mode end", \
"@p", \
"gpu") \
- TP_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \
+ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \
"leave protected mode start", \
"@p", \
"gpu") \
- TP_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \
+ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \
"leave protected mode end", \
"@p", \
"gpu") \
- TP_DESC(KBASE_AUX_JIT_STATS, \
+ TRACEPOINT_DESC(KBASE_AUX_JIT_STATS, \
"per-bin JIT statistics", \
"@IIIIII", \
"ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages") \
- TP_DESC(KBASE_AUX_EVENT_JOB_SLOT, \
+ TRACEPOINT_DESC(KBASE_AUX_EVENT_JOB_SLOT, \
"event on a given job slot", \
"@pIII", \
"ctx,slot_nr,atom_nr,event") \
-#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header
-#define MIPE_HEADER_TP_LIST AUX_TL_LIST
-#define MIPE_HEADER_TP_LIST_COUNT KBASE_AUX_MSG_COUNT
-#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_AUX
+#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header
+#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL
+#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_AUX
+#define MIPE_HEADER_TRACEPOINT_LIST AUX_TP_LIST
+#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_AUX_MSG_COUNT
#include "mali_kbase_mipe_gen_header.h"
@@ -2988,4 +2995,30 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
+ struct kbase_tlstream *stream,
+ u64 csffw_timestamp,
+ u64 csffw_cycle)
+{
+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(csffw_timestamp)
+ + sizeof(csffw_cycle)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &csffw_timestamp, sizeof(csffw_timestamp));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &csffw_cycle, sizeof(csffw_cycle));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
/* clang-format on */
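
As written above, the new record is laid out as a u32 msg_id, the stream's own u64 timestamp (written by kbasep_serialize_timestamp), then the two u64 payload fields. The sketch below shows how a hypothetical consumer could decode that 28-byte record; it assumes the fields are packed back-to-back with no padding (kbasep_serialize_bytes copies raw bytes) and uses illustrative example_* names that are not part of the driver.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct example_csffw_overflow_msg {
	uint32_t msg_id;             /* KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW */
	uint64_t tlstream_timestamp; /* timestamp added by the tlstream writer */
	uint64_t csffw_timestamp;    /* first payload field */
	uint64_t csffw_cycle;        /* second payload field */
};

static size_t example_decode_csffw_overflow(const char *buf,
		struct example_csffw_overflow_msg *out)
{
	size_t pos = 0;

	memcpy(&out->msg_id, buf + pos, sizeof(out->msg_id));
	pos += sizeof(out->msg_id);
	memcpy(&out->tlstream_timestamp, buf + pos, sizeof(out->tlstream_timestamp));
	pos += sizeof(out->tlstream_timestamp);
	memcpy(&out->csffw_timestamp, buf + pos, sizeof(out->csffw_timestamp));
	pos += sizeof(out->csffw_timestamp);
	memcpy(&out->csffw_cycle, buf + pos, sizeof(out->csffw_cycle));
	pos += sizeof(out->csffw_cycle);

	return pos; /* bytes consumed: 4 + 8 + 8 + 8 = 28 */
}
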
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h
index b2c20ae..fa2c399 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.h
+++ b/mali_kbase/tl/mali_kbase_tracepoints.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -454,6 +454,10 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier(
struct kbase_tlstream *stream,
const void *kcpu_queue);
+void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
+ struct kbase_tlstream *stream,
+ u64 csffw_timestamp,
+ u64 csffw_cycle);
struct kbase_tlstream;
@@ -2467,6 +2471,27 @@ struct kbase_tlstream;
kcpu_queue); \
} while (0)
+/**
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW -
+ * An overflow has happened with the CSFFW Timeline stream
+ *
+ * @kbdev: Kbase device
+ * @csffw_timestamp: Timestamp of a CSFFW event
+ * @csffw_cycle: Cycle number of a CSFFW event
+ */
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \
+ kbdev, \
+ csffw_timestamp, \
+ csffw_cycle \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ csffw_timestamp, csffw_cycle); \
+ } while (0)
+
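+
+A hypothetical call site for the new macro (illustrative only; the function name and the point at which the firmware overflow is reported are assumptions, not taken from this patch):
+
+static void example_on_csffw_overflow(struct kbase_device *kbdev,
+		u64 fw_timestamp, u64 fw_cycle)
+{
+	/* Emits the tracepoint only when CSFFW timeline tracing is enabled. */
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW(kbdev,
+		fw_timestamp, fw_cycle);
+}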
/* Gator tracepoints are hooked into TLSTREAM interface.
* When the following tracepoints are called, corresponding