author     Siddharth Kapoor <ksiddharth@google.com>    2021-11-26 19:02:16 +0800
committer  Siddharth Kapoor <ksiddharth@google.com>    2021-12-06 11:32:22 +0800
commit     93dab6a30cedaf7e6307492cbaa5f651514a7764 (patch)
tree       8290861c9f9c7144bc8ee907089a063f6a6041d0 /mali_kbase/backend/gpu/mali_kbase_jm_rb.c
parent     e2249eb4503b74f19f9081b3e68fcbc824afdc10 (diff)
parent     0c596dc70431fa2c70021fa1685e3efc969a852d (diff)
Merge r34p0-00dev1 from upstream into android-gs-pixel-5.10-gs101
Change-Id: I051ad3af9ac645fb8585219c48210df424a27807
Signed-off-by: Siddharth Kapoor <ksiddharth@google.com>
Diffstat (limited to 'mali_kbase/backend/gpu/mali_kbase_jm_rb.c')
 mali_kbase/backend/gpu/mali_kbase_jm_rb.c (-rw-r--r--) | 246
 1 file changed, 187 insertions(+), 59 deletions(-)
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index b475d79..3cf4431 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -387,6 +387,9 @@ static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
{
lockdep_assert_held(&kbdev->hwaccess_lock);
+ KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_MARK_FOR_RETURN_TO_JS,
+ katom->kctx, katom, katom->jc,
+ katom->slot_nr, katom->event_code);
kbase_gpu_release_atom(kbdev, katom, NULL);
katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
}
@@ -564,7 +567,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
kbdev->protected_mode_transition = true;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_ENTER_PROTECTED_HWCNT:
/* See if we can get away with disabling hwcnt atomically */
kbdev->protected_mode_hwcnt_desired = false;
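This merge converts the driver's /* fallthrough */ comment annotations into the kernel's fallthrough; pseudo-keyword, which compilers can actually check. A minimal sketch of the idea, assuming a compiler that supports the attribute (the kernel's real macro lives in include/linux/compiler_attributes.h):

/* Simplified form of the kernel's macro: on supporting compilers,
 * `fallthrough` expands to an attribute that tells
 * -Wimplicit-fallthrough that the missing `break` is deliberate.
 */
#ifdef __has_attribute
#if __has_attribute(__fallthrough__)
#define fallthrough	__attribute__((__fallthrough__))
#endif
#endif
#ifndef fallthrough
#define fallthrough	do {} while (0)	/* fallthrough */
#endif

static int enter_next_state(int state)
{
	switch (state) {
	case 0:
		state = 1;
		fallthrough;	/* deliberate: continue into case 1 */
	case 1:
		return state + 1;
	default:
		return -1;
	}
}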
@@ -607,7 +610,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
kbase_pm_update_cores_state_nolock(kbdev);
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
/* Avoid unnecessary waiting on non-ACE platforms. */
if (kbdev->system_coherency == COHERENCY_ACE) {
@@ -638,7 +641,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY:
/*
* When entering into protected mode, we must ensure that the
@@ -671,7 +674,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
return -EAGAIN;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
/*
@@ -742,7 +745,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
kbase_pm_update_cores_state_nolock(kbdev);
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) {
/*
@@ -755,7 +758,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
KBASE_ATOM_EXIT_PROTECTED_RESET;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_EXIT_PROTECTED_RESET:
/* Issue the reset to the GPU */
err = kbase_gpu_protected_mode_reset(kbdev);
@@ -797,7 +800,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT:
/* A GPU reset is issued when exiting protected mode. Once the
* reset is done all atoms' state will also be reset. For this
@@ -854,7 +857,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
if (kbase_gpu_check_secure_atoms(kbdev,
!kbase_jd_katom_is_protected(
@@ -874,7 +877,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
/*
@@ -909,7 +912,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
if (katom[idx]->will_fail_event_code) {
kbase_gpu_mark_atom_for_return(kbdev,
@@ -936,6 +939,11 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
if (katom[idx]->event_code ==
BASE_JD_EVENT_PM_EVENT) {
+ KBASE_KTRACE_ADD_JM_SLOT_INFO(
+ kbdev, JM_MARK_FOR_RETURN_TO_JS,
+ katom[idx]->kctx, katom[idx],
+ katom[idx]->jc, js,
+ katom[idx]->event_code);
katom[idx]->gpu_rb_state =
KBASE_ATOM_GPU_RB_RETURN_TO_JS;
break;
@@ -948,7 +956,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
KBASE_ATOM_GPU_RB_READY;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_GPU_RB_READY:
if (idx == 1) {
@@ -994,7 +1002,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
KBASE_ATOM_GPU_RB_SUBMITTED;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_GPU_RB_SUBMITTED:
/* Inform power management at start/finish of
@@ -1037,9 +1045,55 @@ void kbase_backend_run_atom(struct kbase_device *kbdev,
kbase_backend_slot_update(kbdev);
}
-#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \
- (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER))
+/**
+ * kbase_rb_atom_might_depend - determine if one atom in the slot ringbuffer
+ * might depend on another from the same kctx
+ * @katom_a: dependee atom
+ * @katom_b: atom to query
+ *
+ * This can be used on atoms that belong to different slot ringbuffers.
+ *
+ * Return: true if @katom_b might depend on @katom_a, false if it cannot.
+ */
+static inline bool
+kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a,
+ const struct kbase_jd_atom *katom_b)
+{
+ if (katom_a->kctx != katom_b->kctx)
+ return false;
+ return (katom_b->pre_dep ||
+ (katom_b->atom_flags & (KBASE_KATOM_FLAG_X_DEP_BLOCKED |
+ KBASE_KATOM_FLAG_FAIL_BLOCKER)));
+}
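The new helper above folds the removed HAS_DEP() macro together with the same-context check its callers previously open-coded. A hedged usage sketch, mirroring the call sites further down (head/next are illustrative names):

/* Illustrative only: atoms from different contexts can never depend
 * on each other, so the helper filters on kctx before checking the
 * dependency flags that HAS_DEP() used to test.
 */
struct kbase_jd_atom *head = kbase_gpu_inspect(kbdev, js, 0);
struct kbase_jd_atom *next = kbase_gpu_inspect(kbdev, js, 1);

if (next && kbase_rb_atom_might_depend(head, next)) {
	/* @next might depend on @head, so evict/dequeue it as well */
}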
+/**
+ * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is
+ * related to a failed JSn_HEAD atom
+ * @kbdev: kbase device
+ * @js: job slot to check
+ * @completion_code: completion code of the failed atom
+ *
+ * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but
+ * unlike other failure codes we _can_ re-run them.
+ *
+ * This forms step 1 in a 2-step process of removing any related atoms from a
+ * slot's JSn_HEAD_NEXT (ringbuffer index 1), should there have
+ * been a 'failure' on an atom in JSn_HEAD (ringbuffer index 0).
+ *
+ * This step only removes the atoms from the HW, and marks them as
+ * (potentially) ready to run again.
+ *
+ * Step 2 happens when the JSn_HEAD atom is marked as complete
+ * (kbase_gpu_complete_hw()): the evicted atoms are then dequeued and
+ * returned to the JS as appropriate, or re-submitted.
+ *
+ * Hence, this function must evict at a minimum the atoms related to the atom
+ * in JSn_HEAD that kbase_gpu_complete_hw() will also dequeue. It is acceptable
+ * if this function evicts more atoms than kbase_gpu_complete_hw() dequeues, as
+ * the next kbase_backend_slot_update() will resubmit any remaining.
+ *
+ * Return: true if an atom was evicted, false otherwise.
+ */
bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
u32 completion_code)
{
@@ -1051,14 +1105,12 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
katom = kbase_gpu_inspect(kbdev, js, 0);
next_katom = kbase_gpu_inspect(kbdev, js, 1);
- if (next_katom && katom->kctx == next_katom->kctx &&
- next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
- (HAS_DEP(next_katom) || next_katom->sched_priority ==
- katom->sched_priority) &&
- (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO))
- != 0 ||
- kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI))
- != 0)) {
+ if (next_katom &&
+ next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+ (kbase_rb_atom_might_depend(katom, next_katom) ||
+ kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) &&
+ (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) != 0 ||
+ kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) {
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
JS_COMMAND_NOP);
next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
@@ -1083,6 +1135,29 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
return false;
}
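Taken together with the kernel-doc above, the failure path splits into the two steps sketched below. This outline is a hedged sketch: the real calls are made from the job-slot IRQ handling code, which is outside this diff.

/* Sketch of the two-step flow for a failed JSn_HEAD atom, assuming an
 * IRQ-time caller such as the job-done path (not part of this diff).
 */
static void example_handle_failed_job(struct kbase_device *kbdev, int js,
				      u32 completion_code, u64 job_tail,
				      ktime_t *end_timestamp)
{
	/* Step 1: NOP out JS_COMMAND_NEXT and mark the JSn_HEAD_NEXT
	 * atom as ready to run again (kbase_gpu_irq_evict()).
	 */
	kbase_gpu_irq_evict(kbdev, js, completion_code);

	/* Step 2: complete the failed JSn_HEAD atom; this dequeues the
	 * evicted atom(s) and returns them to the JS or resubmits them.
	 */
	kbase_gpu_complete_hw(kbdev, js, completion_code, job_tail,
			      end_timestamp);
}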
+/**
+ * kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD
+ * @kbdev: kbase device
+ * @js: job slot to check
+ * @completion_code: completion code of the completed atom
+ * @job_tail: value read from JSn_TAIL, for STOPPED atoms
+ * @end_timestamp: pointer to approximate ktime value when the katom completed
+ *
+ * Among other operations, this also executes step 2 of a 2-step process of
+ * removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1),
+ * should there have been a 'failure' on an atom in JSn_HEAD (ringbuffer index
+ * 0). The first step is done in kbase_gpu_irq_evict().
+ *
+ * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but
+ * unlike other failure codes we _can_ re-run them.
+ *
+ * When the JSn_HEAD atom is considered to be 'failed', then this will dequeue
+ * and return to the JS some (usually all) of the atoms evicted from the HW
+ * during the kbase_gpu_irq_evict() for that JSn_HEAD atom. If it dequeues an
+ * atom, that atom must not have been running or must already be evicted, as
+ * otherwise we would be in the incorrect state of having an atom both running
+ * on the HW and returned to the JS.
+ */
void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
u32 completion_code,
u64 job_tail,
@@ -1133,9 +1208,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
* registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
* the atoms on this slot are returned in the correct order.
*/
- if (next_katom && katom->kctx == next_katom->kctx &&
- next_katom->sched_priority ==
- katom->sched_priority) {
+ if (next_katom &&
+ kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) {
WARN_ON(next_katom->gpu_rb_state ==
KBASE_ATOM_GPU_RB_SUBMITTED);
kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
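kbase_js_atom_runs_before() is new in this drop but defined outside this file's diff. At both call sites here it is passed flags of 0u and replaces the old "same context and same scheduling priority" test, so a minimal equivalent would look like the hypothetical sketch below; the real helper presumably generalises the ordering rules via its kbdev and flags parameters.

/* Hypothetical stand-in only; not the driver's actual implementation.
 * It reproduces the condition this patch removed from the callers.
 */
static bool example_runs_before(const struct kbase_jd_atom *katom_a,
				const struct kbase_jd_atom *katom_b)
{
	return katom_a->kctx == katom_b->kctx &&
	       katom_a->sched_priority == katom_b->sched_priority;
}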
@@ -1145,12 +1219,14 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
int i;
- if (!kbase_ctx_flag(katom->kctx, KCTX_DYING))
+ if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) {
dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
js, completion_code,
kbase_gpu_exception_name(
completion_code));
+ }
+
#if KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR != 0
KBASE_KTRACE_DUMP(kbdev);
#endif
@@ -1168,18 +1244,17 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *katom_idx1 =
kbase_gpu_inspect(kbdev, i, 1);
- if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
- HAS_DEP(katom_idx0) &&
- katom_idx0->gpu_rb_state !=
- KBASE_ATOM_GPU_RB_SUBMITTED) {
+ if (katom_idx0 &&
+ kbase_rb_atom_might_depend(katom, katom_idx0) &&
+ katom_idx0->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
/* Dequeue katom_idx0 from ringbuffer */
kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
- if (katom_idx1 &&
- katom_idx1->kctx == katom->kctx
- && HAS_DEP(katom_idx1) &&
- katom_idx0->gpu_rb_state !=
- KBASE_ATOM_GPU_RB_SUBMITTED) {
+ if (katom_idx1 && kbase_rb_atom_might_depend(
+ katom, katom_idx1) &&
+ katom_idx0->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
/* Dequeue katom_idx1 from ringbuffer */
kbase_gpu_dequeue_atom(kbdev, i,
end_timestamp);
@@ -1192,11 +1267,10 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
kbase_jm_return_atom_to_js(kbdev, katom_idx0);
- } else if (katom_idx1 &&
- katom_idx1->kctx == katom->kctx &&
- HAS_DEP(katom_idx1) &&
- katom_idx1->gpu_rb_state !=
- KBASE_ATOM_GPU_RB_SUBMITTED) {
+ } else if (katom_idx1 && kbase_rb_atom_might_depend(
+ katom, katom_idx1) &&
+ katom_idx1->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
/* Can not dequeue this atom yet - will be
* dequeued when atom at idx0 completes
*/
@@ -1369,17 +1443,63 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
kbase_pm_protected_override_disable(kbdev);
}
+/**
+ * should_stop_next_atom - given a soft/hard stop action, determine if the next
+ * atom on a slot should be stopped
+ * @kbdev: kbase device
+ * @head_katom: atom currently in the JSn_HEAD
+ * @next_katom: atom currently in the JSn_HEAD_NEXT
+ * @action: JS_COMMAND_<...> action for soft/hard-stop
+ *
+ * This is used in cases where @head_katom is the target of the soft/hard-stop.
+ * It only makes sense to call this when @head_katom and @next_katom are from
+ * the same slot.
+ *
+ * Return: true if @next_katom should also be stopped with the given action,
+ * false otherwise
+ */
+static bool should_stop_next_atom(struct kbase_device *kbdev,
+ const struct kbase_jd_atom *head_katom,
+ const struct kbase_jd_atom *next_katom,
+ u32 action)
+{
+ bool ret = false;
+ u32 hw_action = action & JS_COMMAND_MASK;
+
+ switch (hw_action) {
+ case JS_COMMAND_SOFT_STOP:
+ ret = kbase_js_atom_runs_before(kbdev, head_katom, next_katom,
+ 0u);
+ break;
+ case JS_COMMAND_HARD_STOP:
+ /* Unlike soft-stop, a hard-stop targeting a particular atom
+ * should not cause atoms from unrelated contexts to be
+ * removed
+ */
+ ret = (head_katom->kctx == next_katom->kctx);
+ break;
+ default:
+ /* Other stop actions are possible, but the driver should not
+ * be generating them at this point in the call chain
+ */
+ WARN(1, "Unexpected stop action: 0x%.8x", hw_action);
+ break;
+ }
+ return ret;
+}
+
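Further down, this patch calls the helper from kbase_backend_soft_hard_stop_slot() when the JSn_HEAD atom is being stopped but the JSn_HEAD_NEXT atom was not itself a target. A condensed usage sketch (head/next are illustrative names; both atoms must come from the same slot, as the kernel-doc requires):

/* Illustrative: decide whether the trailing atom must be stopped with
 * the same action as the head atom it sits behind.
 */
struct kbase_jd_atom *head = kbase_gpu_inspect(kbdev, js, 0);
struct kbase_jd_atom *next = kbase_gpu_inspect(kbdev, js, 1);

if (head && next &&
    should_stop_next_atom(kbdev, head, next, JS_COMMAND_SOFT_STOP)) {
	/* stop @next as well, with the same soft-stop action */
}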
static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
int js,
struct kbase_jd_atom *katom,
u32 action)
{
+ struct kbase_context *kctx = katom->kctx;
u32 hw_action = action & JS_COMMAND_MASK;
kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
katom->core_req, katom);
- katom->kctx->blocked_js[js][katom->sched_priority] = true;
+ kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority);
}
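kbase_jsctx_slot_prio_blocked_set() replaces the direct writes to kctx->blocked_js[][] shown on the removed lines, but its body is not part of this diff. A hypothetical minimal equivalent, assuming it only sets the flag (the real helper may also do tracing or extra bookkeeping):

/* Hypothetical stand-in only: mark (slot, priority) blocked for @kctx,
 * exactly as the removed open-coded write did, so the JS stops
 * submitting atoms of that priority to that slot until unblocked.
 */
static inline void example_slot_prio_blocked_set(struct kbase_context *kctx,
						 int js, int sched_prio)
{
	kctx->blocked_js[js][sched_prio] = true;
}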
static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
@@ -1387,11 +1507,14 @@ static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
u32 action,
bool disjoint)
{
+ struct kbase_context *kctx = katom->kctx;
+
lockdep_assert_held(&kbdev->hwaccess_lock);
katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
kbase_gpu_mark_atom_for_return(kbdev, katom);
- katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true;
+ kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr,
+ katom->sched_priority);
if (disjoint)
kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
@@ -1419,7 +1542,9 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
u32 action)
{
struct kbase_jd_atom *katom_idx0;
+ struct kbase_context *kctx_idx0 = NULL;
struct kbase_jd_atom *katom_idx1;
+ struct kbase_context *kctx_idx1 = NULL;
bool katom_idx0_valid, katom_idx1_valid;
@@ -1433,30 +1558,32 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
- if (katom_idx0)
+ if (katom_idx0) {
+ kctx_idx0 = katom_idx0->kctx;
prio_idx0 = katom_idx0->sched_priority;
- if (katom_idx1)
+ }
+ if (katom_idx1) {
+ kctx_idx1 = katom_idx1->kctx;
prio_idx1 = katom_idx1->sched_priority;
+ }
if (katom) {
katom_idx0_valid = (katom_idx0 == katom);
- /* If idx0 is to be removed and idx1 is on the same context,
- * then idx1 must also be removed otherwise the atoms might be
- * returned out of order
- */
if (katom_idx1)
- katom_idx1_valid = (katom_idx1 == katom) ||
- (katom_idx0_valid &&
- (katom_idx0->kctx ==
- katom_idx1->kctx));
+ katom_idx1_valid = (katom_idx1 == katom);
else
katom_idx1_valid = false;
} else {
- katom_idx0_valid =
- (katom_idx0 && (!kctx || katom_idx0->kctx == kctx));
- katom_idx1_valid =
- (katom_idx1 && (!kctx || katom_idx1->kctx == kctx));
+ katom_idx0_valid = (katom_idx0 && (!kctx || kctx_idx0 == kctx));
+ katom_idx1_valid = (katom_idx1 && (!kctx || kctx_idx1 == kctx));
}
+ /* If there's an atom in JSn_HEAD_NEXT that we haven't already decided
+ * to stop, but we're stopping the JSn_HEAD atom, see if they are
+ * related/ordered in some way that would require the same stop action
+ */
+ if (!katom_idx1_valid && katom_idx0_valid && katom_idx1)
+ katom_idx1_valid = should_stop_next_atom(kbdev, katom_idx0,
+ katom_idx1, action);
if (katom_idx0_valid)
stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
@@ -1472,14 +1599,15 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
katom_idx1->event_code =
BASE_JD_EVENT_REMOVED_FROM_NEXT;
kbase_jm_return_atom_to_js(kbdev, katom_idx1);
- katom_idx1->kctx->blocked_js[js][prio_idx1] =
- true;
+ kbase_jsctx_slot_prio_blocked_set(kctx_idx1, js,
+ prio_idx1);
}
katom_idx0->event_code =
BASE_JD_EVENT_REMOVED_FROM_NEXT;
kbase_jm_return_atom_to_js(kbdev, katom_idx0);
- katom_idx0->kctx->blocked_js[js][prio_idx0] = true;
+ kbase_jsctx_slot_prio_blocked_set(kctx_idx0, js,
+ prio_idx0);
} else {
/* katom_idx0 is on GPU */
if (katom_idx1_valid && katom_idx1->gpu_rb_state ==