diff options
author | Siddharth Kapoor <ksiddharth@google.com> | 2021-11-26 19:02:16 +0800 |
---|---|---|
committer | Siddharth Kapoor <ksiddharth@google.com> | 2021-12-06 11:32:22 +0800 |
commit | 93dab6a30cedaf7e6307492cbaa5f651514a7764 (patch) | |
tree | 8290861c9f9c7144bc8ee907089a063f6a6041d0 /mali_kbase/backend/gpu/mali_kbase_jm_rb.c | |
parent | e2249eb4503b74f19f9081b3e68fcbc824afdc10 (diff) | |
parent | 0c596dc70431fa2c70021fa1685e3efc969a852d (diff) | |
download | gpu-93dab6a30cedaf7e6307492cbaa5f651514a7764.tar.gz |
Merge r34p0-00dev1 from upstream into android-gs-pixel-5.10-gs101
Change-Id: I051ad3af9ac645fb8585219c48210df424a27807
Signed-off-by: Siddharth Kapoor <ksiddharth@google.com>
Diffstat (limited to 'mali_kbase/backend/gpu/mali_kbase_jm_rb.c')
-rw-r--r-- | mali_kbase/backend/gpu/mali_kbase_jm_rb.c | 246 |
1 files changed, 187 insertions, 59 deletions
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index b475d79..3cf4431 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -387,6 +387,9 @@ static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, { lockdep_assert_held(&kbdev->hwaccess_lock); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_MARK_FOR_RETURN_TO_JS, + katom->kctx, katom, katom->jc, + katom->slot_nr, katom->event_code); kbase_gpu_release_atom(kbdev, katom, NULL); katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; } @@ -564,7 +567,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, kbdev->protected_mode_transition = true; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_ENTER_PROTECTED_HWCNT: /* See if we can get away with disabling hwcnt atomically */ kbdev->protected_mode_hwcnt_desired = false; @@ -607,7 +610,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, kbase_pm_update_cores_state_nolock(kbdev); /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: /* Avoid unnecessary waiting on non-ACE platforms. */ if (kbdev->system_coherency == COHERENCY_ACE) { @@ -638,7 +641,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: /* * When entering into protected mode, we must ensure that the @@ -671,7 +674,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, return -EAGAIN; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_ENTER_PROTECTED_FINISHED: if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { /* @@ -742,7 +745,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, kbase_pm_update_cores_state_nolock(kbdev); /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) { /* @@ -755,7 +758,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, KBASE_ATOM_EXIT_PROTECTED_RESET; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_EXIT_PROTECTED_RESET: /* Issue the reset to the GPU */ err = kbase_gpu_protected_mode_reset(kbdev); @@ -797,7 +800,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: /* A GPU reset is issued when exiting protected mode. Once the * reset is done all atoms' state will also be reset. For this @@ -854,7 +857,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: if (kbase_gpu_check_secure_atoms(kbdev, !kbase_jd_katom_is_protected( @@ -874,7 +877,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: /* @@ -909,7 +912,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: if (katom[idx]->will_fail_event_code) { kbase_gpu_mark_atom_for_return(kbdev, @@ -936,6 +939,11 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) if (katom[idx]->event_code == BASE_JD_EVENT_PM_EVENT) { + KBASE_KTRACE_ADD_JM_SLOT_INFO( + kbdev, JM_MARK_FOR_RETURN_TO_JS, + katom[idx]->kctx, katom[idx], + katom[idx]->jc, js, + katom[idx]->event_code); katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; break; @@ -948,7 +956,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) KBASE_ATOM_GPU_RB_READY; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_GPU_RB_READY: if (idx == 1) { @@ -994,7 +1002,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) KBASE_ATOM_GPU_RB_SUBMITTED; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_GPU_RB_SUBMITTED: /* Inform power management at start/finish of @@ -1037,9 +1045,55 @@ void kbase_backend_run_atom(struct kbase_device *kbdev, kbase_backend_slot_update(kbdev); } -#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \ - (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER)) +/** + * kbase_rb_atom_might_depend - determine if one atom in the slot ringbuffer + * might depend on another from the same kctx + * @katom_a: dependee atom + * @katom_b: atom to query + * + * This can be used on atoms that belong to different slot ringbuffers + * + * Return: true if @katom_b might depend on @katom_a, false if it cannot depend. + */ +static inline bool +kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, + const struct kbase_jd_atom *katom_b) +{ + if (katom_a->kctx != katom_b->kctx) + return false; + return (katom_b->pre_dep || + (katom_b->atom_flags & (KBASE_KATOM_FLAG_X_DEP_BLOCKED | + KBASE_KATOM_FLAG_FAIL_BLOCKER))); +} +/** + * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is + * related to a failed JSn_HEAD atom + * @kbdev: kbase device + * @js: job slot to check + * @completion_code: completion code of the failed atom + * + * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but + * unlike other failure codes we _can_ re-run them. + * + * This forms step 1 in a 2-step process of removing any related atoms from a + * slot's JSn_HEAD_NEXT (ringbuffer index 1), should there have + * been a 'failure' on an atom in JSn_HEAD (ringbuffer index 0). + * + * This step only removes the atoms from the HW, and marks them as + * (potentially) ready to run again. + * + * Step 2 is on marking the JSn_HEAD atom as complete + * (kbase_gpu_complete_hw()), to dequeue said atoms and return them to the JS + * as appropriate, or re-submit them. + * + * Hence, this function must evict at a minimum the atoms related to the atom + * in JSn_HEAD that kbase_gpu_complete_hw() will also dequeue. It is acceptable + * if this function evicts more atoms than kbase_gpu_complete_hw() dequeues, as + * the next kbase_backend_slot_update() will resubmit any remaining. + * + * Return: true if an atom was evicted, false otherwise. + */ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, u32 completion_code) { @@ -1051,14 +1105,12 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, katom = kbase_gpu_inspect(kbdev, js, 0); next_katom = kbase_gpu_inspect(kbdev, js, 1); - if (next_katom && katom->kctx == next_katom->kctx && - next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && - (HAS_DEP(next_katom) || next_katom->sched_priority == - katom->sched_priority) && - (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) - != 0 || - kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) - != 0)) { + if (next_katom && + next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && + (kbase_rb_atom_might_depend(katom, next_katom) || + kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) && + (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) != 0 || + kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) { kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_NOP); next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; @@ -1083,6 +1135,29 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, return false; } +/** + * kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD + * @kbdev: kbase device + * @js: job slot to check + * @completion_code: completion code of the completed atom + * @job_tail: value read from JSn_TAIL, for STOPPED atoms + * @end_timestamp: pointer to approximate ktime value when the katom completed + * + * Among other operations, this also executes step 2 of a 2-step process of + * removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1), + * should there have been a 'failure' on an atom in JSn_HEAD (ringbuffer index + * 0). The first step is done in kbase_gpu_irq_evict(). + * + * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but + * unlike other failure codes we _can_ re-run them. + * + * When the JSn_HEAD atom is considered to be 'failed', then this will dequeue + * and return to the JS some (usually all) of the atoms evicted from the HW + * during the kbase_gpu_irq_evict() for that JSn_HEAD atom. If it dequeues an + * atom, that atom must not have been running or must already be evicted, as + * otherwise we would be in the incorrect state of having an atom both running + * on the HW and returned to the JS. + */ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, u32 completion_code, u64 job_tail, @@ -1133,9 +1208,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that * the atoms on this slot are returned in the correct order. */ - if (next_katom && katom->kctx == next_katom->kctx && - next_katom->sched_priority == - katom->sched_priority) { + if (next_katom && + kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) { WARN_ON(next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED); kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); @@ -1145,12 +1219,14 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbasep_js_device_data *js_devdata = &kbdev->js_data; int i; - if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) + if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", js, completion_code, kbase_gpu_exception_name( completion_code)); + } + #if KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR != 0 KBASE_KTRACE_DUMP(kbdev); #endif @@ -1168,18 +1244,17 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbase_jd_atom *katom_idx1 = kbase_gpu_inspect(kbdev, i, 1); - if (katom_idx0 && katom_idx0->kctx == katom->kctx && - HAS_DEP(katom_idx0) && - katom_idx0->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom_idx0 && + kbase_rb_atom_might_depend(katom, katom_idx0) && + katom_idx0->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { /* Dequeue katom_idx0 from ringbuffer */ kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); - if (katom_idx1 && - katom_idx1->kctx == katom->kctx - && HAS_DEP(katom_idx1) && - katom_idx0->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom_idx1 && kbase_rb_atom_might_depend( + katom, katom_idx1) && + katom_idx0->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { /* Dequeue katom_idx1 from ringbuffer */ kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); @@ -1192,11 +1267,10 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, katom_idx0->event_code = BASE_JD_EVENT_STOPPED; kbase_jm_return_atom_to_js(kbdev, katom_idx0); - } else if (katom_idx1 && - katom_idx1->kctx == katom->kctx && - HAS_DEP(katom_idx1) && - katom_idx1->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { + } else if (katom_idx1 && kbase_rb_atom_might_depend( + katom, katom_idx1) && + katom_idx1->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { /* Can not dequeue this atom yet - will be * dequeued when atom at idx0 completes */ @@ -1369,17 +1443,63 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) kbase_pm_protected_override_disable(kbdev); } +/** + * should_stop_next_atom - given a soft/hard stop action, determine if the next + * atom on a slot should be stopped + * @kbdev: kbase devices + * @head_katom: atom currently in the JSn_HEAD + * @next_katom: atom currently in the JSn_HEAD_NEXT + * @action: JS_COMMAND_<...> action for soft/hard-stop + * + * This is used in cases where @head_katom is the target of the soft/hard-stop. + * It only makes sense to call this when @head_katom and @next_katom are from + * the same slot. + * + * Return: true if @next_katom should also be stopped with the given action, + * false otherwise + */ +static bool should_stop_next_atom(struct kbase_device *kbdev, + const struct kbase_jd_atom *head_katom, + const struct kbase_jd_atom *next_katom, + u32 action) +{ + bool ret = false; + u32 hw_action = action & JS_COMMAND_MASK; + + switch (hw_action) { + case JS_COMMAND_SOFT_STOP: + ret = kbase_js_atom_runs_before(kbdev, head_katom, next_katom, + 0u); + break; + case JS_COMMAND_HARD_STOP: + /* Unlike soft-stop, a hard-stop targeting a particular atom + * should not cause atoms from unrelated contexts to be + * removed + */ + ret = (head_katom->kctx == next_katom->kctx); + break; + default: + /* Other stop actions are possible, but the driver should not + * be generating them at this point in the call chain + */ + WARN(1, "Unexpected stop action: 0x%.8x", hw_action); + break; + } + return ret; +} + static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, int js, struct kbase_jd_atom *katom, u32 action) { + struct kbase_context *kctx = katom->kctx; u32 hw_action = action & JS_COMMAND_MASK; kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, katom->core_req, katom); - katom->kctx->blocked_js[js][katom->sched_priority] = true; + kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority); } static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, @@ -1387,11 +1507,14 @@ static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, u32 action, bool disjoint) { + struct kbase_context *kctx = katom->kctx; + lockdep_assert_held(&kbdev->hwaccess_lock); katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_gpu_mark_atom_for_return(kbdev, katom); - katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; + kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, + katom->sched_priority); if (disjoint) kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, @@ -1419,7 +1542,9 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, u32 action) { struct kbase_jd_atom *katom_idx0; + struct kbase_context *kctx_idx0 = NULL; struct kbase_jd_atom *katom_idx1; + struct kbase_context *kctx_idx1 = NULL; bool katom_idx0_valid, katom_idx1_valid; @@ -1433,30 +1558,32 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); - if (katom_idx0) + if (katom_idx0) { + kctx_idx0 = katom_idx0->kctx; prio_idx0 = katom_idx0->sched_priority; - if (katom_idx1) + } + if (katom_idx1) { + kctx_idx1 = katom_idx1->kctx; prio_idx1 = katom_idx1->sched_priority; + } if (katom) { katom_idx0_valid = (katom_idx0 == katom); - /* If idx0 is to be removed and idx1 is on the same context, - * then idx1 must also be removed otherwise the atoms might be - * returned out of order - */ if (katom_idx1) - katom_idx1_valid = (katom_idx1 == katom) || - (katom_idx0_valid && - (katom_idx0->kctx == - katom_idx1->kctx)); + katom_idx1_valid = (katom_idx1 == katom); else katom_idx1_valid = false; } else { - katom_idx0_valid = - (katom_idx0 && (!kctx || katom_idx0->kctx == kctx)); - katom_idx1_valid = - (katom_idx1 && (!kctx || katom_idx1->kctx == kctx)); + katom_idx0_valid = (katom_idx0 && (!kctx || kctx_idx0 == kctx)); + katom_idx1_valid = (katom_idx1 && (!kctx || kctx_idx1 == kctx)); } + /* If there's an atom in JSn_HEAD_NEXT that we haven't already decided + * to stop, but we're stopping the JSn_HEAD atom, see if they are + * related/ordered in some way that would require the same stop action + */ + if (!katom_idx1_valid && katom_idx0_valid && katom_idx1) + katom_idx1_valid = should_stop_next_atom(kbdev, katom_idx0, + katom_idx1, action); if (katom_idx0_valid) stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); @@ -1472,14 +1599,15 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, katom_idx1->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_jm_return_atom_to_js(kbdev, katom_idx1); - katom_idx1->kctx->blocked_js[js][prio_idx1] = - true; + kbase_jsctx_slot_prio_blocked_set(kctx_idx1, js, + prio_idx1); } katom_idx0->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_jm_return_atom_to_js(kbdev, katom_idx0); - katom_idx0->kctx->blocked_js[js][prio_idx0] = true; + kbase_jsctx_slot_prio_blocked_set(kctx_idx0, js, + prio_idx0); } else { /* katom_idx0 is on GPU */ if (katom_idx1_valid && katom_idx1->gpu_rb_state == |