author    | Sidath Senanayake <sidaths@google.com> | 2016-11-09 14:53:08 +0100
committer | Sidath Senanayake <sidaths@google.com> | 2016-11-09 14:53:08 +0100
commit    | 9232778c98af5ff85caca9e5cd37b963fb3196d5 (patch)
tree      | 9edd429dc765d69dc72ffce9d9598ba8e2a4cd0f /mali_kbase/backend
parent    | 192bd796c27ac6eb9443af4b7e027294316f7c5b (diff)
download  | gpu-9232778c98af5ff85caca9e5cd37b963fb3196d5.tar.gz
Mali Bifrost DDK r2p0 KMD
Provenance:
6aec14e96 (collaborate/EAC/b_r2p0)
BX304L01B-BU-00000-r2p0-03rel0
BX304L06A-BU-00000-r2p0-03rel0
BX304X07X-BU-00000-r2p0-03bet0
Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Ic7007d25551bd1478b491ec5a0fc9ecb400aa25d
Diffstat (limited to 'mali_kbase/backend')
22 files changed, 917 insertions, 457 deletions
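The most self-contained functional addition in this patch is the register I/O history in mali_kbase_device_hw.c: a debugfs-gated ring buffer that records every kbase_reg_read()/kbase_reg_write() and is dumped from kbase_debug_dump_registers() on a GPU reset. Below is a minimal, standalone userspace sketch of that ring-buffer indexing only, for illustration; the structure and function names are stand-ins, not the kbase API, and the real driver's spinlock, vmalloc allocation and dev_err() reporting are omitted.

/*
 * Illustrative sketch (not driver code): models the indexing used by the
 * kbase_io_history ring buffer added in this patch. 'count' is both the
 * running total of accesses and the write index; the low bit of 'addr'
 * marks a write; a dump prints the last min(count, size) entries,
 * oldest first.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct io_access {
	uintptr_t addr;   /* register address, low bit set for a write */
	uint32_t value;
};

struct io_history {
	struct io_access *buf;
	uint16_t count;   /* total accesses so far, also the write index */
	uint16_t size;    /* ring-buffer capacity */
};

static void history_add(struct io_history *h, uintptr_t addr,
			uint32_t value, int write)
{
	struct io_access *io = &h->buf[h->count % h->size];

	io->addr = addr | (write ? 1 : 0);
	io->value = value;
	++h->count;
	/* If the 16-bit counter wraps to 0, move it to 'size' so a later
	 * dump still covers the whole buffer. */
	if (h->count == 0)
		h->count = h->size;
}

static void history_dump(const struct io_history *h)
{
	size_t iters = (h->size > h->count) ? h->count : h->size;
	size_t i;

	printf("Last %zu register accesses of %u total:\n",
	       iters, (unsigned)h->count);
	for (i = 0; i < iters; ++i) {
		const struct io_access *io =
			&h->buf[(h->count - iters + i) % h->size];

		printf("%6zu: %c: reg 0x%lx val %08x\n", i,
		       (io->addr & 1) ? 'w' : 'r',
		       (unsigned long)(io->addr & ~(uintptr_t)1),
		       io->value);
	}
}

int main(void)
{
	struct io_history h = { .buf = NULL, .count = 0, .size = 4 };
	uint32_t i;

	h.buf = calloc(h.size, sizeof(*h.buf));
	if (!h.buf)
		return 1;

	/* Six accesses into a four-entry ring: only the last four are
	 * retained, which is exactly what the dump reports. */
	for (i = 0; i < 6; ++i)
		history_add(&h, 0x1000 + 4 * i, 0xa0 + i, i & 1);

	history_dump(&h);
	free(h.buf);
	return 0;
}

Because 'count' doubles as the write index, kbase_io_history_resize() in the patch deliberately discards old entries rather than copying them into the new buffer, as its in-line comment explains.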
diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c index c686253..fef9a2c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c @@ -21,6 +21,8 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode) { + kbdev->current_gpu_coherency_mode = mode; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL); } diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index 032ca4b..e0752ab 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -16,6 +16,7 @@ #include <mali_kbase.h> +#include <mali_kbase_tlstream.h> #include <mali_kbase_config_defaults.h> #include <backend/gpu/mali_kbase_pm_internal.h> #ifdef CONFIG_DEVFREQ_THERMAL @@ -105,6 +106,8 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) kbdev->current_voltage = voltage; kbdev->current_freq = freq; + kbase_tlstream_aux_devfreq_target((u64)freq); + kbase_pm_reset_dvfs_utilisation(kbdev); return err; diff --git a/mali_kbase/backend/gpu/mali_kbase_device_hw.c b/mali_kbase/backend/gpu/mali_kbase_device_hw.c index b9238a3..dcdf15c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_device_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_device_hw.c @@ -26,16 +26,145 @@ #include <backend/gpu/mali_kbase_device_internal.h> #if !defined(CONFIG_MALI_NO_MALI) + + +#ifdef CONFIG_DEBUG_FS + + +int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) +{ + struct kbase_io_access *old_buf; + struct kbase_io_access *new_buf; + unsigned long flags; + + if (!new_size) + goto out_err; /* The new size must not be 0 */ + + new_buf = vmalloc(new_size * sizeof(*h->buf)); + if (!new_buf) + goto out_err; + + spin_lock_irqsave(&h->lock, flags); + + old_buf = h->buf; + + /* Note: we won't bother with copying the old data over. The dumping + * logic wouldn't work properly as it relies on 'count' both as a + * counter and as an index to the buffer which would have changed with + * the new array. This is a corner case that we don't need to support. 
+ */ + h->count = 0; + h->size = new_size; + h->buf = new_buf; + + spin_unlock_irqrestore(&h->lock, flags); + + vfree(old_buf); + + return 0; + +out_err: + return -1; +} + + +int kbase_io_history_init(struct kbase_io_history *h, u16 n) +{ + h->enabled = false; + spin_lock_init(&h->lock); + h->count = 0; + h->size = 0; + h->buf = NULL; + if (kbase_io_history_resize(h, n)) + return -1; + + return 0; +} + + +void kbase_io_history_term(struct kbase_io_history *h) +{ + vfree(h->buf); + h->buf = NULL; +} + + +/* kbase_io_history_add - add new entry to the register access history + * + * @h: Pointer to the history data structure + * @addr: Register address + * @value: The value that is either read from or written to the register + * @write: 1 if it's a register write, 0 if it's a read + */ +static void kbase_io_history_add(struct kbase_io_history *h, + void __iomem const *addr, u32 value, u8 write) +{ + struct kbase_io_access *io; + unsigned long flags; + + spin_lock_irqsave(&h->lock, flags); + + io = &h->buf[h->count % h->size]; + io->addr = (uintptr_t)addr | write; + io->value = value; + ++h->count; + /* If count overflows, move the index by the buffer size so the entire + * buffer will still be dumped later */ + if (unlikely(!h->count)) + h->count = h->size; + + spin_unlock_irqrestore(&h->lock, flags); +} + + +void kbase_io_history_dump(struct kbase_device *kbdev) +{ + struct kbase_io_history *const h = &kbdev->io_history; + u16 i; + size_t iters; + unsigned long flags; + + if (!unlikely(h->enabled)) + return; + + spin_lock_irqsave(&h->lock, flags); + + dev_err(kbdev->dev, "Register IO History:"); + iters = (h->size > h->count) ? h->count : h->size; + dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, + h->count); + for (i = 0; i < iters; ++i) { + struct kbase_io_access *io = + &h->buf[(h->count - iters + i) % h->size]; + char const access = (io->addr & 1) ? 
'w' : 'r'; + + dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access, + (void *)(io->addr & ~0x1), io->value); + } + + spin_unlock_irqrestore(&h->lock, flags); +} + + +#endif /* CONFIG_DEBUG_FS */ + + void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); KBASE_DEBUG_ASSERT(kbdev->dev != NULL); - dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); writel(value, kbdev->reg + offset); +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbdev->io_history.enabled)) + kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, + value, 1); +#endif /* CONFIG_DEBUG_FS */ + dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); + if (kctx && kctx->jctx.tb) kbase_device_trace_register_access(kctx, REG_WRITE, offset, value); @@ -53,7 +182,13 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, val = readl(kbdev->reg + offset); +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbdev->io_history.enabled)) + kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, + val, 0); +#endif /* CONFIG_DEBUG_FS */ dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val); + if (kctx && kctx->jctx.tb) kbase_device_trace_register_access(kctx, REG_READ, offset, val); return val; diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c index 3f06a10..7ad309e 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c @@ -45,11 +45,11 @@ static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) KBASE_INSTR_STATE_REQUEST_CLEAN); /* Enable interrupt */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | CLEAN_CACHES_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* clean&invalidate the caches so we're sure the mmu tables for the dump * buffer is valid */ @@ -96,11 +96,11 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, } /* Enable interrupt */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | PRFCNT_SAMPLE_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* In use, this context is the owner */ kbdev->hwcnt.kctx = kctx; @@ -185,7 +185,9 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); return err; out_unrequest_cores: + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); out_err: return err; } @@ -226,11 +228,10 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) kbdev->hwcnt.backend.triggered = 0; /* Disable interrupt */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 
NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); /* Disable the counters */ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx); @@ -243,10 +244,11 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) kbase_pm_unrequest_cores(kbdev, true, kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER)); - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - kbase_pm_release_l2_caches(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx); @@ -391,12 +393,12 @@ void kbase_clean_caches_done(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); /* Disable interrupt */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~CLEAN_CACHES_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* Wakeup... */ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c index b891b12..8416b80 100644 --- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c @@ -148,6 +148,8 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) return IRQ_HANDLED; } +KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler); + static irq_handler_t kbase_handler_table[] = { [JOB_IRQ_TAG] = kbase_job_irq_handler, [MMU_IRQ_TAG] = kbase_mmu_irq_handler, diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_as.c b/mali_kbase/backend/gpu/mali_kbase_jm_as.c index f216788..202dcfa 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_as.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_as.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,8 +52,7 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); lockdep_assert_held(&js_devdata->runpool_mutex); - lockdep_assert_held(¤t_as->transaction_mutex); - lockdep_assert_held(&js_devdata->runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; @@ -142,8 +141,7 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, return; } - lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr]; if (js_per_as_data->as_busy_refcount != 0) { @@ -219,8 +217,7 @@ static bool check_is_runpool_full(struct kbase_device *kbdev, is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >= kbdev->nr_hw_address_spaces); - if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags & - KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) { + if (kctx && !kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); /* Contexts that submit might use less of the address spaces * available, due to HW workarounds. In which case, the runpool @@ -267,7 +264,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, return i; } - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* No address space currently free, see if we can release one */ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { @@ -281,16 +278,14 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, /* Don't release privileged or active contexts, or contexts with * jobs running */ - if (as_kctx && !(as_kctx->jctx.sched_info.ctx.flags & - KBASE_CTX_FLAG_PRIVILEGED) && + if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && js_per_as_data->as_busy_refcount == 0) { if (!kbasep_js_runpool_retain_ctx_nolock(kbdev, as_kctx)) { WARN(1, "Failed to retain active context\n"); - spin_unlock_irqrestore( - &js_devdata->runpool_irq.lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -303,8 +298,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, * context we're about to release without violating lock * ordering */ - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -315,7 +309,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); - if (!as_js_kctx_info->ctx.is_scheduled) { + if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { kbasep_js_runpool_requeue_or_kill_ctx(kbdev, as_kctx, true); @@ -336,11 +330,11 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, mutex_lock(&js_kctx_info->ctx.jsctx_mutex); mutex_lock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); } } - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, 
flags); mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -369,12 +363,11 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, new_address_space = &kbdev->as[as_nr]; lockdep_assert_held(&js_devdata->runpool_mutex); - lockdep_assert_held(&new_address_space->transaction_mutex); - lockdep_assert_held(&js_devdata->runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); - if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) { + if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { /* We need to retain it to keep the corresponding address space */ kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_defs.h b/mali_kbase/backend/gpu/mali_kbase_jm_defs.h index 83d4778..08a7400 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_defs.h @@ -73,8 +73,7 @@ struct slot_rb { * @reset_timer: Timeout for soft-stops before the reset * @timeouts_updated: Have timeout values just been updated? * - * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when - * accessing this structure + * The hwaccess_lock (a spinlock) must be held when accessing this structure */ struct kbase_backend_data { struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 00900a9..668258b 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -221,19 +221,40 @@ static void kbasep_job_slot_update_head_start_timestamp( /** * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint * @kbdev: kbase device - * @i: job slot + * @js: job slot * * Get kbase atom by calling kbase_gpu_inspect for given job slot. * Then use obtained katom and name of slot associated with the given * job slot number in tracepoint call to the instrumentation module * informing that given atom is no longer executed on given lpu (job slot). */ -static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i) +static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int js) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0); + int i; + for (i = 0; + i < kbase_backend_nr_atoms_submitted(kbdev, js); + i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + kbase_tlstream_tl_nret_atom_lpu(katom, + &kbdev->gpu_props.props.raw_props.js_features[js]); + } +} - kbase_tlstream_tl_nret_atom_lpu(katom, - &kbdev->gpu_props.props.raw_props.js_features[i]); +/** + * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline + * tracepoint + * @kbdev: kbase device + * @js: job slot + * + * Make a tracepoint call to the instrumentation module informing that + * softstop happened on given lpu (job slot). 
+ */ +static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, + int js) +{ + kbase_tlstream_tl_event_lpu_softstop( + &kbdev->gpu_props.props.raw_props.js_features[js]); } void kbase_job_done(struct kbase_device *kbdev, u32 done) @@ -262,7 +283,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) irq_throttle_cycles, NULL); } - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); while (done) { u32 failed = done >> 16; @@ -297,7 +318,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) NULL, 0); #endif - kbase_tlstream_aux_job_softstop(i); + kbasep_trace_tl_event_lpu_softstop( + kbdev, i); kbasep_trace_tl_nret_atom_lpu( kbdev, i); @@ -456,7 +478,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) end_timestamp); } - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); #if KBASE_GPU_RESET_EN if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_COMMITTED) { @@ -539,7 +561,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; /* Mark the point where we issue the soft-stop command */ - kbase_tlstream_aux_issue_job_softstop(target_katom); + kbase_tlstream_tl_event_atom_softstop_issue(target_katom); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { int i; @@ -725,7 +747,7 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) /* Cancel any remaining running jobs for this kctx */ mutex_lock(&kctx->jctx.lock); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* Invalidate all jobs in context, to prevent re-submitting */ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { @@ -737,7 +759,7 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) kbase_job_slot_hardstop(kctx, i, NULL); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kctx->jctx.lock); } @@ -748,12 +770,13 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, int js = target_katom->slot_nr; int priority = target_katom->sched_priority; int i; + bool stop_sent = false; KBASE_DEBUG_ASSERT(kctx != NULL); kbdev = kctx->kbdev; KBASE_DEBUG_ASSERT(kbdev != NULL); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { struct kbase_jd_atom *katom; @@ -765,8 +788,14 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, if (katom->kctx != kctx) continue; - if (katom->sched_priority > priority) + if (katom->sched_priority > priority) { + if (!stop_sent) + kbase_tlstream_tl_attrib_atom_priority_change( + target_katom); + kbase_job_slot_softstop(kbdev, js, katom); + stop_sent = true; + } } } @@ -835,7 +864,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) * policy queue either */ wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0); wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, - kctx->jctx.sched_info.ctx.is_scheduled == false); + !kbase_ctx_flag(kctx, KCTX_SCHEDULED)); spin_lock_irqsave(&reset_data.lock, flags); if (reset_data.stage == 1) { @@ -945,7 +974,7 @@ static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev, bool ret = false; int i; - 
lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); /* When we have an atom the decision can be made straight away. */ if (target_katom) @@ -1034,7 +1063,7 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, #if KBASE_GPU_RESET_EN /* We make the check for AFBC before evicting/stopping atoms. Note * that no other thread can modify the slots whilst we have the - * runpool_irq lock. */ + * hwaccess_lock. */ int needs_workaround_for_afbc = kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542) && kbasep_check_for_afbc_on_slot(kbdev, kctx, js, @@ -1130,6 +1159,8 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) { int i; + kbase_io_history_dump(kbdev); + dev_err(kbdev->dev, "Register state:"); dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL), @@ -1162,13 +1193,14 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) static void kbasep_reset_timeout_worker(struct work_struct *data) { - unsigned long flags, mmu_flags; + unsigned long flags; struct kbase_device *kbdev; int i; ktime_t end_timestamp = ktime_get(); struct kbasep_js_device_data *js_devdata; bool try_schedule = false; bool silent = false; + u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; KBASE_DEBUG_ASSERT(data); @@ -1206,17 +1238,19 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); - spin_lock_irqsave(&kbdev->mmu_mask_change, mmu_flags); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + spin_lock(&kbdev->hwaccess_lock); + spin_lock(&kbdev->mmu_mask_change); /* We're about to flush out the IRQs and their bottom half's */ kbdev->irq_reset_flush = true; /* Disable IRQ to avoid IRQ handlers to kick in after releasing the * spinlock; this also clears any outstanding interrupts */ - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - kbase_pm_disable_interrupts(kbdev); - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + kbase_pm_disable_interrupts_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->mmu_mask_change, mmu_flags); + spin_unlock(&kbdev->mmu_mask_change); + spin_unlock(&kbdev->hwaccess_lock); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); /* Ensure that any IRQ handlers have finished * Must be done without any locks IRQ handlers will take */ @@ -1228,6 +1262,16 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) /* The flush has completed so reset the active indicator */ kbdev->irq_reset_flush = false; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { + /* Ensure that L2 is not transitioning when we send the reset + * command */ + while (--max_loops && kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_L2)) + ; + + WARN(!max_loops, "L2 power transition timed out while trying to reset\n"); + } + mutex_lock(&kbdev->pm.lock); /* We hold the pm lock, so there ought to be a current policy */ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); @@ -1250,21 +1294,19 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_pm_init_hw(kbdev, 0); /* Complete any jobs that were still on the GPU */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_backend_reset(kbdev, &end_timestamp); kbase_pm_metrics_update(kbdev, NULL); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->pm.lock); 
mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->mmu_hw_mutex); /* Reprogram the GPU's MMU */ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - struct kbase_as *as = &kbdev->as[i]; - - mutex_lock(&as->transaction_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (js_devdata->runpool_irq.per_as_data[i].kctx) kbase_mmu_update( @@ -1272,9 +1314,9 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) else kbase_mmu_disable_as(kbdev, i); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&as->transaction_mutex); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } + mutex_unlock(&kbdev->mmu_hw_mutex); kbase_pm_enable_interrupts(kbdev); @@ -1382,9 +1424,9 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) struct kbasep_js_device_data *js_devdata; js_devdata = &kbdev->js_data; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbasep_try_reset_gpu_early_locked(kbdev); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } /** @@ -1429,9 +1471,9 @@ bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) struct kbasep_js_device_data *js_devdata; js_devdata = &kbdev->js_data; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ret = kbase_prepare_to_reset_gpu_locked(kbdev); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return ret; } diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index 8f1e561..89b1288 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -40,7 +40,7 @@ * calling this. * * The following locking conditions are made on the caller: - * - it must hold the kbasep_js_device_data::runpoool_irq::lock + * - it must hold the hwaccess_lock */ void kbase_job_submit_nolock(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js); @@ -74,7 +74,7 @@ static inline char *kbasep_make_job_slot_string(int js, char *js_string) * calling this. 
* * The following locking conditions are made on the caller: - * - it must hold the kbasep_js_device_data::runpoool_irq::lock + * - it must hold the hwaccess_lock */ void kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, @@ -91,7 +91,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, * @target_katom: Atom to stop * * The following locking conditions are made on the caller: - * - it must hold the kbasep_js_device_data::runpool_irq::lock + * - it must hold the hwaccess_lock */ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, int js, diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index da7c4df..d7b4d3f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -26,6 +26,7 @@ #include <mali_kbase_js.h> #include <mali_kbase_tlstream.h> #include <mali_kbase_10969_workaround.h> +#include <backend/gpu/mali_kbase_cache_policy_backend.h> #include <backend/gpu/mali_kbase_device_internal.h> #include <backend/gpu/mali_kbase_jm_internal.h> #include <backend/gpu/mali_kbase_js_affinity.h> @@ -56,7 +57,7 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; rb->write_idx++; @@ -88,7 +89,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, return NULL; } - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; @@ -108,7 +109,7 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if ((SLOT_RB_ENTRIES(rb) - 1) < idx) return NULL; /* idx out of range */ @@ -146,7 +147,7 @@ static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js) { int i; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -173,7 +174,7 @@ static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) int js; int i; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { for (i = 0; i < SLOT_RB_SIZE; i++) { @@ -191,7 +192,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) int nr = 0; int i; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -209,7 +210,7 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) int nr = 0; int i; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (i = 0; i < SLOT_RB_SIZE; i++) { if (kbase_gpu_inspect(kbdev, js, i)) @@ -225,7 +226,7 @@ static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, int nr = 0; int i; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom 
= kbase_gpu_inspect(kbdev, js, i); @@ -237,6 +238,56 @@ static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, return nr; } +/** + * check_secure_atom - Check if the given atom is in the given secure state and + * has a ringbuffer state of at least + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @katom: Atom pointer + * @secure: Desired secure state + * + * Return: true if atom is in the given state, false otherwise + */ +static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) +{ + if (katom->gpu_rb_state >= + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && + ((kbase_jd_katom_is_protected(katom) && secure) || + (!kbase_jd_katom_is_protected(katom) && !secure))) + return true; + + return false; +} + +/** + * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given + * secure state in the ringbuffers of at least + * state + * KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE + * @kbdev: Device pointer + * @secure: Desired secure state + * + * Return: true if any atoms are in the given state, false otherwise + */ +static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, + bool secure) +{ + int js, i; + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, i); + + if (katom) { + if (check_secure_atom(katom, secure)) + return true; + } + } + } + + return false; +} + int kbase_backend_slot_free(struct kbase_device *kbdev, int js) { if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != @@ -439,7 +490,7 @@ static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev, recently_chosen_affinity); /* Note: this is where the caller must've taken the - * runpool_irq.lock */ + * hwaccess_lock */ /* Check for affinity violations - if there are any, * then we just ask the caller to requeue and try again @@ -586,15 +637,12 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, kbase_pm_metrics_update(kbdev, end_timestamp); if (katom->core_req & BASE_JD_REQ_PERMON) - kbase_pm_release_gpu_cycle_counter(kbdev); + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ case KBASE_ATOM_GPU_RB_READY: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY: - /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr, katom->affinity); @@ -603,7 +651,21 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: break; - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT: + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + if (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_CHECK || + katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_CHECK) + kbdev->protected_mode_transition = false; + + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) + kbase_vinstr_resume(kbdev->vinstr_ctx); + + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: @@ -666,11 +728,19 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) { int err = -EINVAL; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + 
lockdep_assert_held(&kbdev->hwaccess_lock); WARN_ONCE(!kbdev->protected_ops, "Cannot enter protected mode: protected callbacks not specified.\n"); + /* + * When entering into protected mode, we must ensure that the + * GPU is not operating in coherent mode as well. This is to + * ensure that no protected memory can be leaked. + */ + if (kbdev->system_coherency == COHERENCY_ACE) + kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE); + if (kbdev->protected_ops) { /* Switch GPU to protected mode */ err = kbdev->protected_ops->protected_mode_enter(kbdev); @@ -687,7 +757,7 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); WARN_ONCE(!kbdev->protected_ops, "Cannot exit protected mode: protected callbacks not specified.\n"); @@ -695,53 +765,146 @@ static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) if (!kbdev->protected_ops) return -EINVAL; - kbdev->protected_mode_transition = true; kbase_reset_gpu_silent(kbdev); return 0; } -static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, +static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, struct kbase_jd_atom **katom, int idx, int js) { int err = 0; - switch (katom[idx]->exit_protected_state) { - case KBASE_ATOM_EXIT_PROTECTED_CHECK: + switch (katom[idx]->protected_state.enter) { + case KBASE_ATOM_ENTER_PROTECTED_CHECK: + /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV + * should ensure that we are not already transitiong, and that + * there are no atoms currently on the GPU. */ + WARN_ON(kbdev->protected_mode_transition); + WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + + kbdev->protected_mode_transition = true; + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_VINSTR; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_ENTER_PROTECTED_VINSTR: + if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { + /* + * We can't switch now because + * the vinstr core state switch + * is not done yet. + */ + return -EAGAIN; + } + + /* Once reaching this point GPU must be + * switched to protected mode or vinstr + * re-enabled. */ + /* - * If the atom ahead of this one hasn't got to being - * submitted yet then bail. + * Not in correct mode, begin protected mode switch. + * Entering protected mode requires us to power down the L2, + * and drop out of fully coherent mode. */ - if (idx == 1 && - (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && - katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) - return -EAGAIN; + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; + + kbase_pm_update_cores_state_nolock(kbdev); + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: + /* Avoid unnecessary waiting on non-ACE platforms. */ + if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) { + if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || + kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { + /* + * The L2 is still powered, wait for all the users to + * finish with it before doing the actual reset. + */ + return -EAGAIN; + } + } - /* If we're not exiting protected mode then we're done here. 
*/ - if (!(kbase_gpu_in_protected_mode(kbdev) && - !kbase_jd_katom_is_protected(katom[idx]))) - return 0; + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_FINISHED; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_ENTER_PROTECTED_FINISHED: + + /* No jobs running, so we can switch GPU mode right now. */ + err = kbase_gpu_protected_mode_enter(kbdev); /* - * If there is a transition in progress, or work still - * on the GPU try again later. + * Regardless of result, we are no longer transitioning + * the GPU. */ - if (kbdev->protected_mode_transition || - kbase_gpu_atoms_submitted_any(kbdev)) - return -EAGAIN; + kbdev->protected_mode_transition = false; + + if (err) { + /* + * Failed to switch into protected mode, resume + * vinstr core and fail atom. + */ + kbase_vinstr_resume(kbdev->vinstr_ctx); + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order. */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + return -EINVAL; + } + + /* Protected mode sanity checks. */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom[idx]) == + kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), + kbase_gpu_in_protected_mode(kbdev)); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_READY; + } + + return 0; +} + +static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js) +{ + int err = 0; + + + switch (katom[idx]->protected_state.exit) { + case KBASE_ATOM_EXIT_PROTECTED_CHECK: + /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV + * should ensure that we are not already transitiong, and that + * there are no atoms currently on the GPU. */ + WARN_ON(kbdev->protected_mode_transition); + WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); /* * Exiting protected mode requires a reset, but first the L2 * needs to be powered down to ensure it's not active when the * reset is issued. 
*/ - katom[idx]->exit_protected_state = + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + kbdev->protected_mode_transition = true; + kbase_pm_update_cores_state_nolock(kbdev); + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: - if (kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_L2) || + if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { /* * The L2 is still powered, wait for all the users to @@ -749,7 +912,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, */ return -EAGAIN; } - katom[idx]->exit_protected_state = + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_RESET; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ @@ -757,7 +920,10 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, case KBASE_ATOM_EXIT_PROTECTED_RESET: /* Issue the reset to the GPU */ err = kbase_gpu_protected_mode_reset(kbdev); + if (err) { + kbdev->protected_mode_transition = false; + /* Failed to exit protected mode, fail atom */ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); @@ -775,7 +941,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, return -EINVAL; } - katom[idx]->exit_protected_state = + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ @@ -784,6 +950,9 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, if (kbase_reset_gpu_active(kbdev)) return -EAGAIN; + kbdev->protected_mode_transition = false; + kbdev->protected_mode = false; + /* protected mode sanity checks */ KBASE_DEBUG_ASSERT_MSG( kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev), @@ -798,11 +967,11 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, return 0; } -void kbase_gpu_slot_update(struct kbase_device *kbdev) +void kbase_backend_slot_update(struct kbase_device *kbdev) { int js; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { struct kbase_jd_atom *katom[2]; @@ -831,11 +1000,26 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) break; katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT; + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT: + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: + if (kbase_gpu_check_secure_atoms(kbdev, + !kbase_jd_katom_is_protected( + katom[idx]))) + break; + + if (kbdev->protected_mode_transition) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + /* * Exiting protected mode must be done before * the references on the cores are taken as @@ -843,10 +1027,26 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) * can't happen after the references for this * atom are taken. */ - ret = kbase_jm_exit_protected_mode(kbdev, - katom, idx, js); - if (ret) - break; + + if (!kbase_gpu_in_protected_mode(kbdev) && + kbase_jd_katom_is_protected(katom[idx])) { + /* Atom needs to transition into protected mode. 
*/ + ret = kbase_jm_enter_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + } else if (kbase_gpu_in_protected_mode(kbdev) && + !kbase_jd_katom_is_protected(katom[idx])) { + /* Atom needs to transition out of protected mode. */ + ret = kbase_jm_exit_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + } + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_CHECK; + + /* Atom needs no protected mode transition. */ katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; @@ -872,7 +1072,6 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) break; } - cores_ready = kbasep_js_job_check_ref_cores(kbdev, js, katom[idx]); @@ -899,81 +1098,6 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) break; katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY: - - /* Only submit if head atom or previous atom - * already submitted */ - if (idx == 1 && - (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && - katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) - break; - - /* - * If the GPU is transitioning protected mode - * then bail now and we'll be called when the - * new state has settled. - */ - if (kbdev->protected_mode_transition) - break; - - if (!kbase_gpu_in_protected_mode(kbdev) && kbase_jd_katom_is_protected(katom[idx])) { - int err = 0; - - /* Not in correct mode, take action */ - if (kbase_gpu_atoms_submitted_any(kbdev)) { - /* - * We are not in the correct - * GPU mode for this job, and - * we can't switch now because - * there are jobs already - * running. - */ - break; - } - if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { - /* - * We can't switch now because - * the vinstr core state switch - * is not done yet. - */ - break; - } - /* Once reaching this point GPU must be - * switched to protected mode or vinstr - * re-enabled. */ - - /* No jobs running, so we can switch GPU mode right now */ - err = kbase_gpu_protected_mode_enter(kbdev); - if (err) { - /* - * Failed to switch into protected mode, resume - * vinstr core and fail atom. 
- */ - kbase_vinstr_resume(kbdev->vinstr_ctx); - katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; - kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); - /* Only return if head atom or previous atom - * already removed - as atoms must be returned - * in order */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - kbase_gpu_dequeue_atom(kbdev, js, NULL); - kbase_jm_return_atom_to_js(kbdev, katom[idx]); - } - break; - } - } - - /* Protected mode sanity checks */ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); - katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ @@ -1037,10 +1161,9 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - + lockdep_assert_held(&kbdev->hwaccess_lock); kbase_gpu_enqueue_atom(kbdev, katom); - kbase_gpu_slot_update(kbdev); + kbase_backend_slot_update(kbdev); } bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) @@ -1048,7 +1171,7 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) struct kbase_jd_atom *katom; struct kbase_jd_atom *next_katom; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); katom = kbase_gpu_inspect(kbdev, js, 0); next_katom = kbase_gpu_inspect(kbdev, js, 1); @@ -1076,7 +1199,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); struct kbase_context *kctx = katom->kctx; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) && completion_code != BASE_JD_EVENT_DONE && @@ -1241,31 +1364,42 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, #endif if (completion_code == BASE_JD_EVENT_STOPPED) - kbase_jm_return_atom_to_js(kbdev, katom); + katom = kbase_jm_return_atom_to_js(kbdev, katom); else - kbase_jm_complete(kbdev, katom, end_timestamp); + katom = kbase_jm_complete(kbdev, katom, end_timestamp); + + if (katom) { + /* Cross-slot dependency has now become runnable. Try to submit + * it. */ + + /* Check if there are lower priority jobs to soft stop */ + kbase_job_slot_ctx_priority_check_locked(kctx, katom); + + kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); + } /* Job completion may have unblocked other atoms. 
Try to update all job * slots */ - kbase_gpu_slot_update(kbdev); + kbase_backend_slot_update(kbdev); } void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) { int js; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int atom_idx = 0; int idx; - for (idx = 0; idx < 2; idx++) { + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, 0); + js, atom_idx); bool keep_in_jm_rb = false; if (!katom) - continue; + break; if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) keep_in_jm_rb = true; @@ -1279,7 +1413,12 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) */ if (keep_in_jm_rb) { katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; - katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK; + katom->affinity = 0; + katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; + /* As the atom was not removed, increment the + * index so that we read the correct atom in the + * next iteration. */ + atom_idx++; continue; } @@ -1292,6 +1431,9 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) kbase_jm_complete(kbdev, katom, end_timestamp); } } + + kbdev->protected_mode_transition = false; + kbdev->protected_mode = false; } static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, @@ -1361,7 +1503,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); @@ -1548,6 +1690,7 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev, /* Limit the number of loops to avoid a hang if the interrupt is missed */ u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + unsigned long flags; mutex_lock(&kbdev->cacheclean_lock); @@ -1574,8 +1717,10 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev, mutex_unlock(&kbdev->cacheclean_lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_unrequest_cores(kbdev, false, katom->need_cache_flush_cores_retained); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_backend_complete_wq(struct kbase_device *kbdev, @@ -1620,8 +1765,12 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, base_jd_core_req core_req, u64 affinity, enum kbase_atom_coreref_state coreref_state) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity, coreref_state); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (!kbdev->pm.active_count) { mutex_lock(&kbdev->js_data.runpool_mutex); @@ -1640,7 +1789,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) js_devdata = &kbdev->js_data; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); @@ -1662,7 +1811,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) } } - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.h b/mali_kbase/backend/gpu/mali_kbase_jm_rb.h index 102d94b..1e0e05a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.h +++ 
b/mali_kbase/backend/gpu/mali_kbase_jm_rb.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -67,17 +67,6 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, int idx); /** - * kbase_gpu_slot_update - Update state based on slot ringbuffers - * - * @kbdev: Device pointer - * - * Inspect the jobs in the slot ringbuffers and update state. - * - * This will cause jobs to be submitted to hardware if they are unblocked - */ -void kbase_gpu_slot_update(struct kbase_device *kbdev); - -/** * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers * * @kbdev: Device pointer diff --git a/mali_kbase/backend/gpu/mali_kbase_js_affinity.c b/mali_kbase/backend/gpu/mali_kbase_js_affinity.c index d665420..54d8ddd 100644 --- a/mali_kbase/backend/gpu/mali_kbase_js_affinity.c +++ b/mali_kbase/backend/gpu/mali_kbase_js_affinity.c @@ -94,9 +94,8 @@ bool kbase_js_choose_affinity(u64 * const affinity, base_jd_core_req core_req = katom->core_req; unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; u64 core_availability_mask; - unsigned long flags; - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); core_availability_mask = kbase_pm_ca_get_core_mask(kbdev); @@ -105,7 +104,6 @@ bool kbase_js_choose_affinity(u64 * const affinity, * transitioning) then fail. */ if (0 == core_availability_mask) { - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); *affinity = 0; return false; } @@ -114,7 +112,6 @@ bool kbase_js_choose_affinity(u64 * const affinity, if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == BASE_JD_REQ_T) { - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); /* If the hardware supports XAFFINITY then we'll only enable * the tiler (which is the default so this is a no-op), * otherwise enable shader core 0. */ @@ -169,8 +166,6 @@ bool kbase_js_choose_affinity(u64 * const affinity, } } - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - /* * If no cores are currently available in the desired core group(s) * (core availability policy is transitioning) then fail. diff --git a/mali_kbase/backend/gpu/mali_kbase_js_affinity.h b/mali_kbase/backend/gpu/mali_kbase_js_affinity.h index fbffa3b..35d9781 100644 --- a/mali_kbase/backend/gpu/mali_kbase_js_affinity.h +++ b/mali_kbase/backend/gpu/mali_kbase_js_affinity.h @@ -38,10 +38,9 @@ * violated. * * The following locking conditions are made on the caller - * - it must hold kbasep_js_device_data.runpool_irq.lock + * - it must hold hwaccess_lock */ -bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, - int js); +bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js); /** * kbase_js_choose_affinity - Compute affinity for a given job. 
@@ -71,7 +70,7 @@ bool kbase_js_choose_affinity(u64 * const affinity, * @affinity: The affinity mask to test * * The following locks must be held by the caller - * - kbasep_js_device_data.runpool_irq.lock + * - hwaccess_lock * * Return: true if the affinity would violate the restrictions */ @@ -87,7 +86,7 @@ bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, * @affinity: The cores to retain * * The following locks must be held by the caller - * - kbasep_js_device_data.runpool_irq.lock + * - hwaccess_lock */ void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, u64 affinity); @@ -106,7 +105,7 @@ void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, * %BASE_JM_SUBMIT_SLOTS. * * The following locks must be held by the caller - * - kbasep_js_device_data.runpool_irq.lock + * - hwaccess_lock */ void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, u64 affinity); diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c index a23deb4..b09d491 100644 --- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c @@ -104,7 +104,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) js_devdata = &kbdev->js_data; /* Loop through the slots */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { struct kbase_jd_atom *atom = NULL; @@ -168,8 +168,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * However, if it's about to be * increased then the new context can't * run any jobs until they take the - * runpool_irq lock, so it's OK to - * observe the older value. + * hwaccess_lock, so it's OK to observe + * the older value. * * Similarly, if it's about to be * decreased, the last job from another @@ -270,7 +270,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) backend->timeouts_updated = false; - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return HRTIMER_NORESTART; } @@ -285,9 +285,9 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) if (!timer_callback_should_run(kbdev)) { /* Take spinlock to force synchronisation with timer */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = false; - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* From now on, return value of timer_callback_should_run() will * also cause the timer to not requeue itself. 
Its return value * cannot change, because it depends on variables updated with @@ -298,9 +298,9 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) if (timer_callback_should_run(kbdev) && !backend->timer_running) { /* Take spinlock to force synchronisation with timer */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = true; - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); hrtimer_start(&backend->scheduling_timer, HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), HRTIMER_MODE_REL); diff --git a/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c b/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c index 4a3572d..08eea1c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -97,6 +97,30 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd, return status; } +static void validate_protected_page_fault(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + /* GPUs which support (native) protected mode shall not report page + * fault addresses unless it has protected debug mode and protected + * debug mode is turned on */ + u32 protected_debug_mode = 0; + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) + return; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { + protected_debug_mode = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_STATUS), + kctx) & GPU_DBGEN; + } + + if (!protected_debug_mode) { + /* fault_addr should never be reported in protected mode. + * However, we just continue by printing an error message */ + dev_err(kbdev->dev, "Fault address reported in protected mode\n"); + } +} + void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) { const int num_as = 16; @@ -141,6 +165,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) */ kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); + /* find faulting address */ as->fault_addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, @@ -152,6 +177,15 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) AS_FAULTADDRESS_LO), kctx); + /* Mark the fault protected or not */ + as->protected_mode = kbdev->protected_mode; + + if (kbdev->protected_mode && as->fault_addr) + { + /* check if address reporting is allowed */ + validate_protected_page_fault(kbdev, kctx); + } + /* report the fault to debugfs */ kbase_as_fault_debugfs_new(kbdev, as_no); @@ -195,10 +229,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) } /* Process the interrupt for this address space */ - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_mmu_interrupt_process(kbdev, kctx, as); - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } /* reenable interrupts */ @@ -268,6 +301,8 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, { int ret; + lockdep_assert_held(&kbdev->mmu_hw_mutex); + if (op == AS_COMMAND_UNLOCK) { /* Unlock doesn't require a lock first */ ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index 711e44c..7690ec5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ 
b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -28,10 +28,12 @@ #endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ #include <mali_kbase_pm.h> -#include <backend/gpu/mali_kbase_jm_internal.h> +#include <mali_kbase_hwaccess_jm.h> #include <backend/gpu/mali_kbase_js_internal.h> #include <backend/gpu/mali_kbase_pm_internal.h> +static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); + void kbase_pm_register_access_enable(struct kbase_device *kbdev) { struct kbase_pm_callback_conf *callbacks; @@ -65,6 +67,14 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) mutex_init(&kbdev->pm.lock); + kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!kbdev->pm.backend.gpu_poweroff_wait_wq) + return -ENOMEM; + + INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, + kbase_pm_gpu_poweroff_wait_wq); + kbdev->pm.backend.gpu_powered = false; kbdev->pm.suspending = false; #ifdef CONFIG_MALI_DEBUG @@ -119,10 +129,11 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) init_waitqueue_head(&kbdev->pm.zero_active_count_wait); kbdev->pm.active_count = 0; - spin_lock_init(&kbdev->pm.power_change_lock); spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); spin_lock_init(&kbdev->pm.backend.gpu_powered_lock); + init_waitqueue_head(&kbdev->pm.backend.poweroff_wait); + if (kbase_pm_ca_init(kbdev) != 0) goto workq_fail; @@ -157,47 +168,121 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) * will wait for that state to be reached anyway */ } -bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) +static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) { + struct kbase_device *kbdev = container_of(data, struct kbase_device, + pm.backend.gpu_poweroff_wait_work); + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; unsigned long flags; - bool cores_are_available; - - lockdep_assert_held(&kbdev->pm.lock); - - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); - - /* Force all cores off */ - kbdev->pm.backend.desired_shader_state = 0; - kbdev->pm.backend.desired_tiler_state = 0; - - /* Force all cores to be unavailable, in the situation where - * transitions are in progress for some cores but not others, - * and kbase_pm_check_transitions_nolock can not immediately - * power off the cores */ - kbdev->shader_available_bitmap = 0; - kbdev->tiler_available_bitmap = 0; - kbdev->l2_available_bitmap = 0; + /* Wait for power transitions to complete. 
We do this with no locks held + * so that we don't deadlock with any pending workqueues */ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START); - cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + kbase_pm_check_transitions_sync(kbdev); KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END); - /* Don't need 'cores_are_available', because we don't return anything */ - CSTD_UNUSED(cores_are_available); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + + if (!backend->poweron_required) { + WARN_ON(kbdev->l2_available_bitmap || + kbdev->shader_available_bitmap || + kbdev->tiler_available_bitmap); + + /* Consume any change-state events */ + kbase_timeline_pm_check_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + + /* Disable interrupts and turn the clock off */ + if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) { + /* + * Page/bus faults are pending, must drop locks to + * process. Interrupts are disabled so no more faults + * should be generated at this point. + */ + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + kbase_flush_mmu_wqs(kbdev); + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + + /* Turn off clock now that fault have been handled. We + * dropped locks so poweron_required may have changed - + * power back on if this is the case.*/ + if (backend->poweron_required) + kbase_pm_clock_on(kbdev, false); + else + WARN_ON(!kbase_pm_clock_off(kbdev, + backend->poweroff_is_suspend)); + } + } - /* NOTE: We won't wait to reach the core's desired state, even if we're - * powering off the GPU itself too. It's safe to cut the power whilst - * they're transitioning to off, because the cores should be idle and - * all cache flushes should already have occurred */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + backend->poweroff_wait_in_progress = false; + if (backend->poweron_required) { + backend->poweron_required = false; + kbase_pm_update_cores_state_nolock(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - /* Consume any change-state events */ - kbase_timeline_pm_check_handle_event(kbdev, - KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); - /* Disable interrupts and turn the clock off */ - return kbase_pm_clock_off(kbdev, is_suspend); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + + wake_up(&kbdev->pm.backend.poweroff_wait); +} + +void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) +{ + unsigned long flags; + + lockdep_assert_held(&kbdev->pm.lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (!kbdev->pm.backend.poweroff_wait_in_progress) { + /* Force all cores off */ + kbdev->pm.backend.desired_shader_state = 0; + kbdev->pm.backend.desired_tiler_state = 0; + + /* Force all cores to be unavailable, in the situation where + * transitions are in progress for some cores but not others, + * and kbase_pm_check_transitions_nolock can not immediately + * power off the cores */ + kbdev->shader_available_bitmap = 0; + kbdev->tiler_available_bitmap = 0; + kbdev->l2_available_bitmap = 0; + + kbdev->pm.backend.poweroff_wait_in_progress = true; + kbdev->pm.backend.poweroff_is_suspend = is_suspend; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + /*Kick off wq here. 
Callers will have to wait*/ + queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, + &kbdev->pm.backend.gpu_poweroff_wait_work); + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +} + +static bool is_poweroff_in_progress(struct kbase_device *kbdev) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + +void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev) +{ + wait_event_killable(kbdev->pm.backend.poweroff_wait, + is_poweroff_in_progress(kbdev)); } int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, @@ -269,15 +354,7 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) mutex_lock(&kbdev->pm.lock); kbase_pm_cancel_deferred_poweroff(kbdev); - if (!kbase_pm_do_poweroff(kbdev, false)) { - /* Page/bus faults are pending, must drop pm.lock to process. - * Interrupts are disabled so no more faults should be - * generated at this point */ - mutex_unlock(&kbdev->pm.lock); - kbase_flush_mmu_wqs(kbdev); - mutex_lock(&kbdev->pm.lock); - WARN_ON(!kbase_pm_do_poweroff(kbdev, false)); - } + kbase_pm_do_poweroff(kbdev, false); mutex_unlock(&kbdev->pm.lock); } @@ -295,6 +372,8 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev) /* Shut down the metrics subsystem */ kbasep_pm_metrics_term(kbdev); + + destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); } void kbase_pm_power_changed(struct kbase_device *kbdev) @@ -304,9 +383,8 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); cores_are_available = kbase_pm_check_transitions_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END); @@ -316,10 +394,9 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); - kbase_gpu_slot_update(kbdev); - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + kbase_backend_slot_update(kbdev); } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, @@ -354,21 +431,16 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) * off prematurely) */ mutex_lock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); + kbase_pm_cancel_deferred_poweroff(kbdev); - if (!kbase_pm_do_poweroff(kbdev, true)) { - /* Page/bus faults are pending, must drop pm.lock to process. 
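With the rework above, kbase_pm_do_poweroff() no longer reports whether the clock could be turned off; it marks the power-off as in progress, queues gpu_poweroff_wait_work, and callers that must observe the GPU actually off block in kbase_pm_wait_for_poweroff_complete(). A reduced model of that queue-then-wait structure, using hypothetical my_* names rather than the kbase implementation:

#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/workqueue.h>

struct my_pm {
        spinlock_t lock;                        /* stands in for hwaccess_lock */
        bool poweroff_wait_in_progress;
        struct workqueue_struct *poweroff_wq;
        struct work_struct poweroff_work;
        wait_queue_head_t poweroff_wait;
};

static void my_poweroff_worker(struct work_struct *work)
{
        struct my_pm *pm = container_of(work, struct my_pm, poweroff_work);
        unsigned long flags;

        /* The slow part (waiting for core transitions, clock off, fault
         * flushing) runs here with no spinlocks held. */

        spin_lock_irqsave(&pm->lock, flags);
        pm->poweroff_wait_in_progress = false;
        spin_unlock_irqrestore(&pm->lock, flags);

        wake_up(&pm->poweroff_wait);
}

static void my_do_poweroff(struct my_pm *pm)
{
        unsigned long flags;

        spin_lock_irqsave(&pm->lock, flags);
        if (pm->poweroff_wait_in_progress) {
                /* Already queued; nothing more to do. */
                spin_unlock_irqrestore(&pm->lock, flags);
                return;
        }
        pm->poweroff_wait_in_progress = true;
        spin_unlock_irqrestore(&pm->lock, flags);

        queue_work(pm->poweroff_wq, &pm->poweroff_work);
}

static bool my_poweroff_done(struct my_pm *pm)
{
        unsigned long flags;
        bool done;

        spin_lock_irqsave(&pm->lock, flags);
        done = !pm->poweroff_wait_in_progress;
        spin_unlock_irqrestore(&pm->lock, flags);

        return done;
}

static void my_wait_for_poweroff(struct my_pm *pm)
{
        wait_event_killable(pm->poweroff_wait, my_poweroff_done(pm));
}

Setup would follow the kbase_hwaccess_pm_init() hunk above: alloc_workqueue() with WQ_HIGHPRI | WQ_UNBOUND, INIT_WORK() for the work item and init_waitqueue_head() for the waitqueue.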
- * Interrupts are disabled so no more faults should be - * generated at this point */ - mutex_unlock(&kbdev->pm.lock); - kbase_flush_mmu_wqs(kbdev); - mutex_lock(&kbdev->pm.lock); - WARN_ON(!kbase_pm_do_poweroff(kbdev, false)); - } + kbase_pm_do_poweroff(kbdev, true); kbase_backend_timer_suspend(kbdev); mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); + + kbase_pm_wait_for_poweroff_complete(kbdev); } void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c index 4b903cc..e8cd8cb 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,10 +95,10 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev, mutex_lock(&kbdev->pm.lock); /* Remove the policy to prevent IRQ handlers from working on it */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); old_policy = kbdev->pm.backend.ca_current_policy; kbdev->pm.backend.ca_current_policy = NULL; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (old_policy->term) old_policy->term(kbdev); @@ -106,7 +106,7 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev, if (new_policy->init) new_policy->init(kbdev); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.ca_current_policy = new_policy; /* If any core power state changes were previously attempted, but @@ -118,7 +118,7 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev, kbdev->shader_ready_bitmap, kbdev->shader_transitioning_bitmap); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->pm.lock); @@ -131,7 +131,7 @@ KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy); u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); /* All cores must be enabled when instrumentation is in use */ if (kbdev->pm.backend.instr_enabled) @@ -151,7 +151,7 @@ KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, u64 cores_transitioning) { - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (kbdev->pm.backend.ca_current_policy != NULL) kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, @@ -163,20 +163,17 @@ void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) { unsigned long flags; - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.instr_enabled = true; kbase_pm_update_cores_state_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); kbdev->pm.backend.instr_enabled = false; 
kbase_pm_update_cores_state_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h index e8f96fe..99fb62d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h @@ -207,6 +207,17 @@ union kbase_pm_ca_policy_data { * power_change_lock should be held when accessing, * unless there is no way the timer can be running (eg * hrtimer_cancel() was called immediately before) + * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. + * hwaccess_lock must be held when accessing + * @poweron_required: true if a GPU power on is required. Should only be set + * when poweroff_wait_in_progress is true, and therefore the + * GPU can not immediately be powered on. pm.lock must be + * held when accessing + * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend + * request. pm.lock must be held when accessing + * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off + * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq + * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete * @callback_power_on: Callback when the GPU needs to be turned on. See * &struct kbase_pm_callback_conf * @callback_power_off: Callback when the GPU may be turned off. See @@ -281,6 +292,15 @@ struct kbase_pm_backend_data { bool poweroff_timer_needed; bool poweroff_timer_running; + bool poweroff_wait_in_progress; + bool poweron_required; + bool poweroff_is_suspend; + + struct workqueue_struct *gpu_poweroff_wait_wq; + struct work_struct gpu_poweroff_wait_work; + + wait_queue_head_t poweroff_wait; + int (*callback_power_on)(struct kbase_device *kbdev); void (*callback_power_off)(struct kbase_device *kbdev); void (*callback_power_suspend)(struct kbase_device *kbdev); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 046ebcb..a162ff8 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -151,7 +151,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, u32 lo = cores & 0xFFFFFFFF; u32 hi = (cores >> 32) & 0xFFFFFFFF; - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); reg = core_type_to_reg(core_type, action); @@ -407,7 +407,7 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, u64 powering_on_trans; u64 desired_state_in_use; - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); /* Get current state */ present = kbase_pm_get_present_cores(kbdev, type); @@ -451,7 +451,7 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, kbdev->pm.backend.l2_powered = 0; } - if (desired_state_in_use == ready && (trans == 0)) + if (desired_state == ready && (trans == 0)) return true; /* Restrict the cores to those that are actually present */ @@ -562,7 +562,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) u64 prev_l2_available_bitmap; KBASE_DEBUG_ASSERT(NULL != kbdev); - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); spin_lock(&kbdev->pm.backend.gpu_powered_lock); if (kbdev->pm.backend.gpu_powered == false) { @@ -734,6 +734,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); /* 
Wake slow-path waiters. Job scheduler does not use this. */ KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); + wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); } @@ -790,11 +791,13 @@ void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) /* Force the transition to be checked and reported - the cores may be * 'available' (for job submission) but not fully powered up. */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + /* Don't need 'cores_are_available', because we don't return anything */ CSTD_UNUSED(cores_are_available); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); timeout = jiffies + PM_TIMEOUT; @@ -865,12 +868,12 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) * Clear all interrupts, * and unmask them all. */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, NULL); @@ -882,21 +885,18 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); -void kbase_pm_disable_interrupts(struct kbase_device *kbdev) +void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) { - unsigned long flags; - KBASE_DEBUG_ASSERT(NULL != kbdev); /* * Mask all interrupts, * and clear them all. 
*/ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL); kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, NULL); @@ -905,8 +905,18 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev) kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); } +void kbase_pm_disable_interrupts(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_disable_interrupts_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); + /* * pmu layout: * 0x0000: PMU TAG (RO) (0xCAFECAFE) @@ -959,12 +969,10 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); } + mutex_lock(&kbdev->mmu_hw_mutex); /* Reprogram the GPU's MMU */ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - struct kbase_as *as = &kbdev->as[i]; - - mutex_lock(&as->transaction_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (js_devdata->runpool_irq.per_as_data[i].kctx) kbase_mmu_update( @@ -972,9 +980,9 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) else kbase_mmu_disable_as(kbdev, i); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&as->transaction_mutex); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } + mutex_unlock(&kbdev->mmu_hw_mutex); /* Lastly, enable the interrupts */ kbase_pm_enable_interrupts(kbdev); @@ -1211,7 +1219,7 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) { - if ((kbdev->system_coherency == COHERENCY_ACE) && + if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && !kbdev->cci_snoop_enabled) { #ifdef CONFIG_ARM64 if (kbdev->snoop_enable_smc != 0) @@ -1224,8 +1232,7 @@ void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) { - if ((kbdev->system_coherency == COHERENCY_ACE) && - kbdev->cci_snoop_enabled) { + if (kbdev->cci_snoop_enabled) { #ifdef CONFIG_ARM64 if (kbdev->snoop_disable_smc != 0) { mali_cci_flush_l2(kbdev); @@ -1355,7 +1362,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbdev->pm.backend.reset_done = false; /* The cores should be made unavailable due to the reset */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); if (kbdev->shader_available_bitmap != 0u) KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, NULL, 0u, (u32)0u); @@ -1365,7 +1372,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbdev->shader_available_bitmap = 0u; kbdev->tiler_available_bitmap = 0u; kbdev->l2_available_bitmap = 0u; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); /* Soft reset the GPU */ if (kbdev->protected_mode_support && @@ -1374,12 +1381,11 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) else err = kbase_pm_reset_do_normal(kbdev); - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, 
irq_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); if (kbdev->protected_mode) resume_vinstr = true; - kbdev->protected_mode_transition = false; kbdev->protected_mode = false; - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, irq_flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); if (err) goto exit; @@ -1388,7 +1394,6 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_hw_issues_detect(kbdev); kbase_pm_hw_issues_apply(kbdev); - kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); /* Sanity check protected mode was left after reset */ @@ -1418,7 +1423,10 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) &kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); kbase_pm_release_l2_caches(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + kbase_pm_disable_interrupts(kbdev); } @@ -1500,12 +1508,14 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on); -void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) +void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) { unsigned long flags; KBASE_DEBUG_ASSERT(kbdev != NULL); + lockdep_assert_held(&kbdev->hwaccess_lock); + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); @@ -1524,4 +1534,15 @@ void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) kbase_pm_release_l2_caches(kbdev); } +void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index aa51b8c..ad2667a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -168,6 +168,16 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev); void kbase_pm_disable_interrupts(struct kbase_device *kbdev); /** + * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() + * that does not take the hwaccess_lock + * + * Caller must hold the hwaccess_lock. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); + +/** * kbase_pm_init_hw - Initialize the hardware. * @kbdev: The kbase device structure for the device (must be a valid pointer) * @flags: Flags specifying the type of PM init @@ -373,15 +383,36 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no * longer in use * - * If the caller is the - * last caller then the GPU cycle counters will be disabled. A request must have - * been made before a call to this. + * If the caller is the last caller then the GPU cycle counters will be + * disabled. A request must have been made before a call to this. + * + * Caller must not hold the hwaccess_lock, as it will be taken in this function. + * If the caller is already holding this lock then + * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead. 
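kbase_pm_release_gpu_cycle_counter() now exists in two forms: a _nolock variant that asserts hwaccess_lock is held, and a thin wrapper that takes the lock itself, mirroring the kbase_pm_disable_interrupts()/_nolock() split earlier in this patch. A generic sketch of that pattern with hypothetical names; it also shows the if (!WARN_ON(...)) guard style used in the metrics changes further down:

#include <linux/bug.h>
#include <linux/lockdep.h>
#include <linux/spinlock.h>

struct my_device {
        spinlock_t hwaccess_lock;
        unsigned int cycle_counter_requests;
};

/* Callers of the _nolock variant must already hold hwaccess_lock. */
static void my_release_cycle_counter_nolock(struct my_device *dev)
{
        lockdep_assert_held(&dev->hwaccess_lock);

        /* WARN_ON() returns its condition, so the body only runs when the
         * request count is sane. */
        if (!WARN_ON(dev->cycle_counter_requests == 0))
                --dev->cycle_counter_requests;
}

/* Convenience wrapper for callers that do not already hold the lock. */
static void my_release_cycle_counter(struct my_device *dev)
{
        unsigned long flags;

        spin_lock_irqsave(&dev->hwaccess_lock, flags);
        my_release_cycle_counter_nolock(dev);
        spin_unlock_irqrestore(&dev->hwaccess_lock, flags);
}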
* * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev); /** + * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter() + * that does not take hwaccess_lock + * + * Caller must hold the hwaccess_lock. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); + +/** + * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to + * complete + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev); + +/** * kbase_pm_register_access_enable - Enable access to GPU registers * * Enables access to the GPU registers before power management has powered up @@ -454,12 +485,8 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); * pointer) * @is_suspend: true if power off due to suspend, * false otherwise - * Return: - * true if power was turned off, else - * false if power can not be turned off due to pending page/bus - * fault workers. Caller must flush MMU workqueues and retry */ -bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend); +void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend); #ifdef CONFIG_PM_DEVFREQ void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, @@ -496,7 +523,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev); * @kbdev: The kbase device structure for the device (must be a valid pointer) * @now: Pointer to the timestamp of the change, or NULL to use current time * - * Caller must hold runpool_irq.lock + * Caller must hold hwaccess_lock */ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *now); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c index ae63256..7613e1d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -362,14 +362,15 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) int device_nr = (katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) ? katom->device_nr : 0; - WARN_ON(device_nr >= 2); - kbdev->pm.backend.metrics.active_cl_ctx[ - device_nr] = 1; + if (!WARN_ON(device_nr >= 2)) + kbdev->pm.backend.metrics. + active_cl_ctx[device_nr] = 1; } else { /* Slot 2 should not be running non-compute * atoms */ - WARN_ON(js >= 2); - kbdev->pm.backend.metrics.active_gl_ctx[js] = 1; + if (!WARN_ON(js >= 2)) + kbdev->pm.backend.metrics. 
+ active_gl_ctx[js] = 1; } kbdev->pm.backend.metrics.gpu_active = true; } @@ -382,7 +383,7 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) unsigned long flags; ktime_t now; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index 4d00602..92457e8 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -156,7 +156,7 @@ static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state; - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); kbdev->pm.backend.desired_shader_state &= ~kbdev->pm.backend.shader_poweroff_pending; @@ -193,7 +193,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) kbdev = container_of(timer, struct kbase_device, pm.backend.gpu_poweroff_timer); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* It is safe for this call to do nothing if the work item is already * queued. The worker function will read the must up-to-date state of @@ -220,7 +220,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) } if (kbdev->pm.backend.poweroff_timer_needed) { - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time); @@ -228,7 +228,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) } kbdev->pm.backend.poweroff_timer_running = false; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return HRTIMER_NORESTART; } @@ -258,13 +258,13 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* Only power off the GPU if a request is still pending */ if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev)) do_poweroff = true; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (do_poweroff) { kbdev->pm.backend.poweroff_timer_needed = false; @@ -272,14 +272,7 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) kbdev->pm.backend.poweroff_timer_running = false; /* Power off the GPU */ - if (!kbase_pm_do_poweroff(kbdev, false)) { - /* GPU can not be powered off at present */ - kbdev->pm.backend.poweroff_timer_needed = true; - kbdev->pm.backend.poweroff_timer_running = true; - hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer, - kbdev->pm.gpu_poweroff_time, - HRTIMER_MODE_REL); - } + kbase_pm_do_poweroff(kbdev, false); } mutex_unlock(&kbdev->pm.lock); @@ -325,7 +318,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) kbdev->pm.backend.poweroff_timer_needed = false; hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.poweroff_timer_running = false; /* If wq is already running but is held off by pm.lock, make sure it has @@ 
-336,7 +329,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) kbdev->pm.backend.tiler_poweroff_pending = 0; kbdev->pm.backend.shader_poweroff_pending_time = 0; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_pm_update_active(struct kbase_device *kbdev) @@ -351,7 +344,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev) /* pm_current_policy will never be NULL while pm.lock is held */ KBASE_DEBUG_ASSERT(backend->pm_current_policy); - spin_lock_irqsave(&pm->power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); active = backend->pm_current_policy->get_core_active(kbdev); @@ -363,7 +356,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev) /* If a request was pending then the GPU was still * powered, so no need to continue */ if (!kbdev->poweroff_pending) { - spin_unlock_irqrestore(&pm->power_change_lock, + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return; } @@ -379,10 +372,14 @@ void kbase_pm_update_active(struct kbase_device *kbdev) HRTIMER_MODE_REL); } - spin_unlock_irqrestore(&pm->power_change_lock, flags); - /* Power on the GPU and any cores requested by the policy */ - kbase_pm_do_poweron(kbdev, false); + if (pm->backend.poweroff_wait_in_progress) { + pm->backend.poweron_required = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_pm_do_poweron(kbdev, false); + } } else { /* It is an error for the power policy to power off the GPU * when there are contexts active */ @@ -414,35 +411,17 @@ void kbase_pm_update_active(struct kbase_device *kbdev) pm->gpu_poweroff_time, HRTIMER_MODE_REL); } - spin_unlock_irqrestore(&pm->power_change_lock, + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } else { - spin_unlock_irqrestore(&pm->power_change_lock, + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* Power off the GPU immediately */ - if (!kbase_pm_do_poweroff(kbdev, false)) { - /* GPU can not be powered off at present - */ - spin_lock_irqsave( - &pm->power_change_lock, - flags); - backend->poweroff_timer_needed = true; - if (!backend->poweroff_timer_running) { - backend->poweroff_timer_running - = true; - hrtimer_start( - &backend->gpu_poweroff_timer, - pm->gpu_poweroff_time, - HRTIMER_MODE_REL); - } - spin_unlock_irqrestore( - &pm->power_change_lock, - flags); - } + kbase_pm_do_poweroff(kbdev, false); } } else { - spin_unlock_irqrestore(&pm->power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } } } @@ -454,25 +433,37 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) bool cores_are_available; bool do_poweroff = false; - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (kbdev->pm.backend.pm_current_policy == NULL) return; + if (kbdev->pm.backend.poweroff_wait_in_progress) + return; - desired_bitmap = - kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); - desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); - - if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) - desired_tiler_bitmap = 1; - else + if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap && + !kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt + && !kbdev->tiler_inuse_cnt) { + /* We are trying to change in/out of protected mode - force all + * cores off so that the L2 powers down */ + desired_bitmap = 0; desired_tiler_bitmap = 0; + } else { + 
desired_bitmap = + kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); + desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { - /* Unless XAFFINITY is supported, enable core 0 if tiler - * required, regardless of core availability */ if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) - desired_bitmap |= 1; + desired_tiler_bitmap = 1; + else + desired_tiler_bitmap = 0; + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { + /* Unless XAFFINITY is supported, enable core 0 if tiler + * required, regardless of core availability */ + if (kbdev->tiler_needed_cnt > 0 || + kbdev->tiler_inuse_cnt > 0) + desired_bitmap |= 1; + } } if (kbdev->pm.backend.desired_shader_state != desired_bitmap) @@ -495,7 +486,8 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) (kbdev->pm.backend.desired_tiler_state & ~desired_tiler_bitmap); - if (kbdev->pm.poweroff_shader_ticks) + if (kbdev->pm.poweroff_shader_ticks && + !kbdev->protected_mode_transition) kbdev->pm.backend.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks; else @@ -517,7 +509,8 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) kbdev->pm.backend.tiler_poweroff_pending |= (kbdev->pm.backend.desired_tiler_state & ~desired_tiler_bitmap); - if (kbdev->pm.poweroff_shader_ticks) + if (kbdev->pm.poweroff_shader_ticks && + !kbdev->protected_mode_transition) kbdev->pm.backend.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks; else @@ -563,11 +556,11 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev) { unsigned long flags; - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_update_cores_state_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } int kbase_pm_list_policies(const struct kbase_pm_policy * const **list) @@ -612,10 +605,10 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, mutex_lock(&kbdev->pm.lock); /* Remove the policy to prevent IRQ handlers from working on it */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); old_policy = kbdev->pm.backend.pm_current_policy; kbdev->pm.backend.pm_current_policy = NULL; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u, old_policy->id); @@ -627,9 +620,9 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, if (new_policy->init) new_policy->init(kbdev); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.pm_current_policy = new_policy; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* If any core power state changes were previously attempted, but * couldn't be made because the policy was changing (current_policy was @@ -664,14 +657,13 @@ kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev) void kbase_pm_request_cores(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { - unsigned long flags; u64 cores; kbase_pm_change_state change_gpu_state = 0u; KBASE_DEBUG_ASSERT(kbdev != NULL); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); cores = shader_cores; while (cores) { @@ 
-714,8 +706,6 @@ void kbase_pm_request_cores(struct kbase_device *kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_END, change_gpu_state); } - - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_request_cores); @@ -723,13 +713,11 @@ KBASE_EXPORT_TEST_API(kbase_pm_request_cores); void kbase_pm_unrequest_cores(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { - unsigned long flags; - kbase_pm_change_state change_gpu_state = 0u; KBASE_DEBUG_ASSERT(kbdev != NULL); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); while (shader_cores) { int bitnum = fls64(shader_cores) - 1; @@ -770,8 +758,6 @@ void kbase_pm_unrequest_cores(struct kbase_device *kbdev, * - no-one will wait on the state change */ kbase_pm_trace_check_and_finish_state_change(kbdev); } - - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores); @@ -780,11 +766,10 @@ enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { - unsigned long flags; u64 prev_shader_needed; /* Just for tracing */ u64 prev_shader_inuse; /* Just for tracing */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); prev_shader_needed = kbdev->shader_needed_bitmap; prev_shader_inuse = kbdev->shader_inuse_bitmap; @@ -795,16 +780,15 @@ kbase_pm_register_inuse_cores(struct kbase_device *kbdev, * be chosen */ if ((kbdev->pm.backend.desired_shader_state & shader_cores) != shader_cores) { - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - - return KBASE_NEW_AFFINITY; + return (kbdev->pm.backend.poweroff_wait_in_progress || + kbdev->pm.backend.pm_current_policy == NULL) ? 
+ KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY; } if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores || (tiler_required && !kbdev->tiler_available_bitmap)) { /* Trace ongoing core transition */ kbase_timeline_pm_l2_transition_start(kbdev); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); return KBASE_CORES_NOT_READY; } @@ -853,8 +837,6 @@ kbase_pm_register_inuse_cores(struct kbase_device *kbdev, KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL, NULL, 0u, (u32) kbdev->shader_inuse_bitmap); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - return KBASE_CORES_READY; } @@ -863,12 +845,11 @@ KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores); void kbase_pm_release_cores(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { - unsigned long flags; kbase_pm_change_state change_gpu_state = 0u; KBASE_DEBUG_ASSERT(kbdev != NULL); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); while (shader_cores) { int bitnum = fls64(shader_cores) - 1; @@ -913,8 +894,6 @@ void kbase_pm_release_cores(struct kbase_device *kbdev, /* Trace that any state change completed immediately */ kbase_pm_trace_check_and_finish_state_change(kbdev); } - - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_release_cores); @@ -923,7 +902,13 @@ void kbase_pm_request_cores_sync(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { + unsigned long flags; + + kbase_pm_wait_for_poweroff_complete(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_request_cores(kbdev, tiler_required, shader_cores); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_pm_check_transitions_sync(kbdev); } @@ -935,7 +920,7 @@ void kbase_pm_request_l2_caches(struct kbase_device *kbdev) unsigned long flags; u32 prior_l2_users_count; - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); prior_l2_users_count = kbdev->l2_users_count++; @@ -947,7 +932,7 @@ void kbase_pm_request_l2_caches(struct kbase_device *kbdev) if (!prior_l2_users_count || !kbdev->l2_available_bitmap) kbase_pm_check_transitions_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); wait_event(kbdev->pm.backend.l2_powered_wait, kbdev->pm.backend.l2_powered == 1); @@ -959,22 +944,16 @@ KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches); void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); kbdev->l2_users_count++; - - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on); void kbase_pm_release_l2_caches(struct kbase_device *kbdev) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0); @@ -985,8 +964,6 @@ void kbase_pm_release_l2_caches(struct kbase_device *kbdev) /* Trace that any state change completed immediately */ kbase_pm_trace_check_and_finish_state_change(kbdev); } - - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches); |
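One more pattern worth calling out from kbase_pm_update_active() and the power-off worker above: a power-on request that arrives while the power-off work item is still draining is not acted on immediately; poweron_required is set under hwaccess_lock and the worker honours it once the transition has finished. A standalone sketch of that deferred-power-on handshake, again with hypothetical my_* names (my_power_on() and my_queue_poweroff() are placeholders, not kbase functions):

#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

struct my_pm {
        spinlock_t lock;                /* stands in for hwaccess_lock */
        bool poweroff_wait_in_progress;
        bool poweron_required;
        wait_queue_head_t poweroff_wait;
};

void my_power_on(struct my_pm *pm);             /* placeholder: clocks/cores back on */
void my_queue_poweroff(struct my_pm *pm);       /* placeholder: queue the worker */

static void my_update_active(struct my_pm *pm, bool want_power)
{
        unsigned long flags;

        spin_lock_irqsave(&pm->lock, flags);
        if (want_power && pm->poweroff_wait_in_progress) {
                /* A power-off is still in flight: only record the request. */
                pm->poweron_required = true;
                spin_unlock_irqrestore(&pm->lock, flags);
                return;
        }
        spin_unlock_irqrestore(&pm->lock, flags);

        if (want_power)
                my_power_on(pm);
        else
                my_queue_poweroff(pm);
}

/* Runs at the end of the power-off worker, once the GPU is actually off. */
static void my_poweroff_worker_tail(struct my_pm *pm)
{
        unsigned long flags;
        bool poweron;

        spin_lock_irqsave(&pm->lock, flags);
        pm->poweroff_wait_in_progress = false;
        poweron = pm->poweron_required;
        pm->poweron_required = false;
        spin_unlock_irqrestore(&pm->lock, flags);

        if (poweron)
                my_power_on(pm);

        wake_up(&pm->poweroff_wait);
}

The same "defer while teardown is pending" check appears in kbase_pm_update_cores_state_nolock(), which now simply returns early when poweroff_wait_in_progress is set so that no new core transitions are started.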