diff options
author | Vamsidhar reddy Gaddam <gvamsi@google.com> | 2023-12-20 12:42:26 +0000 |
---|---|---|
committer | Vamsidhar reddy Gaddam <gvamsi@google.com> | 2024-01-05 09:19:17 +0000 |
commit | 11473542814286e59a89a70c969fb50a25ba921f (patch) | |
tree | bd4aa60e7d3dc895d82a36fcea0026569e3a04aa /mali_kbase/mali_kbase_dummy_job_wa.c | |
parent | 8768eedce66a4373c96f35c8dfb73d4668703180 (diff) | |
parent | 049a542207ed694271316782397b78b2e202086a (diff) | |
download | gpu-11473542814286e59a89a70c969fb50a25ba921f.tar.gz |
Merge branch 'upstream' into HEAD
Update KMD to R47P0
Bug: 315267052
Test: Outlined in go/pixel-gpu-kmd-r47p0
Change-Id: I89454c4c862033fe330b260a9bc6cc777a3ca231
Signed-off-by: Vamsidhar reddy Gaddam <gvamsi@google.com>
Diffstat (limited to 'mali_kbase/mali_kbase_dummy_job_wa.c')
-rw-r--r-- | mali_kbase/mali_kbase_dummy_job_wa.c | 185 |
1 files changed, 68 insertions, 117 deletions
diff --git a/mali_kbase/mali_kbase_dummy_job_wa.c b/mali_kbase/mali_kbase_dummy_job_wa.c index c3c6046..6457f51 100644 --- a/mali_kbase/mali_kbase_dummy_job_wa.c +++ b/mali_kbase/mali_kbase_dummy_job_wa.c @@ -53,9 +53,9 @@ struct wa_blob { u32 blob_offset; } __packed; -static bool in_range(const u8 *base, const u8 *end, off_t off, size_t sz) +static bool within_range(const u8 *base, const u8 *end, off_t off, size_t sz) { - return !(end - base - off < sz); + return !((size_t)(end - base - off) < sz); } static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits) @@ -65,7 +65,7 @@ static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits) u32 val; for (loop = 0; loop < timeout; loop++) { - val = kbase_reg_read(kbdev, offset); + val = kbase_reg_read32(kbdev, offset); if (val & bits) break; udelay(10); @@ -74,75 +74,35 @@ static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits) if (loop == timeout) { dev_err(kbdev->dev, "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n", - (unsigned long)offset, (unsigned long)bits, - (unsigned long)val); + (unsigned long)offset, (unsigned long)bits, (unsigned long)val); } return (val & bits); } -static int wait(struct kbase_device *kbdev, off_t offset, u32 bits, bool set) -{ - int loop; - const int timeout = 100; - u32 val; - u32 target = 0; - - if (set) - target = bits; - - for (loop = 0; loop < timeout; loop++) { - val = kbase_reg_read(kbdev, (offset)); - if ((val & bits) == target) - break; - - udelay(10); - } - - if (loop == timeout) { - dev_err(kbdev->dev, - "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n", - (unsigned long)offset, (unsigned long)bits, - (unsigned long)val); - return -ETIMEDOUT; - } - - return 0; -} - -static inline int run_job(struct kbase_device *kbdev, int as, int slot, - u64 cores, u64 jc) +static inline int run_job(struct kbase_device *kbdev, int as, u32 slot, u64 cores, u64 jc) { u32 done; /* setup job */ - kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_LO), - jc & U32_MAX); - kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_HI), - jc >> 32); - kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_LO), - cores & U32_MAX); - kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_HI), - cores >> 32); - kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_CONFIG_NEXT), - JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK | as); + kbase_reg_write64(kbdev, JOB_SLOT_OFFSET(slot, HEAD_NEXT), jc); + kbase_reg_write64(kbdev, JOB_SLOT_OFFSET(slot, AFFINITY_NEXT), cores); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(slot, CONFIG_NEXT), + JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK | (unsigned int)as); /* go */ - kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_COMMAND_NEXT), - JS_COMMAND_START); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(slot, COMMAND_NEXT), JS_COMMAND_START); /* wait for the slot to finish (done, error) */ - done = wait_any(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), - (1ul << (16+slot)) | (1ul << slot)); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), done); + done = wait_any(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT), + (1ul << (16 + slot)) | (1ul << slot)); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), done); if (done != (1ul << slot)) { - dev_err(kbdev->dev, - "Failed to run WA job on slot %d cores 0x%llx: done 0x%lx\n", - slot, (unsigned long long)cores, - (unsigned long)done); + dev_err(kbdev->dev, "Failed to run WA job on slot %u cores 0x%llx: done 0x%lx\n", + slot, (unsigned long long)cores, (unsigned long)done); dev_err(kbdev->dev, "JS_STATUS on failure: 0x%x\n", - kbase_reg_read(kbdev, JOB_SLOT_REG(slot, JS_STATUS))); + kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(slot, STATUS))); return -EFAULT; } else { @@ -154,42 +114,42 @@ static inline int run_job(struct kbase_device *kbdev, int as, int slot, int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) { int as; - int slot; + u32 slot; u64 jc; int failed = 0; int runs = 0; u32 old_gpu_mask; u32 old_job_mask; + u64 val; + const u32 timeout_us = 10000; if (!kbdev) return -EFAULT; - if (!kbdev->dummy_job_wa.ctx) + if (!kbdev->dummy_job_wa.kctx) return -EFAULT; - as = kbdev->dummy_job_wa.ctx->as_nr; + as = kbdev->dummy_job_wa.kctx->as_nr; slot = kbdev->dummy_job_wa.slot; jc = kbdev->dummy_job_wa.jc; /* mask off all but MMU IRQs */ - old_gpu_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - old_job_mask = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); + old_gpu_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); + old_job_mask = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK)); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), 0); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK), 0); /* power up requested cores */ - kbase_reg_write(kbdev, SHADER_PWRON_LO, (cores & U32_MAX)); - kbase_reg_write(kbdev, SHADER_PWRON_HI, (cores >> 32)); + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(SHADER_PWRON), cores); if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) { /* wait for power-ups */ - wait(kbdev, GPU_CONTROL_REG(SHADER_READY_LO), (cores & U32_MAX), true); - if (cores >> 32) - wait(kbdev, GPU_CONTROL_REG(SHADER_READY_HI), (cores >> 32), true); + kbase_reg_poll64_timeout(kbdev, GPU_CONTROL_ENUM(SHADER_READY), val, + (val & cores) == cores, 10, timeout_us, false); } if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) { - int i; + size_t i; /* do for each requested core */ for (i = 0; i < sizeof(cores) * 8; i++) { @@ -211,45 +171,42 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) runs++; } - if (kbdev->dummy_job_wa.flags & - KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { + if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { /* power off shader cores (to reduce any dynamic leakage) */ - kbase_reg_write(kbdev, SHADER_PWROFF_LO, (cores & U32_MAX)); - kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32)); + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(SHADER_PWROFF), cores); /* wait for power off complete */ - wait(kbdev, GPU_CONTROL_REG(SHADER_READY_LO), (cores & U32_MAX), false); - wait(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_LO), (cores & U32_MAX), false); - if (cores >> 32) { - wait(kbdev, GPU_CONTROL_REG(SHADER_READY_HI), (cores >> 32), false); - wait(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_HI), (cores >> 32), false); - } - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX); + kbase_reg_poll64_timeout(kbdev, GPU_CONTROL_ENUM(SHADER_READY), val, !(val & cores), + 10, timeout_us, false); + kbase_reg_poll64_timeout(kbdev, GPU_CONTROL_ENUM(SHADER_PWRTRANS), val, + !(val & cores), 10, timeout_us, false); + + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), U32_MAX); } /* restore IRQ masks */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), old_gpu_mask); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), old_job_mask); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), old_gpu_mask); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK), old_job_mask); if (failed) - dev_err(kbdev->dev, - "WA complete with %d failures out of %d runs\n", failed, - runs); + dev_err(kbdev->dev, "WA complete with %d failures out of %d runs\n", failed, runs); return failed ? -EFAULT : 0; } -static ssize_t dummy_job_wa_info_show(struct device * const dev, - struct device_attribute * const attr, char * const buf) +static ssize_t dummy_job_wa_info_show(struct device *const dev, struct device_attribute *const attr, + char *const buf) { struct kbase_device *const kbdev = dev_get_drvdata(dev); int err; - if (!kbdev || !kbdev->dummy_job_wa.ctx) + CSTD_UNUSED(attr); + + if (!kbdev || !kbdev->dummy_job_wa.kctx) return -ENODEV; - err = scnprintf(buf, PAGE_SIZE, "slot %u flags %llx\n", - kbdev->dummy_job_wa.slot, kbdev->dummy_job_wa.flags); + err = scnprintf(buf, PAGE_SIZE, "slot %u flags %llx\n", kbdev->dummy_job_wa.slot, + kbdev->dummy_job_wa.flags); return err; } @@ -292,14 +249,13 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) err = request_firmware(&firmware, wa_name, kbdev->dev); if (err) { - dev_err(kbdev->dev, "WA blob missing. Please refer to the Arm Mali DDK Valhall Release Notes, " - "Part number DC-06002 or contact support-mali@arm.com - driver probe will be failed"); + dev_err(kbdev->dev, + "WA blob missing. Please refer to the Arm Mali DDK Valhall Release Notes, " + "Part number DC-06002 or contact support-mali@arm.com - driver probe will be failed"); return -ENODEV; } - kctx = kbase_create_context(kbdev, true, - BASE_CONTEXT_CREATE_FLAG_NONE, 0, - NULL); + kctx = kbase_create_context(kbdev, true, BASE_CONTEXT_CREATE_FLAG_NONE, 0, NULL); if (!kctx) { dev_err(kbdev->dev, "Failed to create WA context\n"); @@ -309,10 +265,9 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) fw = firmware->data; fw_end = fw + firmware->size; - dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n", - firmware->size); + dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n", firmware->size); - if (!in_range(fw, fw_end, 0, sizeof(*header))) { + if (!within_range(fw, fw_end, 0, sizeof(*header))) { dev_err(kbdev->dev, "WA too small\n"); goto bad_fw; } @@ -331,7 +286,7 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) goto bad_fw; } - if (!in_range(fw, fw_end, header->info_offset, sizeof(*v2_info))) { + if (!within_range(fw, fw_end, header->info_offset, sizeof(*v2_info))) { dev_err(kbdev->dev, "WA info offset out of bounds\n"); goto bad_fw; } @@ -357,14 +312,14 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) u64 gpu_va; struct kbase_va_region *va_region; - if (!in_range(fw, fw_end, blob_offset, sizeof(*blob))) { + if (!within_range(fw, fw_end, blob_offset, sizeof(*blob))) { dev_err(kbdev->dev, "Blob offset out-of-range: 0x%lx\n", (unsigned long)blob_offset); goto bad_fw; } blob = (const struct wa_blob *)(fw + blob_offset); - if (!in_range(fw, fw_end, blob->payload_offset, blob->size)) { + if (!within_range(fw, fw_end, blob->payload_offset, blob->size)) { dev_err(kbdev->dev, "Payload out-of-bounds\n"); goto bad_fw; } @@ -377,8 +332,8 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) nr_pages = PFN_UP(blob->size); flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED; - va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, - &gpu_va, mmu_sync_info); + va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &gpu_va, + mmu_sync_info); if (!va_region) { dev_err(kbdev->dev, "Failed to allocate for blob\n"); @@ -390,18 +345,15 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) /* copy the payload, */ payload = fw + blob->payload_offset; - dst = kbase_vmap(kctx, - va_region->start_pfn << PAGE_SHIFT, + dst = kbase_vmap(kctx, va_region->start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT, &vmap); if (dst) { memcpy(dst, payload, blob->size); kbase_vunmap(kctx, &vmap); } else { - dev_err(kbdev->dev, - "Failed to copy payload\n"); + dev_err(kbdev->dev, "Failed to copy payload\n"); } - } blob_offset = blob->blob_offset; /* follow chain */ } @@ -410,10 +362,9 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) kbasep_js_schedule_privileged_ctx(kbdev, kctx); - kbdev->dummy_job_wa.ctx = kctx; + kbdev->dummy_job_wa.kctx = kctx; - err = sysfs_create_file(&kbdev->dev->kobj, - &dev_attr_dummy_job_wa_info.attr); + err = sysfs_create_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr); if (err) dev_err(kbdev->dev, "SysFS file creation for dummy job wa failed\n"); @@ -428,7 +379,7 @@ no_ctx: void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) { - struct kbase_context *wa_ctx; + struct kbase_context *wa_kctx; /* return if the dummy job has not been loaded */ if (kbdev->dummy_job_wa_loaded == false) @@ -437,13 +388,13 @@ void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) /* Can be safely called even if the file wasn't created on probe */ sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr); - wa_ctx = READ_ONCE(kbdev->dummy_job_wa.ctx); - WRITE_ONCE(kbdev->dummy_job_wa.ctx, NULL); + wa_kctx = READ_ONCE(kbdev->dummy_job_wa.kctx); + WRITE_ONCE(kbdev->dummy_job_wa.kctx, NULL); /* make this write visible before we tear down the ctx */ smp_mb(); - if (wa_ctx) { - kbasep_js_release_privileged_ctx(kbdev, wa_ctx); - kbase_destroy_context(wa_ctx); + if (wa_kctx) { + kbasep_js_release_privileged_ctx(kbdev, wa_kctx); + kbase_destroy_context(wa_kctx); } } |