| author | Akash Goel <akash.goel@arm.com> | 2024-04-02 12:08:36 +0000 |
|---|---|---|
| committer | Renato Grottesi <otaner@google.com> | 2024-04-10 16:03:46 +0000 |
| commit | 656ed97ecba51a1656d1f1deb5b0659ebf073a59 (patch) | |
| tree | 93da56c4068e4c44c9a24916f71c0fe5d60041d2 /mali_kbase | |
| parent | 10cee2ea3b7ca2089ac98fbe16ef1110e2298003 (diff) | |
| download | gpu-656ed97ecba51a1656d1f1deb5b0659ebf073a59.tar.gz | |
GPUCORE-41611: Debug num_valid_entries > 512 warning issue
Adding extra output for the num_valid_entries > 512 assertion.
Provenance: https://code.ipdelivery.arm.com/c/GPU/mali-ddk/+/6548
Bug: 315967882
Test: Local boot and run of GFXBench5
Signed-off-by: Renato Grottesi <otaner@google.com>
Change-Id: I9f7b385777688e77389656f8bfd06c0bc493a567
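
For context on the title: each kbase PGD is a single 4 KiB page of 8-byte descriptors, so a level table has exactly 4096 / 8 = 512 slots, and a valid-entry count above 512 can only come from corruption. A minimal compile-time sketch of that bound (`PGD_PAGE_SIZE` and `PTE_SIZE` are illustrative names for this sketch; the driver expresses the limit as `KBASE_MMU_PAGE_ENTRIES`, the constant checked in the diff below):

```c
/* Illustrative only: shows where the "512" in the commit title comes
 * from. PGD_PAGE_SIZE and PTE_SIZE are made-up names for this sketch;
 * the driver's constant is KBASE_MMU_PAGE_ENTRIES.
 */
#include <assert.h>

#define PGD_PAGE_SIZE 4096u /* one page-table page */
#define PTE_SIZE 8u         /* one 64-bit descriptor */
#define KBASE_MMU_PAGE_ENTRIES (PGD_PAGE_SIZE / PTE_SIZE)

/* A level table has exactly 512 slots, so a valid-entry counter above
 * 512 is impossible and indicates page-table corruption.
 */
static_assert(KBASE_MMU_PAGE_ENTRIES == 512,
              "one 4 KiB PGD page holds exactly 512 entries");
```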
Diffstat (limited to 'mali_kbase')
| -rw-r--r-- | mali_kbase/context/mali_kbase_context.c | 4 |
| -rw-r--r-- | mali_kbase/mali_kbase_defs.h | 1 |
| -rw-r--r-- | mali_kbase/mali_kbase_jd.c | 9 |
| -rw-r--r-- | mali_kbase/mali_kbase_mem.c | 10 |
| -rw-r--r-- | mali_kbase/mmu/mali_kbase_mmu.c | 148 |
5 files changed, 159 insertions, 13 deletions
```diff
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 9b8cc5d..7fb7b43 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -327,7 +327,9 @@ void kbase_context_common_term(struct kbase_context *kctx)
 
 	pages = atomic_read(&kctx->used_pages);
 	if (pages != 0)
-		dev_warn(kctx->kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+		dev_warn(kctx->kbdev->dev, "%s: %d pages (pgd cnt %u) in use for kctx %d_%d",
+			 __func__, pages,
+			 kctx->pgd_cnt, kctx->tgid, kctx->id);
 
 	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index d7adc7d..1162b95 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -2032,6 +2032,7 @@ struct kbase_context {
 	atomic_t used_pages;
 	atomic_t nonmapped_pages;
 	atomic_t permanent_mapped_pages;
+	u32 pgd_cnt;
 
 	struct kbase_mem_pool_group mem_pools;
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index 08c840a..6c8dd11 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -1383,8 +1383,13 @@ void kbase_jd_done_worker(struct kthread_work *data)
 	if ((katom->event_code != BASE_JD_EVENT_DONE) &&
 	    !kbase_ctx_flag(katom->kctx, KCTX_DYING) &&
 	    !kbase_ctx_flag(katom->kctx, KCTX_PAGE_FAULT_REPORT_SKIP))
-		dev_err(kbdev->dev, "t6xx: GPU fault 0x%02lx from job slot %d\n",
-			(unsigned long)katom->event_code, katom->slot_nr);
+		dev_err(kbdev->dev,
+			"Atom %d (age %x core_req %x) of kctx %d_%d"
+			" completed with event_code %x on job slot %d",
+			kbase_jd_atom_id(kctx, katom),
+			katom->age, katom->core_req,
+			kctx->tgid, kctx->id,
+			katom->event_code, katom->slot_nr);
 
 	/* Retain state before the katom disappears */
 	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index c3eb2cc..337fe2a 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -2706,7 +2706,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo
 {
 	size_t delta;
 	size_t pages_required;
-	size_t old_size;
+	size_t old_size, old_size_orig;
 	struct kbase_mem_pool *pool;
 	int ret = -ENOMEM;
 	struct tagged_addr *gpu_pages;
@@ -2725,6 +2725,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo
 	if (reg->gpu_alloc->nents >= info->commit_pages)
 		goto done;
 
+	old_size_orig = reg->gpu_alloc->nents;
 	/* Allocate some more pages */
 	delta = info->commit_pages - reg->gpu_alloc->nents;
 	pages_required = delta;
@@ -2802,6 +2803,13 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo
 	spin_unlock(&kctx->mem_partials_lock);
 
 	ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, old_size, mmu_sync_info);
+	if (unlikely(old_size_orig != old_size)) {
+		dev_warn(
+			kctx->kbdev->dev,
+			"JIT alloc %llx backing changed from %zu to %zu during grow for kctx %d_%d",
+			reg->start_pfn << PAGE_SHIFT, old_size_orig, old_size,
+			kctx->tgid, kctx->id);
+	}
 	/*
 	 * The grow failed so put the allocation back in the
 	 * pool and return failure.
```
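
The `pgd_cnt` field added above ties into the mmu.c changes below: the counter goes up when a PGD page is allocated, down when one is freed, and is reported at context teardown alongside the leaked page count. A condensed sketch of that lifecycle, assuming a trimmed-down context struct (`kbase_context_slim`, `on_pgd_alloc`, and `on_pgd_free` are illustrative names; the real updates live in `kbase_mmu_alloc_pgd()` and `kbase_mmu_account_freed_pgd()`, guarded by `mmut->kctx` checks and the MMU lock, which are omitted here):

```c
#include <linux/atomic.h>
#include <linux/types.h>

/* Trimmed-down stand-in for struct kbase_context, for illustration. */
struct kbase_context_slim {
	atomic_t used_pages; /* all pages charged to the context */
	u32 pgd_cnt;         /* new in this patch: pages in use as PGDs */
};

/* Charge one page to the context and record it as a PGD. */
static void on_pgd_alloc(struct kbase_context_slim *kctx)
{
	atomic_add(1, &kctx->used_pages);
	kctx->pgd_cnt++;
}

/* Release one PGD page from the context's accounting. */
static void on_pgd_free(struct kbase_context_slim *kctx)
{
	atomic_sub(1, &kctx->used_pages);
	kctx->pgd_cnt--;
}
```

With this in place, the teardown warning in `kbase_context_common_term()` can say how many of the leaked pages were page directories, which narrows down where the `used_pages` leak came from.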
```diff
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 0e73641..6bab554 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -334,6 +334,7 @@ static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase
 	if (mmut->kctx) {
 		kbase_process_page_usage_dec(mmut->kctx, 1);
 		atomic_sub(1, &mmut->kctx->used_pages);
+		mmut->kctx->pgd_cnt--;
 	}
 
 	kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
@@ -1162,9 +1163,12 @@ page_fault_retry:
 	if (fault_rel_pfn < current_backed_size) {
 		struct kbase_mmu_hw_op_param op_param;
 
-		dev_dbg(kbdev->dev,
-			"Page fault @ VA 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
-			fault->addr, region->start_pfn, region->start_pfn + current_backed_size);
+		dev_warn_ratelimited(kbdev->dev,
+				     "Page fault @VA 0x%llx in allocated region 0x%llx-0x%llx"
+				     " (status %x) of kctx %d_%d (as %d)",
+				     fault->addr, region->start_pfn << PAGE_SHIFT,
+				     (region->start_pfn + current_backed_size) << PAGE_SHIFT,
+				     fault_status, kctx->tgid, kctx->id, as_no);
 
 		kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE);
 
 		/* [1] in case another page fault occurred while we were
@@ -1460,6 +1464,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_
 		new_page_count = atomic_add_return(1, &mmut->kctx->used_pages);
 		KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, (u64)new_page_count);
 		kbase_process_page_usage_inc(mmut->kctx, 1);
+		mmut->kctx->pgd_cnt++;
 	}
 
 	atomic_add(1, &kbdev->memdev.used_pages);
@@ -1782,6 +1787,77 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
 	mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
 }
 
+static void dump_region_info(struct kbase_device *kbdev, struct kbase_va_region *reg)
+{
+	if (!reg)
+		return;
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+	dev_err(kbdev->dev,
+		"Region info: GPU VA %llx, nr_pages %zu, used_pages %zu,"
+		" commit_pages %zu, flags %lx, type %d",
+		reg->start_pfn << PAGE_SHIFT,
+		reg->nr_pages, reg->used_pages, reg->gpu_alloc->nents,
+		reg->flags, reg->gpu_alloc->type);
+#else
+	dev_err(kbdev->dev,
+		"Region info: GPU VA %llx, nr_pages %zu,"
+		" commit_pages %zu, flags %lx, type %d",
+		reg->start_pfn << PAGE_SHIFT,
+		reg->nr_pages, reg->gpu_alloc->nents,
+		reg->flags, reg->gpu_alloc->type);
+#endif
+}
+
+static void dump_zones_info(struct kbase_context *const kctx)
+{
+	enum kbase_memory_zone zone_idx;
+
+	if (!kctx)
+		return;
+
+	for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) {
+		struct kbase_reg_zone *reg_zone = &kctx->reg_zone[zone_idx];
+
+		if (!reg_zone->base_pfn)
+			continue;
+
+		dev_warn(kctx->kbdev->dev,
+			 "%15s %u 0x%.16llx 0x%.16llx",
+			 kbase_reg_zone_get_name(zone_idx), zone_idx, reg_zone->base_pfn,
+			 reg_zone->va_size_pages);
+	}
+}
+
+static void dump_custom_va_zone_regions_info(struct kbase_context *const kctx, u64 insert_vpfn)
+{
+	struct kbase_reg_zone *reg_zone;
+
+	if (!kctx)
+		return;
+
+	reg_zone = &kctx->reg_zone[CUSTOM_VA_ZONE];
+
+	if (reg_zone->base_pfn &&
+	    (insert_vpfn >= reg_zone->base_pfn) &&
+	    (insert_vpfn < kbase_reg_zone_end_pfn(reg_zone))) {
+		struct rb_root *rbtree = &reg_zone->reg_rbtree;
+		struct rb_node *p;
+
+		dev_err(kctx->kbdev->dev, "Dumping CUSTOM_VA regions info");
+
+		for (p = rb_first(rbtree); p; p = rb_next(p)) {
+			struct kbase_va_region *reg =
+				rb_entry(p, struct kbase_va_region, rblink);
+
+			/* Empty region - ignore */
+			if (reg->gpu_alloc == NULL)
+				continue;
+
+			dump_region_info(kctx->kbdev, reg);
+		}
+	}
+}
+
 /**
  * update_parent_pgds() - Updates the page table from bottom level towards
  *                        the top level to insert a new ATE
@@ -1797,6 +1873,7 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
 *                  the physical addresses of newly allocated PGDs from index
 *                  insert_level+1 to cur_level, and an existing PGD at index
 *                  insert_level.
+ *  @reg:           Pointer to the VA region that needs to be mapped.
 *
 * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD
 * at insert_level which already exists in the MMU Page Tables. Migration status is also
@@ -1809,7 +1886,8 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
 static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 			      int cur_level, int insert_level, u64 insert_vpfn,
-			      phys_addr_t *pgds_to_insert)
+			      phys_addr_t *pgds_to_insert,
+			      struct kbase_va_region *reg)
 {
 	int pgd_index;
 	int err = 0;
@@ -1827,6 +1905,8 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table
 		u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF;
 		struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd));
 		u64 *parent_page_va;
+		u32 mmut_kctx_tgid = mmut->kctx ? mmut->kctx->tgid : 0;
+		u32 mmut_kctx_id = mmut->kctx ? mmut->kctx->id : 0;
 
 		if (WARN_ON_ONCE(target_pgd == KBASE_INVALID_PHYSICAL_ADDRESS)) {
 			err = -EFAULT;
@@ -1844,8 +1924,32 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table
 		current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va);
 		kbdev->mmu_mode->entry_set_pte(&pte, target_pgd);
+		if (WARN_ON_ONCE((parent_page_va[parent_vpfn] & 1UL) != 0)) {
+			dev_err(kbdev->dev,
+				"Valid PTE found at index %lld of"
+				" Level %d table page for VA %llx of kctx %d_%d",
+				parent_vpfn, parent_index, insert_vpfn << 12,
+				mmut_kctx_tgid, mmut_kctx_id);
+			dump_region_info(kbdev, reg);
+			dev_err(kbdev->dev, "cur_level %d insert_level %d current_valid_entries %u",
+				cur_level, insert_level, current_valid_entries);
+			dump_zones_info(mmut->kctx);
+			dump_custom_va_zone_regions_info(mmut->kctx, insert_vpfn);
+		}
 		parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
 			kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte);
+		if (unlikely((current_valid_entries + 1) > KBASE_MMU_PAGE_ENTRIES)) {
+			dev_err(kbdev->dev,
+				"Unexpected valid entry count after updating entry at index %lld of"
+				" Level %d table page for VA %llx of kctx %d_%d",
+				parent_vpfn, parent_index, insert_vpfn << 12,
+				mmut_kctx_tgid, mmut_kctx_id);
+			dump_region_info(kbdev, reg);
+			dev_err(kbdev->dev, "cur_level %d insert_level %d current_valid_entries %u",
+				cur_level, insert_level, current_valid_entries);
+			dump_zones_info(mmut->kctx);
+			dump_custom_va_zone_regions_info(mmut->kctx, insert_vpfn);
+		}
 		kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1);
 
 		kbase_kunmap(parent_page, parent_page_va);
@@ -2111,7 +2215,7 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp
 		if (newly_created_pgd) {
 			err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
-						 new_pgds);
+						 new_pgds, NULL);
 			if (err) {
 				dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
 					__func__, err);
@@ -2395,7 +2499,11 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
 				 * should be performed with
 				 * kbase_mmu_update_pages()
 				 */
-				WARN_ON_ONCE((*target & 1UL) != 0);
+				if (WARN_ON_ONCE((*target & 1UL) != 0))
+					dev_err(kbdev->dev,
+						"valid ATE found at entry %u of"
+						" Level 3 table page for VA %llx",
+						ofs, insert_vpfn << 12);
 
 				*target = kbase_mmu_create_ate(kbdev,
 							       as_tagged(page_address),
@@ -2413,6 +2521,21 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
 			num_of_valid_entries += count;
 		}
 
+		if (unlikely(num_of_valid_entries > KBASE_MMU_PAGE_ENTRIES)) {
+			u32 mmut_kctx_tgid = mmut->kctx ? mmut->kctx->tgid : 0;
+			u32 mmut_kctx_id = mmut->kctx ? mmut->kctx->id : 0;
+			dev_err(kbdev->dev,
+				"Unexpected valid entry count after updating %u entries"
+				" at index %d of Level %d table page for VA %llx of kctx %d_%d",
+				count, vindex, cur_level, insert_vpfn << 12, mmut_kctx_tgid,
+				mmut_kctx_id);
+			dump_region_info(kbdev, reg);
+			dev_err(kbdev->dev,
+				"newly_created_pgd %d insert_level %d num_of_valid_entries %u",
+				newly_created_pgd, insert_level, num_of_valid_entries);
+			dump_zones_info(mmut->kctx);
+			dump_custom_va_zone_regions_info(mmut->kctx, insert_vpfn);
+		}
 		mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
 
 		if (dirty_pgds)
@@ -2433,7 +2556,7 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
 	if (newly_created_pgd) {
 		err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
-					 new_pgds);
+					 new_pgds, reg);
 		if (err) {
 			dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)", __func__, err);
@@ -2918,8 +3041,15 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
 			if (mmu_mode->ate_is_valid(page[index], level))
 				break; /* keep the mapping */
 			else if (!mmu_mode->pte_is_valid(page[index], level)) {
-				dev_warn(kbdev->dev, "Invalid PTE found @ level %d for VA %llx",
-					 level, vpfn << PAGE_SHIFT);
+				u32 mmut_kctx_tgid = mmut->kctx ? mmut->kctx->tgid : 0;
+				u32 mmut_kctx_id = mmut->kctx ? mmut->kctx->id : 0;
+				dev_warn(kbdev->dev,
+					 "Invalid PTE found @ level %d for VA %llx"
+					 " (nr %zu, valid entries %u) of kctx %d_%d",
+					 level, vpfn << PAGE_SHIFT, nr,
+					 mmu_mode->get_num_valid_entries(page),
+					 mmut_kctx_tgid, mmut_kctx_id);
+				WARN_ON_ONCE(1);
 				/* nothing here, advance to the next PTE of the current level */
 				count = (1 << ((3 - level) * 9));
 				count -= (vpfn & (count - 1));
```
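
The index arithmetic repeated through these hunks — `(vpfn >> ((3 - level) * 9)) & 0x1FF` for a table index and `vpfn << 12` for the logged VA — reflects a 4-level page table with 9 index bits (512 entries) per level and 4 KiB pages. A standalone userspace sketch of that decomposition (illustrative only, not driver code):

```c
#include <stdint.h>
#include <stdio.h>

/* Decompose a virtual page frame number (VPFN) the way the kbase MMU
 * code does: 4 levels, 9 index bits per level, 512 entries per table.
 */
int main(void)
{
	uint64_t vpfn = 0x123456789ULL; /* arbitrary example VPFN */

	for (int level = 0; level <= 3; level++) {
		/* Level 0 uses the top 9 bits, level 3 the bottom 9. */
		uint64_t idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
		printf("level %d index: %llu\n", level,
		       (unsigned long long)idx);
	}

	/* The VA logged by the patch is vpfn << 12: the VPFN scaled back
	 * up to a byte address (4 KiB pages).
	 */
	printf("VA: 0x%llx\n", (unsigned long long)(vpfn << 12));
	return 0;
}
```

Since each level consumes 9 bits, every table has 2^9 = 512 slots, which is the same `KBASE_MMU_PAGE_ENTRIES` bound the new `num_of_valid_entries` checks enforce.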