author     Akash Goel <akash.goel@arm.com>       2024-04-02 12:08:36 +0000
committer  Renato Grottesi <otaner@google.com>   2024-04-10 16:03:46 +0000
commit     656ed97ecba51a1656d1f1deb5b0659ebf073a59 (patch)
tree       93da56c4068e4c44c9a24916f71c0fe5d60041d2 /mali_kbase
parent     10cee2ea3b7ca2089ac98fbe16ef1110e2298003 (diff)
download   gpu-656ed97ecba51a1656d1f1deb5b0659ebf073a59.tar.gz
GPUCORE-41611: Debug num_valid_entries > 512 warning issue
Adding extra output for the num_valid_entries > 512 assertion.

Provenance: https://code.ipdelivery.arm.com/c/GPU/mali-ddk/+/6548

Bug: 315967882
Test: Local boot and run of GFXBench5
Signed-off-by: Renato Grottesi <otaner@google.com>
Change-Id: I9f7b385777688e77389656f8bfd06c0bc493a567
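Editor's aside (not part of the commit): the 512 limit in the warning corresponds to KBASE_MMU_PAGE_ENTRIES, because each level of the 4-level page table indexes the virtual PFN with 9 bits (the 0x1FF mask in the parent_vpfn computation in update_parent_pgds() below), so a PGD page can hold at most 2^9 = 512 valid entries. A minimal standalone C sketch of that per-level indexing, using a hypothetical level_index() helper rather than any kbase API:

/* Standalone sketch, not kbase code: per-level PGD index extraction.
 * Mirrors "(insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF" from the
 * diff; 9 index bits per level give at most 2^9 = 512 valid entries
 * per PGD page, hence the num_valid_entries > 512 assertion.
 */
#include <stdint.h>
#include <stdio.h>

#define ENTRIES_PER_PGD 512 /* corresponds to KBASE_MMU_PAGE_ENTRIES */

/* Hypothetical helper: index into the PGD page at a given level (0..3). */
static unsigned int level_index(uint64_t vpfn, int level)
{
	return (unsigned int)((vpfn >> ((3 - level) * 9)) & 0x1FF);
}

int main(void)
{
	const uint64_t vpfn = 0x123456789ULL;
	int level;

	for (level = 0; level <= 3; level++)
		printf("level %d -> entry %u of %d\n",
		       level, level_index(vpfn, level), ENTRIES_PER_PGD);
	return 0;
}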
Diffstat (limited to 'mali_kbase')
-rw-r--r--  mali_kbase/context/mali_kbase_context.c |   4
-rw-r--r--  mali_kbase/mali_kbase_defs.h            |   1
-rw-r--r--  mali_kbase/mali_kbase_jd.c              |   9
-rw-r--r--  mali_kbase/mali_kbase_mem.c             |  10
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c         | 148
5 files changed, 159 insertions(+), 13 deletions(-)
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 9b8cc5d..7fb7b43 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -327,7 +327,9 @@ void kbase_context_common_term(struct kbase_context *kctx)
pages = atomic_read(&kctx->used_pages);
if (pages != 0)
- dev_warn(kctx->kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+ dev_warn(kctx->kbdev->dev, "%s: %d pages (pgd cnt %u) in use for kctx %d_%d",
+ __func__, pages,
+ kctx->pgd_cnt, kctx->tgid, kctx->id);
WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index d7adc7d..1162b95 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -2032,6 +2032,7 @@ struct kbase_context {
atomic_t used_pages;
atomic_t nonmapped_pages;
atomic_t permanent_mapped_pages;
+ u32 pgd_cnt;
struct kbase_mem_pool_group mem_pools;
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index 08c840a..6c8dd11 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -1383,8 +1383,13 @@ void kbase_jd_done_worker(struct kthread_work *data)
if ((katom->event_code != BASE_JD_EVENT_DONE) && !kbase_ctx_flag(katom->kctx, KCTX_DYING) &&
!kbase_ctx_flag(katom->kctx, KCTX_PAGE_FAULT_REPORT_SKIP))
- dev_err(kbdev->dev, "t6xx: GPU fault 0x%02lx from job slot %d\n",
- (unsigned long)katom->event_code, katom->slot_nr);
+ dev_err(kbdev->dev,
+ "Atom %d (age %x core_req %x) of kctx %d_%d"
+ " completed with event_code %x on job slot %d",
+ kbase_jd_atom_id(kctx, katom),
+ katom->age, katom->core_req,
+ kctx->tgid, kctx->id,
+ katom->event_code, katom->slot_nr);
/* Retain state before the katom disappears */
kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index c3eb2cc..337fe2a 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -2706,7 +2706,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo
{
size_t delta;
size_t pages_required;
- size_t old_size;
+ size_t old_size, old_size_orig;
struct kbase_mem_pool *pool;
int ret = -ENOMEM;
struct tagged_addr *gpu_pages;
@@ -2725,6 +2725,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo
if (reg->gpu_alloc->nents >= info->commit_pages)
goto done;
+ old_size_orig = reg->gpu_alloc->nents;
/* Allocate some more pages */
delta = info->commit_pages - reg->gpu_alloc->nents;
pages_required = delta;
@@ -2802,6 +2803,13 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo
spin_unlock(&kctx->mem_partials_lock);
ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, old_size, mmu_sync_info);
+ if (unlikely(old_size_orig != old_size)) {
+ dev_warn(
+ kctx->kbdev->dev,
+ "JIT alloc %llx backing changed from %zu to %zu during grow for kctx %d_%d",
+ reg->start_pfn << PAGE_SHIFT, old_size_orig, old_size,
+ kctx->tgid, kctx->id);
+ }
/*
* The grow failed so put the allocation back in the
* pool and return failure.
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 0e73641..6bab554 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -334,6 +334,7 @@ static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase
if (mmut->kctx) {
kbase_process_page_usage_dec(mmut->kctx, 1);
atomic_sub(1, &mmut->kctx->used_pages);
+ mmut->kctx->pgd_cnt--;
}
kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
@@ -1162,9 +1163,12 @@ page_fault_retry:
if (fault_rel_pfn < current_backed_size) {
struct kbase_mmu_hw_op_param op_param;
- dev_dbg(kbdev->dev,
- "Page fault @ VA 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
- fault->addr, region->start_pfn, region->start_pfn + current_backed_size);
+ dev_warn_ratelimited(kbdev->dev,
+ "Page fault @VA 0x%llx in allocated region 0x%llx-0x%llx"
+ " (status %x) of kctx %d_%d (as %d)",
+ fault->addr, region->start_pfn << PAGE_SHIFT,
+ (region->start_pfn + current_backed_size) << PAGE_SHIFT,
+ fault_status, kctx->tgid, kctx->id, as_no);
kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE);
/* [1] in case another page fault occurred while we were
@@ -1460,6 +1464,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_
new_page_count = atomic_add_return(1, &mmut->kctx->used_pages);
KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, (u64)new_page_count);
kbase_process_page_usage_inc(mmut->kctx, 1);
+ mmut->kctx->pgd_cnt++;
}
atomic_add(1, &kbdev->memdev.used_pages);
@@ -1782,6 +1787,77 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
}
+static void dump_region_info(struct kbase_device *kbdev, struct kbase_va_region *reg)
+{
+ if (!reg)
+ return;
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ dev_err(kbdev->dev,
+ "Region info: GPU VA %llx, nr_pages %zu, used_pages %zu,"
+ " commit_pages %zu, flags %lx, type %d",
+ reg->start_pfn << PAGE_SHIFT,
+ reg->nr_pages, reg->used_pages, reg->gpu_alloc->nents,
+ reg->flags, reg->gpu_alloc->type);
+#else
+ dev_err(kbdev->dev,
+ "Region info: GPU VA %llx, nr_pages %zu,"
+ " commit_pages %zu, flags %lx, type %d",
+ reg->start_pfn << PAGE_SHIFT,
+ reg->nr_pages, reg->gpu_alloc->nents,
+ reg->flags, reg->gpu_alloc->type);
+#endif
+}
+
+static void dump_zones_info(struct kbase_context *const kctx)
+{
+ enum kbase_memory_zone zone_idx;
+
+ if (!kctx)
+ return;
+
+ for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) {
+ struct kbase_reg_zone *reg_zone = &kctx->reg_zone[zone_idx];
+
+ if (!reg_zone->base_pfn)
+ continue;
+ dev_warn(
+ kctx->kbdev->dev,
+ "%15s %u 0x%.16llx 0x%.16llx",
+ kbase_reg_zone_get_name(zone_idx), zone_idx, reg_zone->base_pfn,
+ reg_zone->va_size_pages);
+ }
+}
+
+static void dump_custom_va_zone_regions_info(struct kbase_context *const kctx, u64 insert_vpfn)
+{
+ struct kbase_reg_zone *reg_zone;
+
+ if (!kctx)
+ return;
+
+ reg_zone = &kctx->reg_zone[CUSTOM_VA_ZONE];
+
+ if (reg_zone->base_pfn &&
+ (insert_vpfn >= reg_zone->base_pfn) &&
+ (insert_vpfn < kbase_reg_zone_end_pfn(reg_zone))) {
+ struct rb_root *rbtree = &reg_zone->reg_rbtree;
+ struct rb_node *p;
+
+ dev_err(kctx->kbdev->dev, "Dumping CUSTOM_VA regions info");
+
+ for (p = rb_first(rbtree); p; p = rb_next(p)) {
+ struct kbase_va_region *reg = rb_entry(p, struct kbase_va_region, rblink);
+
+ /* Empty region - ignore */
+ if (reg->gpu_alloc == NULL)
+ continue;
+
+ dump_region_info(kctx->kbdev, reg);
+ }
+ }
+}
+
/**
* update_parent_pgds() - Updates the page table from bottom level towards
* the top level to insert a new ATE
@@ -1797,6 +1873,7 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
* the physical addresses of newly allocated PGDs from index
* insert_level+1 to cur_level, and an existing PGD at index
* insert_level.
+ * @reg: Pointer to the VA region that needs to be mapped.
*
* The newly allocated PGDs are linked from the bottom level up and inserted into the PGD
* at insert_level which already exists in the MMU Page Tables. Migration status is also
@@ -1809,7 +1886,8 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
*/
static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
int cur_level, int insert_level, u64 insert_vpfn,
- phys_addr_t *pgds_to_insert)
+ phys_addr_t *pgds_to_insert,
+ struct kbase_va_region *reg)
{
int pgd_index;
int err = 0;
@@ -1827,6 +1905,8 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table
u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF;
struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd));
u64 *parent_page_va;
+ u32 mmut_kctx_tgid = mmut->kctx ? mmut->kctx->tgid : 0;
+ u32 mmut_kctx_id = mmut->kctx ? mmut->kctx->id : 0;
if (WARN_ON_ONCE(target_pgd == KBASE_INVALID_PHYSICAL_ADDRESS)) {
err = -EFAULT;
@@ -1844,8 +1924,32 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table
current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va);
kbdev->mmu_mode->entry_set_pte(&pte, target_pgd);
+ if (WARN_ON_ONCE((parent_page_va[parent_vpfn] & 1UL) != 0)) {
+ dev_err(kbdev->dev,
+ "Valid PTE found at index %lld of"
+ " Level %d table page for VA %llx of kctx %d_%d",
+ parent_vpfn, parent_index, insert_vpfn << 12,
+ mmut_kctx_tgid, mmut_kctx_id);
+ dump_region_info(kbdev, reg);
+ dev_err(kbdev->dev, "cur_level %d insert_level %d current_valid_entries %u",
+ cur_level, insert_level, current_valid_entries);
+ dump_zones_info(mmut->kctx);
+ dump_custom_va_zone_regions_info(mmut->kctx, insert_vpfn);
+ }
parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte);
+ if (unlikely((current_valid_entries + 1) > KBASE_MMU_PAGE_ENTRIES)) {
+ dev_err(kbdev->dev,
+ "Unexpected valid entry count after updating entry at index %lld of"
+ " Level %d table page for VA %llx of kctx %d_%d",
+ parent_vpfn, parent_index, insert_vpfn << 12,
+ mmut_kctx_tgid, mmut_kctx_id);
+ dump_region_info(kbdev, reg);
+ dev_err(kbdev->dev, "cur_level %d insert_level %d current_valid_entries %u",
+ cur_level, insert_level, current_valid_entries);
+ dump_zones_info(mmut->kctx);
+ dump_custom_va_zone_regions_info(mmut->kctx, insert_vpfn);
+ }
kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1);
kbase_kunmap(parent_page, parent_page_va);
@@ -2111,7 +2215,7 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp
if (newly_created_pgd) {
err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
- new_pgds);
+ new_pgds, NULL);
if (err) {
dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
__func__, err);
@@ -2395,7 +2499,11 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
* should be performed with
* kbase_mmu_update_pages()
*/
- WARN_ON_ONCE((*target & 1UL) != 0);
+ if (WARN_ON_ONCE((*target & 1UL) != 0))
+ dev_err(kbdev->dev,
+ "valid ATE found at entry %u of"
+ " Level 3 table page for VA %llx",
+ ofs, insert_vpfn << 12);
*target = kbase_mmu_create_ate(kbdev,
as_tagged(page_address),
@@ -2413,6 +2521,21 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
num_of_valid_entries += count;
}
+ if (unlikely(num_of_valid_entries > KBASE_MMU_PAGE_ENTRIES)) {
+ u32 mmut_kctx_tgid = mmut->kctx ? mmut->kctx->tgid : 0;
+ u32 mmut_kctx_id = mmut->kctx ? mmut->kctx->id : 0;
+ dev_err(kbdev->dev,
+ "Unexpected valid entry count after updating %u entries"
+ " at index %d of Level %d table page for VA %llx of kctx %d_%d",
+ count, vindex, cur_level, insert_vpfn << 12, mmut_kctx_tgid,
+ mmut_kctx_id);
+ dump_region_info(kbdev, reg);
+ dev_err(kbdev->dev,
+ "newly_created_pgd %d insert_level %d num_of_valid_entries %u",
+ newly_created_pgd, insert_level, num_of_valid_entries);
+ dump_zones_info(mmut->kctx);
+ dump_custom_va_zone_regions_info(mmut->kctx, insert_vpfn);
+ }
mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
if (dirty_pgds)
@@ -2433,7 +2556,7 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
if (newly_created_pgd) {
err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
- new_pgds);
+ new_pgds, reg);
if (err) {
dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
__func__, err);
@@ -2918,8 +3041,15 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
if (mmu_mode->ate_is_valid(page[index], level))
break; /* keep the mapping */
else if (!mmu_mode->pte_is_valid(page[index], level)) {
- dev_warn(kbdev->dev, "Invalid PTE found @ level %d for VA %llx",
- level, vpfn << PAGE_SHIFT);
+ u32 mmut_kctx_tgid = mmut->kctx ? mmut->kctx->tgid : 0;
+ u32 mmut_kctx_id = mmut->kctx ? mmut->kctx->id : 0;
+ dev_warn(kbdev->dev,
+ "Invalid PTE found @ level %d for VA %llx"
+ " (nr %zu, valid entries %u) of kctx %d_%d",
+ level, vpfn << PAGE_SHIFT, nr,
+ mmu_mode->get_num_valid_entries(page),
+ mmut_kctx_tgid, mmut_kctx_id);
+ WARN_ON_ONCE(1);
/* nothing here, advance to the next PTE of the current level */
count = (1 << ((3 - level) * 9));
count -= (vpfn & (count - 1));