author    | Sidath Senanayake <sidaths@google.com> | 2018-12-06 09:09:59 +0100
committer | Sidath Senanayake <sidaths@google.com> | 2018-12-06 09:09:59 +0100
commit    | a970431fa55f99aba31ea4263fdc8e70019a9ccd (patch)
tree      | 91bb7f49a4869c0385338fe144f53ac8b98468ea /mali_kbase
parent    | f10b3de5283d0c196459f18160161e48cfadae81 (diff)
download  | gpu-a970431fa55f99aba31ea4263fdc8e70019a9ccd.tar.gz
Mali Bifrost DDK r16p0 KMD
Provenance:
aa8b3ff0f (collaborate/EAC/b_r16p0)
BX304L01B-BU-00000-r16p0-01rel0
BX304L06A-BU-00000-r16p0-01rel0
BX304X07X-BU-00000-r16p0-01rel0
Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: I96125862b7cf6596d1b7109853fb4ca39e851056
Diffstat (limited to 'mali_kbase')
99 files changed, 8921 insertions, 5224 deletions
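
Editorial orientation before the patch body: the largest functional change below replaces the old busy-wait cache flush (kbase_gpu_cacheclean()) with an interrupt-driven clean added in mali_kbase_device_hw.c. The following is a minimal caller-side sketch of the new helpers, using the function names introduced by this patch; the surrounding function is illustrative only, is not part of the patch, and assumes the kbase driver headers are available:

```c
/* Illustrative pattern only (not part of the patch). */
static void example_flush_after_job(struct kbase_device *kbdev)
{
	/* Issue a cache clean+invalidate: unmasks CLEAN_CACHES_COMPLETED and
	 * writes GPU_COMMAND_CLEAN_INV_CACHES. Takes hwaccess_lock itself;
	 * use kbase_gpu_start_cache_clean_nolock() if the lock is already held.
	 */
	kbase_gpu_start_cache_clean(kbdev);

	/* Sleep until the IRQ handler observes completion and clears
	 * cache_clean_in_progress. May sleep, so it must not be called
	 * from interrupt context.
	 */
	kbase_gpu_wait_cache_clean(kbdev);
}
```

This mirrors the new kbase_backend_cache_clean() path in mali_kbase_jm_rb.c, which starts the clean and then waits for it instead of polling GPU_IRQ_RAWSTAT.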
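
Similarly, the reset worker and protected-mode paths below drop kbase_vinstr_suspend()/kbase_vinstr_resume() in place of the new hardware-counter context API. A hedged sketch of that bracket follows; the wrapper name is hypothetical and the reset step is elided:

```c
/* Illustrative only: hwcnt bracket around a GPU reset, as done in
 * kbasep_reset_timeout_worker() by this patch.
 */
static void example_reset_with_hwcnt(struct kbase_device *kbdev)
{
	unsigned long flags;

	/* Blocks until GPU hardware counters are fully disabled. */
	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);

	/* ... soft-reset the GPU and restore its power state here ... */

	/* Re-enable counters under hwaccess_lock once the GPU is back up. */
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
```

The raw patch body follows unmodified.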
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index 9b3cb91..8e73e1f 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -21,7 +21,7 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r15p0-01rel0" +MALI_RELEASE_NAME ?= "r16p0-01rel0" # Paths required for build KBASE_PATH = $(src) @@ -33,7 +33,6 @@ MALI_CUSTOMER_RELEASE ?= 1 MALI_USE_CSF ?= 0 MALI_UNIT_TEST ?= 0 MALI_KERNEL_TEST_API ?= 0 -MALI_MOCK_TEST ?= 0 MALI_COVERAGE ?= 0 CONFIG_MALI_PLATFORM_NAME ?= "devicetree" @@ -43,7 +42,6 @@ DEFINES = \ -DMALI_USE_CSF=$(MALI_USE_CSF) \ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ - -DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \ -DMALI_COVERAGE=$(MALI_COVERAGE) \ -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" @@ -61,7 +59,7 @@ DEFINES += -DMALI_KBASE_BUILD # Use our defines when compiling ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux -subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux +subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux SRC := \ mali_kbase_device.c \ @@ -80,10 +78,15 @@ SRC := \ mali_kbase_pm.c \ mali_kbase_config.c \ mali_kbase_vinstr.c \ + mali_kbase_hwcnt.c \ + mali_kbase_hwcnt_backend_gpu.c \ + mali_kbase_hwcnt_gpu.c \ + mali_kbase_hwcnt_legacy.c \ + mali_kbase_hwcnt_types.c \ + mali_kbase_hwcnt_virtualizer.c \ mali_kbase_softjobs.c \ mali_kbase_10969_workaround.c \ mali_kbase_hw.c \ - mali_kbase_utility.c \ mali_kbase_debug.c \ mali_kbase_gpu_memory_debugfs.c \ mali_kbase_mem_linux.c \ @@ -154,11 +157,6 @@ mali_kbase-$(CONFIG_SYNC_FILE) += \ mali_kbase_sync_common.o \ mali_kbase_fence.o -ifeq ($(MALI_MOCK_TEST),1) -# Test functionality -mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o -endif - include $(src)/backend/gpu/Kbuild mali_kbase-y += $(BACKEND:.c=.o) diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig index af2a5aa..7c10016 100644 --- a/mali_kbase/Kconfig +++ b/mali_kbase/Kconfig @@ -31,14 +31,12 @@ menuconfig MALI_MIDGARD this will generate a single module, called mali_kbase. config MALI_GATOR_SUPPORT - bool "Streamline support via Gator" + bool "Enable Streamline tracing support" depends on MALI_MIDGARD - default n + default y help - Adds diagnostic support for use with the ARM Streamline Performance Analyzer. - You will need the Gator device driver already loaded before loading this driver when enabling - Streamline debug support. - This is a legacy interface required by older versions of Streamline. + Enables kbase tracing used by the Arm Streamline Performance Analyzer. + The tracepoints are used to derive GPU activity charts in Streamline. config MALI_MIDGARD_DVFS bool "Enable legacy DVFS" diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile index 13af9f4..08b2fa9 100644 --- a/mali_kbase/Makefile +++ b/mali_kbase/Makefile @@ -25,10 +25,6 @@ KDIR ?= /lib/modules/$(shell uname -r)/build BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../.. 
KBASE_PATH_RELATIVE = $(CURDIR) -ifeq ($(MALI_UNIT_TEST), 1) - EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers -endif - ifeq ($(CONFIG_MALI_FPGA_BUS_LOGGER),y) #Add bus logger symbols EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers diff --git a/mali_kbase/Makefile.kbase b/mali_kbase/Makefile.kbase index d7898cb..6b0f81e 100644 --- a/mali_kbase/Makefile.kbase +++ b/mali_kbase/Makefile.kbase @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2010 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010, 2013, 2018 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -19,5 +19,5 @@ # # -EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM) +EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(KBASE_PATH)/platform_$(PLATFORM) diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig index 1b6bffc..46dca14 100644 --- a/mali_kbase/Mconfig +++ b/mali_kbase/Mconfig @@ -23,15 +23,12 @@ menuconfig MALI_MIDGARD this will generate a single module, called mali_kbase. config MALI_GATOR_SUPPORT - bool "Streamline support via Gator" + bool "Enable Streamline tracing support" depends on MALI_MIDGARD && !BACKEND_USER - default y if INSTRUMENTATION_STREAMLINE_OLD - default n + default y help - Adds diagnostic support for use with the ARM Streamline Performance Analyzer. - You will need the Gator device driver already loaded before loading this driver when enabling - Streamline debug support. - This is a legacy interface required by older versions of Streamline. + Enables kbase tracing used by the Arm Streamline Performance Analyzer. + The tracepoints are used to derive GPU activity charts in Streamline. config MALI_MIDGARD_DVFS bool "Enable legacy DVFS" @@ -88,11 +85,6 @@ config MALI_PLATFORM_NAME When PLATFORM_CUSTOM is set, this needs to be set manually to pick up the desired platform files. -config MALI_MOCK_TEST - bool - depends on MALI_MIDGARD && !RELEASE - default y - # MALI_EXPERT configuration options menuconfig MALI_EXPERT diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild index dcd8ca4..2dc1455 100644 --- a/mali_kbase/backend/gpu/Kbuild +++ b/mali_kbase/backend/gpu/Kbuild @@ -38,14 +38,12 @@ BACKEND += \ backend/gpu/mali_kbase_pm_ca.c \ backend/gpu/mali_kbase_pm_always_on.c \ backend/gpu/mali_kbase_pm_coarse_demand.c \ - backend/gpu/mali_kbase_pm_demand.c \ backend/gpu/mali_kbase_pm_policy.c \ backend/gpu/mali_kbase_time.c ifeq ($(MALI_CUSTOMER_RELEASE),0) BACKEND += \ - backend/gpu/mali_kbase_pm_demand_always_powered.c \ - backend/gpu/mali_kbase_pm_fast_start.c + backend/gpu/mali_kbase_pm_always_on_demand.c endif ifeq ($(CONFIG_MALI_DEVFREQ),y) diff --git a/mali_kbase/backend/gpu/mali_kbase_backend_config.h b/mali_kbase/backend/gpu/mali_kbase_backend_config.h index 196a776..4a61f96 100644 --- a/mali_kbase/backend/gpu/mali_kbase_backend_config.h +++ b/mali_kbase/backend/gpu/mali_kbase_backend_config.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,8 +27,5 @@ #ifndef _KBASE_BACKEND_CONFIG_H_ #define _KBASE_BACKEND_CONFIG_H_ -/* Enable GPU reset API */ -#define KBASE_GPU_RESET_EN 1 - #endif /* _KBASE_BACKEND_CONFIG_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index 683a24c..5ade012 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -283,8 +283,11 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) real_freq = opp_freq; if (of_property_read_u64(node, "opp-core-mask", &core_mask)) core_mask = shader_present; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) && - core_mask != shader_present) { + if (core_mask != shader_present && + (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) || + corestack_driver_control || + platform_power_down_only)) { + dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", opp_freq); continue; diff --git a/mali_kbase/backend/gpu/mali_kbase_device_hw.c b/mali_kbase/backend/gpu/mali_kbase_device_hw.c index ebc3022..5dd059f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_device_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_device_hw.c @@ -29,6 +29,7 @@ #include <backend/gpu/mali_kbase_pm_internal.h> #include <backend/gpu/mali_kbase_device_internal.h> +#include <mali_kbase_config_defaults.h> #if !defined(CONFIG_MALI_NO_MALI) @@ -220,6 +221,84 @@ static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); } +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev) +{ + u32 irq_mask; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->cache_clean_in_progress) { + /* If this is called while another clean is in progress, we + * can't rely on the current one to flush any new changes in + * the cache. Instead, trigger another cache clean immediately + * after this one finishes. 
+ */ + kbdev->cache_clean_queued = true; + return; + } + + /* Enable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask | CLEAN_CACHES_COMPLETED); + + KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES); + + kbdev->cache_clean_in_progress = true; +} + +void kbase_gpu_start_cache_clean(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_gpu_start_cache_clean_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +static void kbase_clean_caches_done(struct kbase_device *kbdev) +{ + u32 irq_mask; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbdev->cache_clean_queued) { + kbdev->cache_clean_queued = false; + + KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES); + } else { + /* Disable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~CLEAN_CACHES_COMPLETED); + + kbdev->cache_clean_in_progress = false; + + wake_up(&kbdev->cache_clean_wait); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + while (kbdev->cache_clean_in_progress) { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + wait_event_interruptible(kbdev->cache_clean_wait, + !kbdev->cache_clean_in_progress); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) { KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val); @@ -232,18 +311,29 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) if (val & PRFCNT_SAMPLE_COMPLETED) kbase_instr_hwcnt_sample_done(kbdev); - if (val & CLEAN_CACHES_COMPLETED) - kbase_clean_caches_done(kbdev); - KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); - /* kbase_pm_check_transitions must be called after the IRQ has been - * cleared. This is because it might trigger further power transitions - * and we don't want to miss the interrupt raised to notify us that - * these further transitions have finished. + /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must + * be called after the IRQ has been cleared. This is because it might + * trigger further power transitions and we don't want to miss the + * interrupt raised to notify us that these further transitions have + * finished. The same applies to kbase_clean_caches_done() - if another + * clean was queued, it might trigger another clean, which might + * generate another interrupt which shouldn't be missed. + */ + + if (val & CLEAN_CACHES_COMPLETED) + kbase_clean_caches_done(kbdev); + + /* When 'platform_power_down_only' is enabled, the L2 cache is not + * powered down, but flushed before the GPU power down (which is done + * by the platform code). So the L2 state machine requests a cache + * flush. And when that flush completes, the L2 state machine needs to + * be re-invoked to proceed with the GPU power down. 
*/ - if (val & POWER_CHANGED_ALL) + if (val & POWER_CHANGED_ALL || + (platform_power_down_only && (val & CLEAN_CACHES_COMPLETED))) kbase_pm_power_changed(kbdev); KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val); diff --git a/mali_kbase/backend/gpu/mali_kbase_device_internal.h b/mali_kbase/backend/gpu/mali_kbase_device_internal.h index 928efe9..7886e96 100644 --- a/mali_kbase/backend/gpu/mali_kbase_device_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_device_internal.h @@ -50,6 +50,31 @@ void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value); */ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); +/** + * kbase_gpu_start_cache_clean - Start a cache clean + * @kbdev: Kbase device + * + * Issue a cache clean and invalidate command to hardware. This function will + * take hwaccess_lock. + */ +void kbase_gpu_start_cache_clean(struct kbase_device *kbdev); + +/** + * kbase_gpu_start_cache_clean_nolock - Start a cache clean + * @kbdev: Kbase device + * + * Issue a cache clean and invalidate command to hardware. hwaccess_lock + * must be held by the caller. + */ +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev); + +/** + * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish + * @kbdev: Kbase device + * + * This function will take hwaccess_lock, and may sleep. + */ +void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); /** * kbase_gpu_interrupt - GPU interrupt handler diff --git a/mali_kbase/backend/gpu/mali_kbase_gpu.c b/mali_kbase/backend/gpu/mali_kbase_gpu.c index 881d50c..995d34d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_gpu.c +++ b/mali_kbase/backend/gpu/mali_kbase_gpu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -56,7 +56,7 @@ int kbase_backend_early_init(struct kbase_device *kbdev) if (err) goto fail_interrupts; - err = kbase_hwaccess_pm_init(kbdev); + err = kbase_hwaccess_pm_early_init(kbdev); if (err) goto fail_pm; @@ -74,7 +74,7 @@ fail_runtime_pm: void kbase_backend_early_term(struct kbase_device *kbdev) { - kbase_hwaccess_pm_term(kbdev); + kbase_hwaccess_pm_early_term(kbdev); kbase_release_interrupts(kbdev); kbase_pm_runtime_term(kbdev); kbasep_platform_device_term(kbdev); @@ -84,10 +84,14 @@ int kbase_backend_late_init(struct kbase_device *kbdev) { int err; - err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); + err = kbase_hwaccess_pm_late_init(kbdev); if (err) return err; + err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); + if (err) + goto fail_pm_powerup; + err = kbase_backend_timer_init(kbdev); if (err) goto fail_timer; @@ -121,6 +125,8 @@ fail_interrupt_test: kbase_backend_timer_term(kbdev); fail_timer: kbase_hwaccess_pm_halt(kbdev); +fail_pm_powerup: + kbase_hwaccess_pm_late_term(kbdev); return err; } @@ -131,5 +137,5 @@ void kbase_backend_late_term(struct kbase_device *kbdev) kbase_job_slot_term(kbdev); kbase_backend_timer_term(kbdev); kbase_hwaccess_pm_halt(kbdev); + kbase_hwaccess_pm_late_term(kbdev); } - diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c index 6c69132..79c04d9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c @@ -33,49 +33,17 @@ #include <backend/gpu/mali_kbase_pm_internal.h> #include <backend/gpu/mali_kbase_instr_internal.h> -/** - * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to - * hardware - * - * @kbdev: Kbase device - */ -static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) -{ - unsigned long flags; - unsigned long pm_flags; - u32 irq_mask; - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_REQUEST_CLEAN); - - /* Enable interrupt */ - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask | CLEAN_CACHES_COMPLETED); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - - /* clean&invalidate the caches so we're sure the mmu tables for the dump - * buffer is valid */ - KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CLEAN_INV_CACHES); - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING; - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -} - int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbase_ioctl_hwcnt_enable *enable) + struct kbase_instr_hwcnt_enable *enable) { - unsigned long flags, pm_flags; + unsigned long flags; int err = -EINVAL; u32 irq_mask; - int ret; u32 prfcnt_config; + lockdep_assert_held(&kbdev->hwaccess_lock); + /* alignment failure */ if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1))) goto out_err; @@ -84,53 +52,30 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, */ kbase_pm_ca_instr_enable(kbdev); - /* Request the cores early on synchronously - we'll release them on any - * errors (e.g. 
instrumentation already active) */ - kbase_pm_request_cores_sync(kbdev, true, true); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { /* Instrumentation is already enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - goto out_unrequest_cores; + goto out_err; } /* Enable interrupt */ - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | PRFCNT_SAMPLE_COMPLETED); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* In use, this context is the owner */ kbdev->hwcnt.kctx = kctx; /* Remember the dump address so we can reprogram it later */ kbdev->hwcnt.addr = enable->dump_buffer; - - /* Request the clean */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; - kbdev->hwcnt.backend.triggered = 0; - /* Clean&invalidate the caches so we're sure the mmu tables for the dump - * buffer is valid */ - ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, - &kbdev->hwcnt.backend.cache_clean_work); - KBASE_DEBUG_ASSERT(ret); + kbdev->hwcnt.addr_bytes = enable->dump_buffer_bytes; spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - /* Wait for cacheclean to complete */ - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - - KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_IDLE); - - kbase_pm_request_l2_caches(kbdev); - /* Configure */ prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + if (enable->use_secondary) { u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) @@ -140,7 +85,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, if (arch_v6) prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; } -#endif kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), prfcnt_config | PRFCNT_CONFIG_MODE_OFF); @@ -184,10 +128,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); return err; - out_unrequest_cores: - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_release_cores(kbdev, true, true); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); out_err: return err; } @@ -200,17 +140,20 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) struct kbase_device *kbdev = kctx->kbdev; while (1) { + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { /* Instrumentation is not enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); goto out; } if (kbdev->hwcnt.kctx != kctx) { /* Instrumentation has been setup for another context */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); goto out; } @@ -218,6 +161,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) break; spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* Ongoing dump/setup - wait for its completion */ wait_event(kbdev->hwcnt.backend.wait, @@ -228,7 +172,6 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) kbdev->hwcnt.backend.triggered = 0; /* Disable interrupt */ - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); irq_mask 
= kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED); @@ -238,15 +181,12 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) kbdev->hwcnt.kctx = NULL; kbdev->hwcnt.addr = 0ULL; + kbdev->hwcnt.addr_bytes = 0ULL; kbase_pm_ca_instr_disable(kbdev); - kbase_pm_release_cores(kbdev, true, true); - - kbase_pm_release_l2_caches(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx); @@ -331,33 +271,34 @@ KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); void kbasep_cache_clean_worker(struct work_struct *data) { struct kbase_device *kbdev; - unsigned long flags; + unsigned long flags, pm_flags; kbdev = container_of(data, struct kbase_device, hwcnt.backend.cache_clean_work); - mutex_lock(&kbdev->cacheclean_lock); - kbasep_instr_hwcnt_cacheclean(kbdev); - + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - /* Wait for our condition, and any reset to complete */ - while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.cache_clean_wait, - kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_CLEANING); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } + + /* Clean and invalidate the caches so we're sure the mmu tables for the + * dump buffer is valid. + */ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_CLEANED); + KBASE_INSTR_STATE_REQUEST_CLEAN); + kbase_gpu_start_cache_clean_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + kbase_gpu_wait_cache_clean(kbdev); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_REQUEST_CLEAN); /* All finished and idle */ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; kbdev->hwcnt.backend.triggered = 1; wake_up(&kbdev->hwcnt.backend.wait); spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - mutex_unlock(&kbdev->cacheclean_lock); } void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) @@ -389,40 +330,13 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); } -void kbase_clean_caches_done(struct kbase_device *kbdev) -{ - u32 irq_mask; - - if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { - unsigned long flags; - unsigned long pm_flags; - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - /* Disable interrupt */ - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask & ~CLEAN_CACHES_COMPLETED); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - - /* Wakeup... 
*/ - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { - /* Only wake if we weren't resetting */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; - wake_up(&kbdev->hwcnt.backend.cache_clean_wait); - } - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - } -} - int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; unsigned long flags; int err; - /* Wait for dump & cacheclean to complete */ + /* Wait for dump & cache clean to complete */ wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0); @@ -477,7 +391,6 @@ int kbase_instr_backend_init(struct kbase_device *kbdev) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; init_waitqueue_head(&kbdev->hwcnt.backend.wait); - init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait); INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, kbasep_cache_clean_worker); kbdev->hwcnt.backend.triggered = 0; @@ -494,4 +407,3 @@ void kbase_instr_backend_term(struct kbase_device *kbdev) { destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); } - diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h index fb55d2d..c9fb759 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,11 +39,6 @@ enum kbase_instr_state { KBASE_INSTR_STATE_DUMPING, /* We've requested a clean to occur on a workqueue */ KBASE_INSTR_STATE_REQUEST_CLEAN, - /* Hardware is currently cleaning and invalidating caches. */ - KBASE_INSTR_STATE_CLEANING, - /* Cache clean completed, and either a) a dump is complete, or - * b) instrumentation can now be setup. */ - KBASE_INSTR_STATE_CLEANED, /* An error has occured during DUMPING (page fault). */ KBASE_INSTR_STATE_FAULT }; @@ -54,7 +49,6 @@ struct kbase_instr_backend { int triggered; enum kbase_instr_state state; - wait_queue_head_t cache_clean_wait; struct workqueue_struct *cache_clean_wq; struct work_struct cache_clean_work; }; diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_internal.h b/mali_kbase/backend/gpu/mali_kbase_instr_internal.h index 608379e..2254b9f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_instr_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,12 +36,6 @@ void kbasep_cache_clean_worker(struct work_struct *data); /** - * kbase_clean_caches_done() - Cache clean interrupt received - * @kbdev: Kbase device - */ -void kbase_clean_caches_done(struct kbase_device *kbdev); - -/** * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received * @kbdev: Kbase device */ diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index fee19aa..acd4a5a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -31,10 +31,10 @@ #include <mali_kbase_gator.h> #endif #include <mali_kbase_tlstream.h> -#include <mali_kbase_vinstr.h> #include <mali_kbase_hw.h> #include <mali_kbase_hwaccess_jm.h> #include <mali_kbase_ctx_sched.h> +#include <mali_kbase_hwcnt_context.h> #include <backend/gpu/mali_kbase_device_internal.h> #include <backend/gpu/mali_kbase_irq_internal.h> #include <backend/gpu/mali_kbase_jm_internal.h> @@ -42,11 +42,9 @@ #define beenthere(kctx, f, a...) \ dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) -#if KBASE_GPU_RESET_EN static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev); static void kbasep_reset_timeout_worker(struct work_struct *data); static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer); -#endif /* KBASE_GPU_RESET_EN */ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, struct kbase_context *kctx) @@ -77,7 +75,7 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, struct mali_base_gpu_coherent_group_info *coherency_info = &kbdev->gpu_props.props.coherency_info; - affinity = kbase_pm_ca_get_core_mask(kbdev) & + affinity = kbdev->pm.backend.shaders_avail & kbdev->pm.debug_core_mask[js]; /* JS2 on a dual core group system targets core group 1. All @@ -89,7 +87,7 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, affinity &= coherency_info->group[0].core_mask; } else { /* Use all cores */ - affinity = kbase_pm_ca_get_core_mask(kbdev) & + affinity = kbdev->pm.backend.shaders_avail & kbdev->pm.debug_core_mask[js]; } @@ -141,6 +139,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; + else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) + cfg |= JS_CONFIG_END_FLUSH_CLEAN; else cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; @@ -465,7 +465,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -#if KBASE_GPU_RESET_EN if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_COMMITTED) { /* If we're trying to reset the GPU then we might be able to do @@ -474,7 +473,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) */ kbasep_try_reset_gpu_early(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count); } KBASE_EXPORT_TEST_API(kbase_job_done); @@ -800,7 +798,6 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) if (timeout != 0) goto exit; -#if KBASE_GPU_RESET_EN if (kbase_prepare_to_reset_gpu(kbdev)) { dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. 
process exit)\n", @@ -812,12 +809,6 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) wait_event(kbdev->hwaccess.backend.reset_wait, atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING); -#else - dev_warn(kbdev->dev, - "Jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n", - ZAP_TIMEOUT); - -#endif exit: dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx); @@ -845,7 +836,6 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) int kbase_job_slot_init(struct kbase_device *kbdev) { -#if KBASE_GPU_RESET_EN kbdev->hwaccess.backend.reset_workq = alloc_workqueue( "Mali reset workqueue", 0, 1); if (NULL == kbdev->hwaccess.backend.reset_workq) @@ -858,7 +848,6 @@ int kbase_job_slot_init(struct kbase_device *kbdev) HRTIMER_MODE_REL); kbdev->hwaccess.backend.reset_timer.function = kbasep_reset_timer_callback; -#endif return 0; } @@ -871,13 +860,10 @@ void kbase_job_slot_halt(struct kbase_device *kbdev) void kbase_job_slot_term(struct kbase_device *kbdev) { -#if KBASE_GPU_RESET_EN destroy_workqueue(kbdev->hwaccess.backend.reset_workq); -#endif } KBASE_EXPORT_TEST_API(kbase_job_slot_term); -#if KBASE_GPU_RESET_EN /** * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot * @kbdev: kbase device pointer @@ -935,7 +921,6 @@ static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev, return ret; } -#endif /* KBASE_GPU_RESET_EN */ /** * kbase_job_slot_softstop_swflags - Soft-stop a job with flags @@ -992,7 +977,6 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, { struct kbase_device *kbdev = kctx->kbdev; bool stopped; -#if KBASE_GPU_RESET_EN /* We make the check for AFBC before evicting/stopping atoms. Note * that no other thread can modify the slots whilst we have the * hwaccess_lock. */ @@ -1000,12 +984,10 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542) && kbasep_check_for_afbc_on_slot(kbdev, kctx, js, target_katom); -#endif stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, target_katom, JS_COMMAND_HARD_STOP); -#if KBASE_GPU_RESET_EN if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) || kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) || needs_workaround_for_afbc)) { @@ -1020,7 +1002,6 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, kbase_reset_gpu_locked(kbdev); } } -#endif } /** @@ -1085,8 +1066,6 @@ void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, } } - -#if KBASE_GPU_RESET_EN static void kbase_debug_dump_registers(struct kbase_device *kbdev) { int i; @@ -1129,7 +1108,6 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) struct kbase_device *kbdev; ktime_t end_timestamp = ktime_get(); struct kbasep_js_device_data *js_devdata; - bool try_schedule = false; bool silent = false; u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; @@ -1147,9 +1125,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); - /* Suspend vinstr. - * This call will block until vinstr is suspended. */ - kbase_vinstr_suspend(kbdev->vinstr_ctx); + /* Disable GPU hardware counters. + * This call will block until counters are disabled. 
+ */ + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); /* Make sure the timer has completed - this cannot be done from * interrupt context, so this cannot be done within @@ -1164,15 +1143,18 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) KBASE_RESET_GPU_NOT_PENDING); kbase_disjoint_state_down(kbdev); wake_up(&kbdev->hwaccess.backend.reset_wait); - kbase_vinstr_resume(kbdev->vinstr_ctx); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return; } KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - spin_lock(&kbdev->hwaccess_lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); spin_lock(&kbdev->mmu_mask_change); + kbase_pm_reset_start_locked(kbdev); + /* We're about to flush out the IRQs and their bottom half's */ kbdev->irq_reset_flush = true; @@ -1181,8 +1163,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_pm_disable_interrupts_nolock(kbdev); spin_unlock(&kbdev->mmu_mask_change); - spin_unlock(&kbdev->hwaccess_lock); - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* Ensure that any IRQ handlers have finished * Must be done without any locks IRQ handlers will take */ @@ -1244,37 +1225,33 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_pm_enable_interrupts(kbdev); - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING); - kbase_disjoint_state_down(kbdev); - wake_up(&kbdev->hwaccess.backend.reset_wait); - if (!silent) - dev_err(kbdev->dev, "Reset complete"); - - if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending) - try_schedule = true; - mutex_unlock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); + kbase_pm_reset_complete(kbdev); + /* Find out what cores are required now */ kbase_pm_update_cores_state(kbdev); /* Synchronously request and wait for those cores, because if * instrumentation is enabled it would need them immediately. 
*/ - kbase_pm_check_transitions_sync(kbdev); + kbase_pm_wait_for_desired_state(kbdev); mutex_unlock(&kbdev->pm.lock); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING); + + wake_up(&kbdev->hwaccess.backend.reset_wait); + if (!silent) + dev_err(kbdev->dev, "Reset complete"); + /* Try submitting some jobs to restart processing */ - if (try_schedule) { - KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, - 0); - kbase_js_sched_all(kbdev); - } + KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0); + kbase_js_sched_all(kbdev); /* Process any pending slot updates */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1283,8 +1260,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_pm_context_idle(kbdev); - /* Release vinstr */ - kbase_vinstr_resume(kbdev->vinstr_ctx); + /* Re-enable GPU hardware counters */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); } @@ -1458,20 +1437,22 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) kbasep_try_reset_gpu_early_locked(kbdev); } -void kbase_reset_gpu_silent(struct kbase_device *kbdev) +int kbase_reset_gpu_silent(struct kbase_device *kbdev) { if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING, KBASE_RESET_GPU_SILENT) != KBASE_RESET_GPU_NOT_PENDING) { /* Some other thread is already resetting the GPU */ - return; + return -EAGAIN; } kbase_disjoint_state_up(kbdev); queue_work(kbdev->hwaccess.backend.reset_workq, &kbdev->hwaccess.backend.reset_work); + + return 0; } bool kbase_reset_gpu_active(struct kbase_device *kbdev) @@ -1482,4 +1463,3 @@ bool kbase_reset_gpu_active(struct kbase_device *kbdev) return true; } -#endif /* KBASE_GPU_RESET_EN */ diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index 831491e..452ddee 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -159,31 +159,11 @@ void kbase_job_slot_halt(struct kbase_device *kbdev); void kbase_job_slot_term(struct kbase_device *kbdev); /** - * kbase_gpu_cacheclean - Cause a GPU cache clean & flush + * kbase_gpu_cache_clean - Cause a GPU cache clean & flush * @kbdev: Device pointer * * Caller must not be in IRQ context */ -void kbase_gpu_cacheclean(struct kbase_device *kbdev); - -static inline bool kbase_atom_needs_tiler(struct kbase_device *kbdev, - base_jd_core_req core_req) -{ - return core_req & BASE_JD_REQ_T; -} - -static inline bool kbase_atom_needs_shaders(struct kbase_device *kbdev, - base_jd_core_req core_req) -{ - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) - return true; - if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == - BASE_JD_REQ_T) { - /* Tiler only atom */ - return false; - } - - return true; -} +void kbase_gpu_cache_clean(struct kbase_device *kbdev); #endif /* _KBASE_JM_HWACCESS_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index bdb94be..c714582 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -30,6 +30,7 @@ #include <mali_kbase_jm.h> #include <mali_kbase_js.h> #include <mali_kbase_tlstream.h> +#include <mali_kbase_hwcnt_context.h> #include <mali_kbase_10969_workaround.h> #include 
<backend/gpu/mali_kbase_cache_policy_backend.h> #include <backend/gpu/mali_kbase_device_internal.h> @@ -296,143 +297,14 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, int js) } -static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); - -static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev, - int js, - struct kbase_jd_atom *katom) -{ - base_jd_core_req core_req = katom->core_req; - - /* NOTE: The following uses a number of FALLTHROUGHs to optimize the - * calls to this function. Ending of the function is indicated by BREAK - * OUT. - */ - switch (katom->coreref_state) { - /* State when job is first attempted to be run */ - case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: - /* Request the cores */ - kbase_pm_request_cores(kbdev, - kbase_atom_needs_tiler(kbdev, core_req), - kbase_atom_needs_shaders(kbdev, core_req)); - - /* Proceed to next state */ - katom->coreref_state = - KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: - { - bool cores_ready; - - cores_ready = kbase_pm_cores_requested(kbdev, - kbase_atom_needs_tiler(kbdev, core_req), - kbase_atom_needs_shaders(kbdev, core_req)); - - if (!cores_ready) { - /* Stay in this state and return, to retry at - * this state later. - */ - KBASE_TRACE_ADD_SLOT_INFO(kbdev, - JS_CORE_REF_REGISTER_INUSE_FAILED, - katom->kctx, katom, - katom->jc, js, - (u32) 0); - /* *** BREAK OUT: No state transition *** */ - break; - } - /* Proceed to next state */ - katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY; - /* *** BREAK OUT: Cores Ready *** */ - break; - } - - default: - KBASE_DEBUG_ASSERT_MSG(false, - "Unhandled kbase_atom_coreref_state %d", - katom->coreref_state); - break; - } - - return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY); -} - -static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) -{ - base_jd_core_req core_req = katom->core_req; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(katom != NULL); - - switch (katom->coreref_state) { - case KBASE_ATOM_COREREF_STATE_READY: - /* State where atom was submitted to the HW - just proceed to - * power-down */ - - /* *** FALLTHROUGH *** */ - - case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: - /* State where cores were requested */ - kbase_pm_release_cores(kbdev, - kbase_atom_needs_tiler(kbdev, core_req), - kbase_atom_needs_shaders(kbdev, core_req)); - break; - - case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: - /* Initial state - nothing required */ - break; - - default: - KBASE_DEBUG_ASSERT_MSG(false, - "Unhandled coreref_state: %d", - katom->coreref_state); - break; - } - - katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; -} - -static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev, - base_jd_core_req core_req, - enum kbase_atom_coreref_state coreref_state) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - switch (coreref_state) { - case KBASE_ATOM_COREREF_STATE_READY: - /* State where atom was submitted to the HW - just proceed to - * power-down */ - - /* *** FALLTHROUGH *** */ - - case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: - /* State where cores were requested */ - kbase_pm_release_cores(kbdev, - kbase_atom_needs_tiler(kbdev, core_req), - kbase_atom_needs_shaders(kbdev, core_req)); - break; - - case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: - /* Initial state - 
nothing required */ - break; - - default: - KBASE_DEBUG_ASSERT_MSG(false, - "Unhandled coreref_state: %d", - coreref_state); - break; - } -} - static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, ktime_t *end_timestamp) { struct kbase_context *kctx = katom->kctx; + lockdep_assert_held(&kbdev->hwaccess_lock); + switch (katom->gpu_rb_state) { case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: /* Should be impossible */ @@ -468,26 +340,47 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, break; case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_CHECK) && + (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_HWCNT)) + kbase_pm_protected_override_disable(kbdev); + if (!kbase_jd_katom_is_protected(katom) && + (katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_CHECK) && + (katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) + kbase_pm_protected_override_disable(kbdev); + if (katom->protected_state.enter != KBASE_ATOM_ENTER_PROTECTED_CHECK || katom->protected_state.exit != KBASE_ATOM_EXIT_PROTECTED_CHECK) kbdev->protected_mode_transition = false; - + /* If the atom has suspended hwcnt but has not yet entered + * protected mode, then resume hwcnt now. If the GPU is now in + * protected mode then hwcnt will be resumed by GPU reset so + * don't resume it here. + */ if (kbase_jd_katom_is_protected(katom) && ((katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || (katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) || - (katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_FINISHED))) { - kbase_vinstr_resume(kbdev->vinstr_ctx); + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) { + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } } if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { if (katom->atom_flags & KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { - kbdev->l2_users_count--; + kbase_pm_protected_l2_override(kbdev, false); katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; } @@ -512,6 +405,8 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { + lockdep_assert_held(&kbdev->hwaccess_lock); + kbase_gpu_release_atom(kbdev, katom, NULL); katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; } @@ -630,9 +525,7 @@ static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) /* The protected mode disable callback will be called as part of reset */ - kbase_reset_gpu_silent(kbdev); - - return 0; + return kbase_reset_gpu_silent(kbdev); } static int kbase_jm_protected_entry(struct kbase_device *kbdev, @@ -640,6 +533,8 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, { int err = 0; + lockdep_assert_held(&kbdev->hwaccess_lock); + err = kbase_gpu_protected_mode_enter(kbdev); /* @@ -648,14 +543,23 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, */ kbdev->protected_mode_transition = false; + kbase_pm_protected_override_disable(kbdev); + kbase_pm_update_cores_state_nolock(kbdev); KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev); if (err) { /* * Failed to switch into protected mode, resume - * vinstr core and fail atom. 
+ * GPU hwcnt and fail atom. */ - kbase_vinstr_resume(kbdev->vinstr_ctx); + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); /* @@ -692,6 +596,8 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, { int err = 0; + lockdep_assert_held(&kbdev->hwaccess_lock); + switch (katom[idx]->protected_state.enter) { case KBASE_ATOM_ENTER_PROTECTED_CHECK: KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev); @@ -700,25 +606,41 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, * there are no atoms currently on the GPU. */ WARN_ON(kbdev->protected_mode_transition); WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + /* If hwcnt is disabled, it means we didn't clean up correctly + * during last exit from protected mode. + */ + WARN_ON(kbdev->protected_mode_hwcnt_disabled); - kbdev->protected_mode_transition = true; katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_VINSTR; + KBASE_ATOM_ENTER_PROTECTED_HWCNT; + + kbdev->protected_mode_transition = true; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_ENTER_PROTECTED_VINSTR: - if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { - /* - * We can't switch now because - * the vinstr core state switch - * is not done yet. - */ + case KBASE_ATOM_ENTER_PROTECTED_HWCNT: + /* See if we can get away with disabling hwcnt atomically */ + kbdev->protected_mode_hwcnt_desired = false; + if (!kbdev->protected_mode_hwcnt_disabled) { + if (kbase_hwcnt_context_disable_atomic( + kbdev->hwcnt_gpu_ctx)) + kbdev->protected_mode_hwcnt_disabled = true; + } + + /* We couldn't disable atomically, so kick off a worker */ + if (!kbdev->protected_mode_hwcnt_disabled) { +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, + &kbdev->protected_mode_hwcnt_disable_work); +#else + queue_work(system_highpri_wq, + &kbdev->protected_mode_hwcnt_disable_work); +#endif return -EAGAIN; } /* Once reaching this point GPU must be - * switched to protected mode or vinstr + * switched to protected mode or hwcnt * re-enabled. */ /* @@ -729,6 +651,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, katom[idx]->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; + kbase_pm_protected_override_enable(kbdev); kbase_pm_update_cores_state_nolock(kbdev); /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ @@ -764,7 +687,8 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, * Power on L2 caches; this will also result in the * correct value written to coherency enable register. */ - kbase_pm_request_l2_caches_nolock(kbdev); + kbase_pm_protected_l2_override(kbdev, true); + /* * Set the flag on the atom that additional * L2 references are taken. @@ -787,14 +711,15 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, * Check that L2 caches are powered and, if so, * enter protected mode. */ - if (kbdev->pm.backend.l2_powered != 0) { + if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { /* * Remove additional L2 reference and reset * the atom flag which denotes it. 
*/ if (katom[idx]->atom_flags & KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { - kbdev->l2_users_count--; + kbase_pm_protected_l2_override(kbdev, + false); katom[idx]->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; } @@ -825,6 +750,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, { int err = 0; + lockdep_assert_held(&kbdev->hwaccess_lock); switch (katom[idx]->protected_state.exit) { case KBASE_ATOM_EXIT_PROTECTED_CHECK: @@ -844,6 +770,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; kbdev->protected_mode_transition = true; + kbase_pm_protected_override_enable(kbdev); kbase_pm_update_cores_state_nolock(kbdev); /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ @@ -865,8 +792,12 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, /* Issue the reset to the GPU */ err = kbase_gpu_protected_mode_reset(kbdev); + if (err == -EAGAIN) + return -EAGAIN; + if (err) { kbdev->protected_mode_transition = false; + kbase_pm_protected_override_disable(kbdev); /* Failed to exit protected mode, fail atom */ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; @@ -880,7 +811,16 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, kbase_jm_return_atom_to_js(kbdev, katom[idx]); } - kbase_vinstr_resume(kbdev->vinstr_ctx); + /* If we're exiting from protected mode, hwcnt must have + * been disabled during entry. + */ + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } return -EINVAL; } @@ -909,6 +849,9 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); + if (kbase_reset_gpu_active(kbdev)) + return; + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { struct kbase_jd_atom *katom[2]; int idx; @@ -1014,9 +957,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) break; } - cores_ready = - kbasep_js_job_check_ref_cores(kbdev, js, - katom[idx]); + cores_ready = kbase_pm_cores_requested(kbdev, + true); if (katom[idx]->event_code == BASE_JD_EVENT_PM_EVENT) { @@ -1204,19 +1146,11 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, * corruption we need to flush the cache manually before any * affected memory gets reused. 
*/ katom->need_cache_flush_cores_retained = true; - kbase_pm_request_cores(kbdev, - kbase_atom_needs_tiler(kbdev, katom->core_req), - kbase_atom_needs_shaders(kbdev, - katom->core_req)); } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) { if (kbdev->gpu_props.num_core_groups > 1 && katom->device_nr >= 1) { dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n"); katom->need_cache_flush_cores_retained = true; - kbase_pm_request_cores(kbdev, - kbase_atom_needs_tiler(kbdev, katom->core_req), - kbase_atom_needs_shaders(kbdev, - katom->core_req)); } } @@ -1408,10 +1342,6 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) break; if (katom->protected_state.exit == KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { - KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev); - - kbase_vinstr_resume(kbdev->vinstr_ctx); - /* protected mode sanity checks */ KBASE_DEBUG_ASSERT_MSG( kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), @@ -1434,8 +1364,6 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) * it will be processed again from the starting state. */ if (keep_in_jm_rb) { - kbasep_js_job_check_deref_cores(kbdev, katom); - katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; /* As the atom was not removed, increment the * index so that we read the correct atom in the @@ -1454,7 +1382,19 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) } } + /* Re-enable GPU hardware counters if we're resetting from protected + * mode. + */ + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + + KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev); + } + kbdev->protected_mode_transition = false; + kbase_pm_protected_override_disable(kbdev); } static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, @@ -1475,6 +1415,8 @@ static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, u32 action, bool disjoint) { + lockdep_assert_held(&kbdev->hwaccess_lock); + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_gpu_mark_atom_for_return(kbdev, katom); katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; @@ -1698,52 +1640,13 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, return ret; } -void kbase_gpu_cacheclean(struct kbase_device *kbdev) -{ - /* Limit the number of loops to avoid a hang if the interrupt is missed - */ - u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; - - mutex_lock(&kbdev->cacheclean_lock); - - /* use GPU_COMMAND completion solution */ - /* clean & invalidate the caches */ - KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CLEAN_INV_CACHES); - - /* wait for cache flush to complete before continuing */ - while (--max_loops && - (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & - CLEAN_CACHES_COMPLETED) == 0) - ; - - /* clear the CLEAN_CACHES_COMPLETED irq */ - KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, - CLEAN_CACHES_COMPLETED); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), - CLEAN_CACHES_COMPLETED); - KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_CLEANING, - "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang."); 
- - mutex_unlock(&kbdev->cacheclean_lock); -} - -void kbase_backend_cacheclean(struct kbase_device *kbdev, +void kbase_backend_cache_clean(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { if (katom->need_cache_flush_cores_retained) { - unsigned long flags; - - kbase_gpu_cacheclean(kbdev); + kbase_gpu_start_cache_clean(kbdev); + kbase_gpu_wait_cache_clean(kbdev); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_release_cores(kbdev, - kbase_atom_needs_tiler(kbdev, katom->core_req), - kbase_atom_needs_shaders(kbdev, - katom->core_req)); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); katom->need_cache_flush_cores_retained = false; } } @@ -1755,7 +1658,7 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, * If cache flush required due to HW workaround then perform the flush * now */ - kbase_backend_cacheclean(kbdev, katom); + kbase_backend_cache_clean(kbdev, katom); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) && (katom->core_req & BASE_JD_REQ_FS) && @@ -1774,24 +1677,11 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN; } } - - /* Clear the coreref_state now - while check_deref_cores() may not have - * been called yet, the caller will have taken a copy of this field. If - * this is not done, then if the atom is re-scheduled (following a soft - * stop) then the core reference would not be retaken. */ - katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; } void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, - base_jd_core_req core_req, - enum kbase_atom_coreref_state coreref_state) + base_jd_core_req core_req) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, coreref_state); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (!kbdev->pm.active_count) { mutex_lock(&kbdev->js_data.runpool_mutex); mutex_lock(&kbdev->pm.lock); @@ -1830,6 +1720,3 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } - - - diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c index 205a31d..7307be4 100644 --- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c @@ -250,14 +250,12 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) } } } -#if KBASE_GPU_RESET_EN if (reset_needed) { dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). 
Issueing GPU soft-reset to resolve."); if (kbase_prepare_to_reset_gpu_locked(kbdev)) kbase_reset_gpu_locked(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ /* the timer is re-issued if there is contexts in the run-pool */ if (backend->timer_running) diff --git a/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c b/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c index f3487d9..ba5bf72 100644 --- a/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -242,16 +242,20 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) { struct kbase_mmu_setup *current_setup = &as->current_setup; - u32 transcfg = 0; + u64 transcfg = 0; if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { - transcfg = current_setup->transcfg & 0xFFFFFFFFUL; + transcfg = current_setup->transcfg; /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ /* Clear PTW_MEMATTR bits */ transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; /* Enable correct PTW_MEMATTR bits */ transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + /* Ensure page-tables reads use read-allocate cache-policy in + * the L2 + */ + transcfg |= AS_TRANSCFG_R_ALLOCATE; if (kbdev->system_coherency == COHERENCY_ACE) { /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */ @@ -264,7 +268,7 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), transcfg); kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), - (current_setup->transcfg >> 32) & 0xFFFFFFFFUL); + (transcfg >> 32) & 0xFFFFFFFFUL); } else { if (kbdev->system_coherency == COHERENCY_ACE) current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index a448a3b..c19a0d1 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -31,11 +31,13 @@ #include <mali_kbase_pm.h> #include <mali_kbase_hwaccess_jm.h> +#include <mali_kbase_hwcnt_context.h> #include <backend/gpu/mali_kbase_js_internal.h> #include <backend/gpu/mali_kbase_pm_internal.h> #include <backend/gpu/mali_kbase_jm_internal.h> static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); +static void kbase_pm_hwcnt_disable_worker(struct work_struct *data); int kbase_pm_runtime_init(struct kbase_device *kbdev) { @@ -112,7 +114,7 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev) kbdev->pm.backend.gpu_powered = false; } -int kbase_hwaccess_pm_init(struct kbase_device *kbdev) +int kbase_hwaccess_pm_early_init(struct kbase_device *kbdev) { int ret = 0; @@ -128,12 +130,12 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, kbase_pm_gpu_poweroff_wait_wq); + kbdev->pm.backend.ca_cores_enabled = ~0ull; kbdev->pm.backend.gpu_powered = false; kbdev->pm.suspending = false; #ifdef CONFIG_MALI_DEBUG kbdev->pm.backend.driver_ready_for_irqs = false; #endif /* CONFIG_MALI_DEBUG */ - kbdev->pm.backend.gpu_in_desired_state = true; init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); /* Initialise the metrics subsystem */ @@ -141,9 +143,6 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) if (ret) return ret; - init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait); - kbdev->pm.backend.l2_powered = 0; - init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); 
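In the mali_kbase_mmu_hw_direct.c hunk above, transcfg is widened to u64 so that the modified configuration (including the newly added AS_TRANSCFG_R_ALLOCATE bit) reaches the AS_TRANSCFG_HI write, which previously used the unmodified current_setup->transcfg. As a small illustration (the helper name is hypothetical; kbase_reg_write and the register macros are the ones already used above), a 64-bit value is programmed through a LO/HI pair like this:

/* Hypothetical helper for illustration: split a 64-bit value across a
 * 32-bit LO/HI register pair, as done for AS_TRANSCFG in the hunk above. */
static void example_write_reg64(struct kbase_device *kbdev,
		u32 lo_offset, u32 hi_offset, u64 value)
{
	kbase_reg_write(kbdev, lo_offset, value & 0xFFFFFFFFUL);
	kbase_reg_write(kbdev, hi_offset, (value >> 32) & 0xFFFFFFFFUL);
}

/* e.g. example_write_reg64(kbdev,
 *		MMU_AS_REG(as->number, AS_TRANSCFG_LO),
 *		MMU_AS_REG(as->number, AS_TRANSCFG_HI), transcfg); */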
kbdev->pm.backend.reset_done = false; @@ -161,8 +160,13 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) if (kbase_pm_policy_init(kbdev) != 0) goto pm_policy_fail; + if (kbase_pm_state_machine_init(kbdev) != 0) + goto pm_state_machine_fail; + return 0; +pm_state_machine_fail: + kbase_pm_policy_term(kbdev); pm_policy_fail: kbase_pm_ca_term(kbdev); workq_fail: @@ -170,6 +174,19 @@ workq_fail: return -EINVAL; } +int kbase_hwaccess_pm_late_init(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbdev->pm.backend.hwcnt_desired = false; + kbdev->pm.backend.hwcnt_disabled = true; + INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work, + kbase_pm_hwcnt_disable_worker); + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + return 0; +} + void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) { lockdep_assert_held(&kbdev->pm.lock); @@ -178,6 +195,17 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) * kbase_pm_clock_off() */ kbase_pm_clock_on(kbdev, is_resume); + if (!is_resume) { + unsigned long flags; + + /* Force update of L2 state - if we have abandoned a power off + * then this may be required to power the L2 back on. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + /* Update core status as required by the policy */ kbase_pm_update_cores_state(kbdev); @@ -194,36 +222,24 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) struct kbasep_js_device_data *js_devdata = &kbdev->js_data; unsigned long flags; -#if !PLATFORM_POWER_DOWN_ONLY - /* Wait for power transitions to complete. We do this with no locks held - * so that we don't deadlock with any pending workqueues */ - kbase_pm_check_transitions_sync(kbdev); -#endif /* !PLATFORM_POWER_DOWN_ONLY */ + if (!platform_power_down_only) + /* Wait for power transitions to complete. We do this with no locks held + * so that we don't deadlock with any pending workqueues. + */ + kbase_pm_wait_for_desired_state(kbdev); mutex_lock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); -#if PLATFORM_POWER_DOWN_ONLY - if (kbdev->pm.backend.gpu_powered) { - if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) { - /* If L2 cache is powered then we must flush it before - * we power off the GPU. Normally this would have been - * handled when the L2 was powered off. 
*/ - kbase_gpu_cacheclean(kbdev); - } - } -#endif /* PLATFORM_POWER_DOWN_ONLY */ - if (!backend->poweron_required) { -#if !PLATFORM_POWER_DOWN_ONLY - unsigned long flags; + if (!platform_power_down_only) { + unsigned long flags; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - WARN_ON(kbdev->l2_available_bitmap || - kbdev->shader_available_bitmap || - kbdev->tiler_available_bitmap); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -#endif /* !PLATFORM_POWER_DOWN_ONLY */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + WARN_ON(backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF || + backend->l2_state != KBASE_L2_OFF); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } /* Disable interrupts and turn the clock off */ if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) { @@ -256,6 +272,8 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) backend->poweroff_wait_in_progress = false; if (backend->poweron_required) { backend->poweron_required = false; + kbdev->pm.backend.l2_desired = true; + kbase_pm_update_state(kbdev); kbase_pm_update_cores_state_nolock(kbdev); kbase_backend_slot_update(kbdev); } @@ -267,6 +285,45 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) wake_up(&kbdev->pm.backend.poweroff_wait); } +static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + pm.backend.hwcnt_disable_work); + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + + bool do_disable; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!do_disable) + return; + + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; + + if (do_disable) { + /* PM state did not change while we were doing the disable, + * so commit the work we just performed and continue the state + * machine. + */ + backend->hwcnt_disabled = true; + kbase_pm_update_state(kbdev); + } else { + /* PM state was updated while we were doing the disable, + * so we need to undo the disable we just performed. + */ + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) { unsigned long flags; @@ -274,29 +331,36 @@ void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) lockdep_assert_held(&kbdev->pm.lock); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (!kbdev->pm.backend.poweroff_wait_in_progress) { - /* Force all cores off */ - kbdev->pm.backend.desired_shader_state = 0; - kbdev->pm.backend.desired_tiler_state = 0; - - /* Force all cores to be unavailable, in the situation where - * transitions are in progress for some cores but not others, - * and kbase_pm_check_transitions_nolock can not immediately - * power off the cores */ - kbdev->shader_available_bitmap = 0; - kbdev->tiler_available_bitmap = 0; - kbdev->l2_available_bitmap = 0; - - kbdev->pm.backend.poweroff_wait_in_progress = true; - kbdev->pm.backend.poweroff_is_suspend = is_suspend; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - /*Kick off wq here. 
Callers will have to wait*/ - queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, - &kbdev->pm.backend.gpu_poweroff_wait_work); + spin_lock(&kbdev->pm.backend.gpu_powered_lock); + if (!kbdev->pm.backend.gpu_powered) { + spin_unlock(&kbdev->pm.backend.gpu_powered_lock); + goto unlock_hwaccess; } else { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock(&kbdev->pm.backend.gpu_powered_lock); } + + if (kbdev->pm.backend.poweroff_wait_in_progress) + goto unlock_hwaccess; + + /* Force all cores off */ + kbdev->pm.backend.shaders_desired = false; + kbdev->pm.backend.l2_desired = false; + + kbdev->pm.backend.poweroff_wait_in_progress = true; + kbdev->pm.backend.poweroff_is_suspend = is_suspend; + kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = true; + + /* l2_desired being false should cause the state machine to + * start powering off the L2. When it actually is powered off, + * the interrupt handler will call kbase_pm_l2_update_state() + * again, which will trigger the kbase_pm_gpu_poweroff_wait_wq. + * Callers of this function will need to wait on poweroff_wait. + */ + kbase_pm_update_state(kbdev); + +unlock_hwaccess: + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } static bool is_poweroff_in_progress(struct kbase_device *kbdev) @@ -341,8 +405,6 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, return ret; } - kbasep_pm_init_core_use_bitmaps(kbdev); - kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = kbdev->pm.debug_core_mask[1] = kbdev->pm.debug_core_mask[2] = @@ -385,20 +447,20 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev != NULL); mutex_lock(&kbdev->pm.lock); - kbase_pm_cancel_deferred_poweroff(kbdev); kbase_pm_do_poweroff(kbdev, false); mutex_unlock(&kbdev->pm.lock); } KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); -void kbase_hwaccess_pm_term(struct kbase_device *kbdev) +void kbase_hwaccess_pm_early_term(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0); /* Free any resources the policy allocated */ + kbase_pm_state_machine_term(kbdev); kbase_pm_policy_term(kbdev); kbase_pm_ca_term(kbdev); @@ -408,16 +470,29 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev) destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); } +void kbase_hwaccess_pm_late_term(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + cancel_work_sync(&kbdev->pm.backend.hwcnt_disable_work); + + if (kbdev->pm.backend.hwcnt_disabled) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +} + void kbase_pm_power_changed(struct kbase_device *kbdev) { - bool cores_are_available; unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + kbase_pm_update_state(kbdev); - if (cores_are_available) - kbase_backend_slot_update(kbdev); + kbase_backend_slot_update(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } @@ -455,7 +530,6 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) mutex_lock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); - kbase_pm_cancel_deferred_poweroff(kbdev); kbase_pm_do_poweroff(kbdev, true); kbase_backend_timer_suspend(kbdev); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c 
b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c index d4e8e42..2cb9452 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c @@ -30,15 +30,15 @@ int kbase_pm_ca_init(struct kbase_device *kbdev) { - struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; #ifdef CONFIG_MALI_DEVFREQ + struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; + if (kbdev->current_core_mask) pm_backend->ca_cores_enabled = kbdev->current_core_mask; else pm_backend->ca_cores_enabled = kbdev->gpu_props.props.raw_props.shader_present; #endif - pm_backend->ca_in_transition = false; return 0; } @@ -55,10 +55,17 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (!(core_mask & kbdev->pm.debug_core_mask_all)) { + dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", + core_mask, kbdev->pm.debug_core_mask_all); + goto unlock; + } + pm_backend->ca_cores_enabled = core_mask; - kbase_pm_update_cores_state_nolock(kbdev); + kbase_pm_update_state(kbdev); +unlock: spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", @@ -89,19 +96,12 @@ KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); kbdev->pm.backend.instr_enabled = true; - - kbase_pm_update_cores_state_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); kbdev->pm.backend.instr_enabled = false; - - kbase_pm_update_cores_state_nolock(kbdev); } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.h b/mali_kbase/backend/gpu/mali_kbase_pm_ca.h index 2b005c9..274581d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h index 7fe8eb3..0cff22e 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h @@ -29,10 +29,8 @@ #include "mali_kbase_pm_always_on.h" #include "mali_kbase_pm_coarse_demand.h" -#include "mali_kbase_pm_demand.h" #if !MALI_CUSTOMER_RELEASE -#include "mali_kbase_pm_demand_always_powered.h" -#include "mali_kbase_pm_fast_start.h" +#include "mali_kbase_pm_always_on_demand.h" #endif /* Forward definition - see mali_kbase.h */ @@ -65,6 +63,70 @@ enum kbase_pm_core_type { }; /** + * enum kbase_l2_core_state - The states used for the L2 cache & tiler power + * state machine. 
+ * + * @KBASE_L2_OFF: The L2 cache and tiler are off + * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on + * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being + * enabled + * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled + * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being + * disabled + * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off + * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off + * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state + * are unknown + */ +enum kbase_l2_core_state { + KBASE_L2_OFF = 0, + KBASE_L2_PEND_ON, + KBASE_L2_ON_HWCNT_ENABLE, + KBASE_L2_ON, + KBASE_L2_ON_HWCNT_DISABLE, + KBASE_L2_POWER_DOWN, + KBASE_L2_PEND_OFF, + KBASE_L2_RESET_WAIT +}; + +/** + * enum kbase_shader_core_state - The states used for the shaders' state machine. + * + * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off + * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have + * been requested to power on + * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been + * requested to power on + * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on + * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to + * power off, but they remain on for the + * duration of the hysteresis timer + * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired + * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders + * have been requested to power off + * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks + * have been requested to power off + * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are + * off, but the tick timer + * cancellation is still + * pending. + * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power + * states are unknown + */ +enum kbase_shader_core_state { + KBASE_SHADERS_OFF_CORESTACK_OFF = 0, + KBASE_SHADERS_OFF_CORESTACK_PEND_ON, + KBASE_SHADERS_PEND_ON_CORESTACK_ON, + KBASE_SHADERS_ON_CORESTACK_ON, + KBASE_SHADERS_WAIT_OFF_CORESTACK_ON, + KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON, + KBASE_SHADERS_PEND_OFF_CORESTACK_ON, + KBASE_SHADERS_OFF_CORESTACK_PEND_OFF, + KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF, + KBASE_SHADERS_RESET_WAIT +}; + +/** * struct kbasep_pm_metrics - Metrics data collected for use by the power * management framework. * @@ -128,13 +190,39 @@ struct kbasep_pm_metrics_state { #endif }; +/** + * struct kbasep_pm_tick_timer_state - State for the shader hysteresis timer + * @wq: Work queue to wait for the timer to stopped + * @work: Work item which cancels the timer + * @timer: Timer for powering off the shader cores + * @configured_interval: Period of GPU poweroff timer + * @configured_ticks: User-configured number of ticks to wait after the shader + * power down request is received before turning off the cores + * @remaining_ticks: Number of remaining timer ticks until shaders are powered off + * @cancel_queued: True if the cancellation work item has been queued. This is + * required to ensure that it is not queued twice, e.g. after + * a reset, which could cause the timer to be incorrectly + * cancelled later by a delayed workitem. 
+ * @needed: Whether the timer should restart itself + */ +struct kbasep_pm_tick_timer_state { + struct workqueue_struct *wq; + struct work_struct work; + struct hrtimer timer; + + ktime_t configured_interval; + unsigned int configured_ticks; + unsigned int remaining_ticks; + + bool cancel_queued; + bool needed; +}; + union kbase_pm_policy_data { struct kbasep_pm_policy_always_on always_on; struct kbasep_pm_policy_coarse_demand coarse_demand; - struct kbasep_pm_policy_demand demand; #if !MALI_CUSTOMER_RELEASE - struct kbasep_pm_policy_demand_always_powered demand_always_powered; - struct kbasep_pm_policy_fast_start fast_start; + struct kbasep_pm_policy_always_on_demand always_on_demand; #endif }; @@ -147,39 +235,14 @@ union kbase_pm_policy_data { * @pm_current_policy: The policy that is currently actively controlling the * power state. * @pm_policy_data: Private data for current PM policy - * @ca_in_transition: Flag indicating when core availability policy is - * transitioning cores. The core availability policy must - * set this when a change in core availability is occurring. - * power_change_lock must be held when accessing this. * @reset_done: Flag when a reset is complete * @reset_done_wait: Wait queue to wait for changes to @reset_done - * @l2_powered_wait: Wait queue for whether the l2 cache has been powered as - * requested - * @l2_powered: State indicating whether all the l2 caches are powered. - * Non-zero indicates they're *all* powered - * Zero indicates that some (or all) are not powered * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter * users * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests - * @desired_shader_state: A bit mask identifying the shader cores that the - * power policy would like to be on. The current state - * of the cores may be different, but there should be - * transitions in progress that will eventually achieve - * this state (assuming that the policy doesn't change - * its mind in the mean time). - * @powering_on_shader_state: A bit mask indicating which shader cores are - * currently in a power-on transition - * @desired_tiler_state: A bit mask identifying the tiler cores that the power - * policy would like to be on. 
See @desired_shader_state - * @powering_on_tiler_state: A bit mask indicating which tiler core are - * currently in a power-on transition - * @powering_on_l2_state: A bit mask indicating which l2-caches are currently - * in a power-on transition - * @powering_on_stack_state: A bit mask indicating which core stacks are - * currently in a power-on transition - * @gpu_in_desired_state: This flag is set if the GPU is powered as requested - * by the desired_xxx_state variables - * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0 + * @gpu_in_desired_state_wait: Wait queue set when the GPU is in the desired + * state according to the L2 and shader power state + * machines * @gpu_powered: Set to true when the GPU is powered and register * accesses are possible, false otherwise * @instr_enabled: Set to true when instrumentation is enabled, @@ -192,26 +255,12 @@ union kbase_pm_policy_data { * @gpu_powered_lock: Spinlock that must be held when writing @gpu_powered or * accessing @driver_ready_for_irqs * @metrics: Structure to hold metrics for the GPU - * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is - * powered off - * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders - * and/or timers are powered off - * @gpu_poweroff_timer: Timer for powering off GPU - * @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires - * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq - * @shader_poweroff_pending: Bit mask of shaders to be powered off on next - * timer callback - * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer - * callback - * @poweroff_timer_needed: true if the poweroff timer is currently required, - * false otherwise - * @poweroff_timer_running: true if the poweroff timer is currently running, - * false otherwise - * power_change_lock should be held when accessing, - * unless there is no way the timer can be running (eg - * hrtimer_cancel() was called immediately before) + * @shader_tick_timer: Structure to hold the shader poweroff tick timer state * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. * hwaccess_lock must be held when accessing + * @invoke_poweroff_wait_wq_when_l2_off: flag indicating that the L2 power state + * machine should invoke the poweroff + * worker after the L2 has turned off. * @poweron_required: true if a GPU power on is required. Should only be set * when poweroff_wait_in_progress is true, and therefore the * GPU can not immediately be powered on. pm.lock must be @@ -236,35 +285,49 @@ union kbase_pm_policy_data { * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See * &struct kbase_pm_callback_conf * @ca_cores_enabled: Cores that are currently available + * @l2_state: The current state of the L2 cache state machine. See + * &enum kbase_l2_core_state + * @l2_desired: True if the L2 cache should be powered on by the L2 cache state + * machine + * @shaders_state: The current state of the shader state machine. + * @shaders_avail: This is updated by the state machine when it is in a state + * where it can handle changes to the core availability. This + * is internal to the shader state machine and should *not* be + * modified elsewhere. + * @shaders_desired: True if the PM active count or power policy requires the + * shader cores to be on. This is used as an input to the + * shader power state machine. 
The current state of the + * cores may be different, but there should be transitions in + * progress that will eventually achieve this state (assuming + * that the policy doesn't change its mind in the mean time). + * @in_reset: True if a GPU is resetting and normal power manager operation is + * suspended + * @protected_transition_override : True if a protected mode transition is in + * progress and is overriding power manager + * behaviour. + * @protected_l2_override : Non-zero if the L2 cache is required during a + * protected mode transition. Has no effect if not + * transitioning. + * @hwcnt_desired: True if we want GPU hardware counters to be enabled. + * @hwcnt_disabled: True if GPU hardware counters are not enabled. + * @hwcnt_disable_work: Work item to disable GPU hardware counters, used if + * atomic disable is not possible. * * Note: * During an IRQ, @pm_current_policy can be NULL when the policy is being * changed with kbase_pm_set_policy(). The change is protected under - * kbase_device.pm.power_change_lock. Direct access to this from IRQ context + * kbase_device.pm.pcower_change_lock. Direct access to this from IRQ context * must therefore check for NULL. If NULL, then kbase_pm_set_policy() will * re-issue the policy functions that would have been done under IRQ. */ struct kbase_pm_backend_data { const struct kbase_pm_policy *pm_current_policy; union kbase_pm_policy_data pm_policy_data; - bool ca_in_transition; bool reset_done; wait_queue_head_t reset_done_wait; - wait_queue_head_t l2_powered_wait; - int l2_powered; int gpu_cycle_counter_requests; spinlock_t gpu_cycle_counter_requests_lock; - u64 desired_shader_state; - u64 powering_on_shader_state; - u64 desired_tiler_state; - u64 powering_on_tiler_state; - u64 powering_on_l2_state; -#ifdef CONFIG_MALI_CORESTACK - u64 powering_on_stack_state; -#endif /* CONFIG_MALI_CORESTACK */ - - bool gpu_in_desired_state; wait_queue_head_t gpu_in_desired_state_wait; bool gpu_powered; @@ -279,23 +342,12 @@ struct kbase_pm_backend_data { spinlock_t gpu_powered_lock; - struct kbasep_pm_metrics_state metrics; - int gpu_poweroff_pending; - int shader_poweroff_pending_time; - - struct hrtimer gpu_poweroff_timer; - struct workqueue_struct *gpu_poweroff_wq; - struct work_struct gpu_poweroff_work; - - u64 shader_poweroff_pending; - u64 tiler_poweroff_pending; - - bool poweroff_timer_needed; - bool poweroff_timer_running; + struct kbasep_pm_tick_timer_state shader_tick_timer; bool poweroff_wait_in_progress; + bool invoke_poweroff_wait_wq_when_l2_off; bool poweron_required; bool poweroff_is_suspend; @@ -312,25 +364,38 @@ struct kbase_pm_backend_data { void (*callback_power_runtime_off)(struct kbase_device *kbdev); int (*callback_power_runtime_idle)(struct kbase_device *kbdev); -#ifdef CONFIG_MALI_DEVFREQ u64 ca_cores_enabled; -#endif + + enum kbase_l2_core_state l2_state; + enum kbase_shader_core_state shaders_state; + u64 shaders_avail; + bool l2_desired; + bool shaders_desired; + + bool in_reset; + + bool protected_transition_override; + int protected_l2_override; + + bool hwcnt_desired; + bool hwcnt_disabled; + struct work_struct hwcnt_disable_work; }; /* List of policy IDs */ enum kbase_pm_policy_id { - KBASE_PM_POLICY_ID_DEMAND = 1, - KBASE_PM_POLICY_ID_ALWAYS_ON, KBASE_PM_POLICY_ID_COARSE_DEMAND, #if !MALI_CUSTOMER_RELEASE - KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED, - KBASE_PM_POLICY_ID_FAST_START + KBASE_PM_POLICY_ID_ALWAYS_ON_DEMAND, #endif + KBASE_PM_POLICY_ID_ALWAYS_ON }; typedef u32 kbase_pm_policy_flags; +#define 
KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY (1u) + /** * struct kbase_pm_policy - Power policy structure. * @@ -377,13 +442,8 @@ struct kbase_pm_policy { /** * Function called to find out if shader cores are needed * - * This needs to at least satisfy kbdev->shader_needed_cnt, and so must - * never return false when kbdev->shader_needed_cnt > 0. - * - * Note that kbdev->pm.active_count being 0 is not a good indicator - * that kbdev->shader_needed_cnt is also 0 - refer to the documentation - * on the active_count member in struct kbase_pm_device_data and - * kbase_pm_is_active(). + * This needs to at least satisfy kbdev->pm.backend.shaders_desired, + * and so must never return false when shaders_desired is true. * * @kbdev: The kbase device structure for the device (must be a * valid pointer) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_demand.c b/mali_kbase/backend/gpu/mali_kbase_pm_demand.c deleted file mode 100644 index 01727d6..0000000 --- a/mali_kbase/backend/gpu/mali_kbase_pm_demand.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * A simple demand based power management policy - */ - -#include <mali_kbase.h> -#include <mali_kbase_pm.h> - -static bool demand_shaders_needed(struct kbase_device *kbdev) -{ - return (kbdev->shader_needed_cnt > 0); -} - -static bool demand_get_core_active(struct kbase_device *kbdev) -{ - return kbase_pm_is_active(kbdev); -} - -static void demand_init(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -static void demand_term(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -/* - * The struct kbase_pm_policy structure for the demand power policy. - * - * This is the static structure that defines the demand power policy's callback - * and name. - */ -const struct kbase_pm_policy kbase_pm_demand_policy_ops = { - "demand", /* name */ - demand_init, /* init */ - demand_term, /* term */ - demand_shaders_needed, /* shaders_needed */ - demand_get_core_active, /* get_core_active */ - 0u, /* flags */ - KBASE_PM_POLICY_ID_DEMAND, /* id */ -}; - -KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_demand.h b/mali_kbase/backend/gpu/mali_kbase_pm_demand.h deleted file mode 100644 index 4b05e6d..0000000 --- a/mali_kbase/backend/gpu/mali_kbase_pm_demand.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * A simple demand based power management policy - */ - -#ifndef MALI_KBASE_PM_DEMAND_H -#define MALI_KBASE_PM_DEMAND_H - -/** - * DOC: Demand power management policy - * - * The demand power management policy has the following characteristics: - * - When KBase indicates that the GPU will be powered up, but we don't yet - * know which Job Chains are to be run: - * - The Shader Cores are not powered up - * - * - When KBase indicates that Shader Cores are needed to submit the currently - * queued Job Chains: - * - Shader Cores are powered up - * - * - When KBase indicates that the GPU need not be powered: - * - The Shader Cores are powered off, and the GPU itself is powered off too. - * - * Note: - * - KBase indicates the GPU will be powered up when it has a User Process that - * has just started to submit Job Chains. - * - * - KBase indicates the GPU need not be powered when all the Job Chains from - * User Processes have finished, and it is waiting for a User Process to - * submit some more Job Chains. - */ - -/** - * struct kbasep_pm_policy_demand - Private structure for policy instance data - * - * @dummy: No state is needed, a dummy variable - * - * This contains data that is private to the demand power policy. - */ -struct kbasep_pm_policy_demand { - int dummy; -}; - -extern const struct kbase_pm_policy kbase_pm_demand_policy_ops; - -#endif /* MALI_KBASE_PM_DEMAND_H */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index cdd5cf7..2e6599a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -29,15 +29,14 @@ #include <mali_kbase.h> #include <mali_kbase_config_defaults.h> #include <mali_midg_regmap.h> -#if defined(CONFIG_MALI_GATOR_SUPPORT) #include <mali_kbase_gator.h> -#endif #include <mali_kbase_tlstream.h> #include <mali_kbase_pm.h> #include <mali_kbase_config_defaults.h> #include <mali_kbase_smc.h> #include <mali_kbase_hwaccess_jm.h> #include <mali_kbase_ctx_sched.h> +#include <mali_kbase_hwcnt_context.h> #include <backend/gpu/mali_kbase_cache_policy_backend.h> #include <backend/gpu/mali_kbase_device_internal.h> #include <backend/gpu/mali_kbase_irq_internal.h> @@ -45,11 +44,23 @@ #include <linux/of.h> -#if MALI_MOCK_TEST -#define MOCKABLE(function) function##_original +#ifdef CONFIG_MALI_CORESTACK +bool corestack_driver_control = true; #else -#define MOCKABLE(function) function -#endif /* MALI_MOCK_TEST */ +bool corestack_driver_control; /* Default value of 0/false */ +#endif +module_param(corestack_driver_control, bool, 0000); +MODULE_PARM_DESC(corestack_driver_control, + "Let the driver power on/off the GPU core stack independently " + "without involving the Power Domain Controller. 
This should " + "only be enabled on platforms for which integration of the PDC " + "to the Mali GPU is known to be problematic."); +KBASE_EXPORT_TEST_API(corestack_driver_control); + +bool platform_power_down_only = PLATFORM_POWER_DOWN_ONLY; +module_param(platform_power_down_only, bool, 0000); +MODULE_PARM_DESC(platform_power_down_only, + "Disable power down of individual cores."); /** * enum kbasep_pm_action - Actions that can be performed on a core. @@ -79,6 +90,47 @@ static u64 kbase_pm_get_state( enum kbase_pm_core_type core_type, enum kbasep_pm_action action); +static bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) +{ + if (kbdev->pm.backend.protected_transition_override && + kbdev->pm.backend.protected_l2_override) + return true; + + if (kbdev->pm.backend.protected_transition_override && + !kbdev->pm.backend.shaders_desired) + return false; + + return kbdev->pm.backend.l2_desired; +} + +void kbase_pm_protected_override_enable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->pm.backend.protected_transition_override = true; +} +void kbase_pm_protected_override_disable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->pm.backend.protected_transition_override = false; +} + +void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (override) { + kbdev->pm.backend.protected_l2_override++; + WARN_ON(kbdev->pm.backend.protected_l2_override <= 0); + } else { + kbdev->pm.backend.protected_l2_override--; + WARN_ON(kbdev->pm.backend.protected_l2_override < 0); + } + + kbase_pm_update_state(kbdev); +} + /** * core_type_to_reg - Decode a core type and action to a register. * @@ -96,24 +148,24 @@ static u64 kbase_pm_get_state( static u32 core_type_to_reg(enum kbase_pm_core_type core_type, enum kbasep_pm_action action) { -#ifdef CONFIG_MALI_CORESTACK - if (core_type == KBASE_PM_CORE_STACK) { - switch (action) { - case ACTION_PRESENT: - return STACK_PRESENT_LO; - case ACTION_READY: - return STACK_READY_LO; - case ACTION_PWRON: - return STACK_PWRON_LO; - case ACTION_PWROFF: - return STACK_PWROFF_LO; - case ACTION_PWRTRANS: - return STACK_PWRTRANS_LO; - default: - BUG(); + if (corestack_driver_control) { + if (core_type == KBASE_PM_CORE_STACK) { + switch (action) { + case ACTION_PRESENT: + return STACK_PRESENT_LO; + case ACTION_READY: + return STACK_READY_LO; + case ACTION_PWRON: + return STACK_PWRON_LO; + case ACTION_PWROFF: + return STACK_PWROFF_LO; + case ACTION_PWRTRANS: + return STACK_PWRTRANS_LO; + default: + WARN(1, "Invalid action for core type\n"); + } } } -#endif /* CONFIG_MALI_CORESTACK */ return (u32)core_type + (u32)action; } @@ -170,6 +222,12 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, u32 lo = cores & 0xFFFFFFFF; u32 hi = (cores >> 32) & 0xFFFFFFFF; + /* When 'platform_power_down_only' is enabled, no core type should be + * turned off individually. 
+ */ + KBASE_DEBUG_ASSERT(!(action == ACTION_PWROFF && + platform_power_down_only)); + lockdep_assert_held(&kbdev->hwaccess_lock); reg = core_type_to_reg(core_type, action); @@ -272,16 +330,6 @@ static u64 kbase_pm_get_state(struct kbase_device *kbdev, return (((u64) hi) << 32) | ((u64) lo); } -void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev) -{ - kbdev->shader_available_bitmap = 0; - kbdev->tiler_available_bitmap = 0; - kbdev->l2_users_count = 0; - kbdev->l2_available_bitmap = 0; - kbdev->tiler_needed_cnt = 0; - kbdev->shader_needed_cnt = 0; -} - /** * kbase_pm_get_present_cores - Get the cores that are present * @@ -385,525 +433,776 @@ u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores); -/** - * kbase_pm_transition_core_type - Perform power transitions for a particular - * core type. - * - * This function will perform any available power transitions to make the actual - * hardware state closer to the desired state. If a core is currently - * transitioning then changes to the power state of that call cannot be made - * until the transition has finished. Cores which are not present in the - * hardware are ignored if they are specified in the desired_state bitmask, - * however the return value will always be 0 in this case. - * - * @kbdev: The kbase device - * @type: The core type to perform transitions for - * @desired_state: A bit mask of the desired state of the cores - * @in_use: A bit mask of the cores that are currently running - * jobs. These cores have to be kept powered up because - * there are jobs running (or about to run) on them. - * @available: Receives a bit mask of the cores that the job - * scheduler can use to submit jobs to. May be NULL if - * this is not needed. - * @powering_on: Bit mask to update with cores that are - * transitioning to a power-on state. - * - * Return: true if the desired state has been reached, false otherwise - */ -static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, - enum kbase_pm_core_type type, - u64 desired_state, - u64 in_use, - u64 * const available, - u64 *powering_on) +static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev) { - u64 present; - u64 ready; - u64 trans; - u64 powerup; - u64 powerdown; - u64 powering_on_trans; - u64 desired_state_in_use; + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present; + u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; + enum kbase_l2_core_state prev_state; lockdep_assert_held(&kbdev->hwaccess_lock); - /* Get current state */ - present = kbase_pm_get_present_cores(kbdev, type); - trans = kbase_pm_get_trans_cores(kbdev, type); - ready = kbase_pm_get_ready_cores(kbdev, type); - - /* mask off ready from trans in case transitions finished between the - * register reads */ - trans &= ~ready; + do { + /* Get current state */ + u64 l2_trans = kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_L2); + u64 l2_ready = kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_L2); + u64 tiler_trans = kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_TILER); + u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_TILER); + + /* mask off ready from trans in case transitions finished + * between the register reads + */ + l2_trans &= ~l2_ready; + tiler_trans &= ~tiler_ready; + + prev_state = backend->l2_state; + + switch (backend->l2_state) { + case KBASE_L2_OFF: + if (kbase_pm_is_l2_desired(kbdev)) { + /* L2 is required, power on. 
Powering on the + * tiler will also power the first L2 cache. + */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, + tiler_present, ACTION_PWRON); + + /* If we have more than one L2 cache then we + * must power them on explicitly. + */ + if (l2_present != 1) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, + l2_present & ~1, + ACTION_PWRON); + backend->l2_state = KBASE_L2_PEND_ON; + } + break; - powering_on_trans = trans & *powering_on; + case KBASE_L2_PEND_ON: + if (!l2_trans && l2_ready == l2_present && !tiler_trans + && tiler_ready == tiler_present) { + KBASE_TRACE_ADD(kbdev, + PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, NULL, 0u, + (u32)tiler_ready); + /* + * Ensure snoops are enabled after L2 is powered + * up. Note that kbase keeps track of the snoop + * state, so safe to repeatedly call. + */ + kbase_pm_cache_snoop_enable(kbdev); + + /* With the L2 enabled, we can now enable + * hardware counters. + */ + backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; + + /* Now that the L2 is on, the shaders can start + * powering on if they're required. The obvious + * way to do this would be to call + * kbase_pm_shaders_update_state() here. + * However, that would make the two state + * machines mutually recursive, as the opposite + * would be needed for powering down. Instead, + * callers of this function should use the + * kbase_pm_update_state() wrapper, which will + * call the shader state machine immediately + * after the L2 (for power up), or + * automatically re-invoke the L2 state machine + * when the shaders power down. + */ + } + break; + + case KBASE_L2_ON_HWCNT_ENABLE: + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + backend->hwcnt_disabled = false; + } + backend->l2_state = KBASE_L2_ON; + break; + + case KBASE_L2_ON: + if (!kbase_pm_is_l2_desired(kbdev)) { + /* Do not power off L2 until the shaders and + * core stacks are off. + */ + if (backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + break; + + /* We need to make sure hardware counters are + * disabled before powering down the L2, to + * prevent loss of data. + * + * We waited until after the cores were powered + * down to prevent ping-ponging between hwcnt + * enabled and disabled, which would have + * happened if userspace submitted more work + * while we were trying to power down. + */ + backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE; + } + break; + + case KBASE_L2_ON_HWCNT_DISABLE: + /* If the L2 became desired while we were waiting on the + * worker to do the actual hwcnt disable (which might + * happen if some work was submitted immediately after + * the shaders powered off), then we need to early-out + * of this state and re-enable hwcnt. + * + * If we get lucky, the hwcnt disable might not have + * actually started yet, and the logic in the hwcnt + * enable state will prevent the worker from + * performing the disable entirely, preventing loss of + * any hardware counter data. + * + * If the hwcnt disable has started, then we'll lose + * a tiny amount of hardware counter data between the + * disable and the re-enable occurring. + * + * This loss of data is preferable to the alternative, + * which is to block the shader cores from doing any + * work until we're sure hwcnt has been re-enabled. 
+ */ + if (kbase_pm_is_l2_desired(kbdev)) { + backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; + break; + } - if (available != NULL) - *available = (ready | powering_on_trans) & desired_state; + /* See if we can get away with disabling hwcnt + * atomically, otherwise kick off a worker. + */ + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) { + if (kbase_hwcnt_context_disable_atomic( + kbdev->hwcnt_gpu_ctx)) + backend->hwcnt_disabled = true; + else +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, + &backend->hwcnt_disable_work); +#else + queue_work(system_highpri_wq, + &backend->hwcnt_disable_work); +#endif + } - if (trans) /* Do not progress if any cores are transitioning */ - return false; + if (backend->hwcnt_disabled) + backend->l2_state = KBASE_L2_POWER_DOWN; + break; + + case KBASE_L2_POWER_DOWN: + if (!platform_power_down_only) + /* Powering off the L2 will also power off the + * tiler. + */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, + l2_present, + ACTION_PWROFF); + else + /* If L2 cache is powered then we must flush it + * before we power off the GPU. Normally this + * would have been handled when the L2 was + * powered off. + */ + kbase_gpu_start_cache_clean_nolock( + kbdev); - *powering_on = powering_on_trans; + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, NULL, 0u, 0u); + + backend->l2_state = KBASE_L2_PEND_OFF; + break; + + case KBASE_L2_PEND_OFF: + if (!platform_power_down_only) { + /* We only need to check the L2 here - if the L2 + * is off then the tiler is definitely also off. + */ + if (!l2_trans && !l2_ready) + /* L2 is now powered off */ + backend->l2_state = KBASE_L2_OFF; + } else { + if (!kbdev->cache_clean_in_progress) + backend->l2_state = KBASE_L2_OFF; + } + break; - /* Update desired state to include the in-use cores. These have to be - * kept powered up because there are jobs running or about to run on - * these cores - */ - desired_state_in_use = desired_state | in_use; - - /* Update state of whether l2 caches are powered */ - if (type == KBASE_PM_CORE_L2) { - if ((ready == present) && (desired_state_in_use == ready) && - (trans == 0)) { - /* All are ready, none will be turned off, and none are - * transitioning */ - kbdev->pm.backend.l2_powered = 1; - /* - * Ensure snoops are enabled after L2 is powered up, - * note that kbase keeps track of the snoop state, so - * safe to repeatedly call. 
- */ - kbase_pm_cache_snoop_enable(kbdev); - if (kbdev->l2_users_count > 0) { - /* Notify any registered l2 cache users - * (optimized out when no users waiting) */ - wake_up(&kbdev->pm.backend.l2_powered_wait); + case KBASE_L2_RESET_WAIT: + if (!backend->in_reset) { + /* Reset complete */ + backend->l2_state = KBASE_L2_OFF; } - } else - kbdev->pm.backend.l2_powered = 0; - } + break; - if (desired_state == ready && (trans == 0)) - return true; + default: + WARN(1, "Invalid state in l2_state: %d", + backend->l2_state); + } + } while (backend->l2_state != prev_state); - /* Restrict the cores to those that are actually present */ - powerup = desired_state_in_use & present; - powerdown = (~desired_state_in_use) & present; - - /* Restrict to cores that are not already in the desired state */ - powerup &= ~ready; - powerdown &= ready; - - /* Don't transition any cores that are already transitioning, except for - * Mali cores that support the following case: - * - * If the SHADER_PWRON or TILER_PWRON registers are written to turn on - * a core that is currently transitioning to power off, then this is - * remembered and the shader core is automatically powered up again once - * the original transition completes. Once the automatic power on is - * complete any job scheduled on the shader core should start. - */ - powerdown &= ~trans; + if (kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off && + backend->l2_state == KBASE_L2_OFF) { + kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false; + queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, + &kbdev->pm.backend.gpu_poweroff_wait_work); + } + + if (backend->l2_state == KBASE_L2_ON) + return l2_present; + return 0; +} - if (kbase_hw_has_feature(kbdev, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS)) - if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type) - trans = powering_on_trans; /* for exception cases, only - * mask off cores in power on - * transitions */ +static void shader_poweroff_timer_stop_callback(struct work_struct *data) +{ + unsigned long flags; + struct kbasep_pm_tick_timer_state *stt = container_of(data, + struct kbasep_pm_tick_timer_state, work); + struct kbase_device *kbdev = container_of(stt, struct kbase_device, + pm.backend.shader_tick_timer); - powerup &= ~trans; + hrtimer_cancel(&stt->timer); - /* Perform transitions if any */ - kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON); -#if !PLATFORM_POWER_DOWN_ONLY - kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF); -#endif + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - /* Recalculate cores transitioning on, and re-evaluate our state */ - powering_on_trans |= powerup; - *powering_on = powering_on_trans; - if (available != NULL) - *available = (ready | powering_on_trans) & desired_state; + stt->cancel_queued = false; + if (kbdev->pm.backend.gpu_powered) + kbase_pm_update_state(kbdev); - return false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type); - /** - * get_desired_cache_status - Determine which caches should be on for a - * particular core state + * shader_poweroff_timer_queue_cancel - cancel the shader poweroff tick timer + * @kbdev: pointer to kbase device * - * This function takes a bit mask of the present caches and the cores (or - * caches) that are attached to the caches that will be powered. It then - * computes which caches should be turned on to allow the cores requested to be - * powered up. 
+ * Synchronization between the shader state machine and the timer thread is + * difficult. This is because situations may arise where the state machine + * wants to start the timer, but the callback is already running, and has + * already passed the point at which it checks whether it is required, and so + * cancels itself, even though the state machine may have just tried to call + * hrtimer_start. * - * @present: The bit mask of present caches - * @cores_powered: A bit mask of cores (or L2 caches) that are desired to - * be powered - * @tilers_powered: The bit mask of tilers that are desired to be powered + * This cannot be stopped by holding hwaccess_lock in the timer thread, + * because there are still infinitesimally small sections at the start and end + * of the callback where the lock is not held. * - * Return: A bit mask of the caches that should be turned on + * Instead, a new state is added to the shader state machine, + * KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF. This is used to guarantee + * that when the shaders are switched off, the timer has definitely been + * cancelled. As a result, when KBASE_SHADERS_ON_CORESTACK_ON is left and the + * timer is started, it is guaranteed that either the timer is already running + * (from an availability change or cancelled timer), or hrtimer_start will + * succeed. It is critical to avoid ending up in + * KBASE_SHADERS_WAIT_OFF_CORESTACK_ON without the timer running, or it could + * hang there forever. */ -static u64 get_desired_cache_status(u64 present, u64 cores_powered, - u64 tilers_powered) +static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) { - u64 desired = 0; + struct kbasep_pm_tick_timer_state *stt = + &kbdev->pm.backend.shader_tick_timer; - while (present) { - /* Find out which is the highest set bit */ - u64 bit = fls64(present) - 1; - u64 bit_mask = 1ull << bit; - /* Create a mask which has all bits from 'bit' upwards set */ + lockdep_assert_held(&kbdev->hwaccess_lock); - u64 mask = ~(bit_mask - 1); + stt->needed = false; - /* If there are any cores powered at this bit or above (that - * haven't previously been processed) then we need this core on - */ - if (cores_powered & mask) - desired |= bit_mask; - - /* Remove bits from cores_powered and present */ - cores_powered &= ~mask; - present &= ~bit_mask; + if (hrtimer_active(&stt->timer) && !stt->cancel_queued) { + stt->cancel_queued = true; + queue_work(stt->wq, &stt->work); } +} - /* Power up the required L2(s) for the tiler */ - if (tilers_powered) - desired |= 1; +static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + struct kbasep_pm_tick_timer_state *stt = + &kbdev->pm.backend.shader_tick_timer; + enum kbase_shader_core_state prev_state; + u64 stacks_avail = 0; - return desired; -} + lockdep_assert_held(&kbdev->hwaccess_lock); -KBASE_EXPORT_TEST_API(get_desired_cache_status); + if (corestack_driver_control) + /* Always power on all the corestacks. Disabling certain + * corestacks when their respective shaders are not in the + * available bitmap is not currently supported. 
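The hysteresis design described above depends on an hrtimer callback that is not shown in this extract. The following is an assumed sketch of the shape implied by the kbasep_pm_tick_timer_state fields (remaining_ticks, needed, configured_interval), not the driver's actual callback:

/* Hypothetical tick callback: each expiry consumes one remaining tick and
 * re-runs the PM state machine; the timer restarts itself while 'needed'
 * is still set. The real callback lives elsewhere in this file. */
static enum hrtimer_restart example_shader_tick_callback(struct hrtimer *timer)
{
	struct kbasep_pm_tick_timer_state *stt = container_of(timer,
			struct kbasep_pm_tick_timer_state, timer);
	struct kbase_device *kbdev = container_of(stt, struct kbase_device,
			pm.backend.shader_tick_timer);
	unsigned long flags;
	bool restart;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	if (stt->remaining_ticks)
		stt->remaining_ticks--;
	kbase_pm_update_state(kbdev);
	restart = stt->needed;
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	if (!restart)
		return HRTIMER_NORESTART;

	hrtimer_forward_now(timer, stt->configured_interval);
	return HRTIMER_RESTART;
}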
+ */ + stacks_avail = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_STACK); -#ifdef CONFIG_MALI_CORESTACK -u64 kbase_pm_core_stack_mask(u64 cores) -{ - u64 stack_mask = 0; - size_t const MAX_CORE_ID = 31; - size_t const NUM_CORES_PER_STACK = 4; - size_t i; - - for (i = 0; i <= MAX_CORE_ID; ++i) { - if (test_bit(i, (unsigned long *)&cores)) { - /* Every core which ID >= 16 is filled to stacks 4-7 - * instead of 0-3 */ - size_t const stack_num = (i >= 16) ? - (i % NUM_CORES_PER_STACK) + 4 : - (i % NUM_CORES_PER_STACK); - set_bit(stack_num, (unsigned long *)&stack_mask); + do { + u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 stacks_trans = 0; + u64 stacks_ready = 0; + + if (corestack_driver_control) { + stacks_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_STACK); + stacks_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK); } - } - return stack_mask; -} -#endif /* CONFIG_MALI_CORESTACK */ + /* mask off ready from trans in case transitions finished + * between the register reads + */ + shaders_trans &= ~shaders_ready; + stacks_trans &= ~stacks_ready; -bool -MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) -{ - bool cores_are_available = false; - bool in_desired_state = true; - u64 desired_l2_state; -#ifdef CONFIG_MALI_CORESTACK - u64 desired_stack_state; - u64 stacks_powered; -#endif /* CONFIG_MALI_CORESTACK */ - u64 cores_powered; - u64 tilers_powered; - u64 tiler_available_bitmap; - u64 tiler_transitioning_bitmap; - u64 shader_available_bitmap; - u64 shader_ready_bitmap; - u64 shader_transitioning_bitmap; - u64 l2_available_bitmap; - u64 prev_l2_available_bitmap; - u64 l2_inuse_bitmap; + prev_state = backend->shaders_state; - KBASE_DEBUG_ASSERT(NULL != kbdev); - lockdep_assert_held(&kbdev->hwaccess_lock); + switch (backend->shaders_state) { + case KBASE_SHADERS_OFF_CORESTACK_OFF: + /* Ignore changes to the shader core availability + * except at certain points where we can handle it, + * i.e. off and SHADERS_ON_CORESTACK_ON. 
+ */ + backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev); - spin_lock(&kbdev->pm.backend.gpu_powered_lock); - if (kbdev->pm.backend.gpu_powered == false) { - spin_unlock(&kbdev->pm.backend.gpu_powered_lock); - if (kbdev->pm.backend.desired_shader_state == 0 && - kbdev->pm.backend.desired_tiler_state == 0) - return true; - return false; - } + if (backend->shaders_desired && backend->l2_state == KBASE_L2_ON) { + if (corestack_driver_control) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, + stacks_avail, ACTION_PWRON); - /* If any cores are already powered then, we must keep the caches on */ - shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_SHADER); - cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); - cores_powered |= kbdev->pm.backend.desired_shader_state; + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_ON; + } + break; -#ifdef CONFIG_MALI_CORESTACK - /* Work out which core stacks want to be powered */ - desired_stack_state = kbase_pm_core_stack_mask(cores_powered); - stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) | - desired_stack_state; -#endif /* CONFIG_MALI_CORESTACK */ - - /* Work out which tilers want to be powered */ - tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_TILER); - tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); - tilers_powered |= kbdev->pm.backend.desired_tiler_state; - - /* If there are l2 cache users registered, keep all l2s powered even if - * all other cores are off. */ - if (kbdev->l2_users_count > 0) - cores_powered |= kbdev->gpu_props.props.raw_props.l2_present; - - desired_l2_state = get_desired_cache_status( - kbdev->gpu_props.props.raw_props.l2_present, - cores_powered, tilers_powered); - - l2_inuse_bitmap = get_desired_cache_status( - kbdev->gpu_props.props.raw_props.l2_present, - cores_powered | shader_transitioning_bitmap, - tilers_powered | tiler_transitioning_bitmap); + case KBASE_SHADERS_OFF_CORESTACK_PEND_ON: + if (!stacks_trans && stacks_ready == stacks_avail) { + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + backend->shaders_avail, ACTION_PWRON); -#ifdef CONFIG_MALI_CORESTACK - if (stacks_powered) - desired_l2_state |= 1; -#endif /* CONFIG_MALI_CORESTACK */ + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; - /* If any l2 cache is on, then enable l2 #0, for use by job manager */ - if (0 != desired_l2_state) - desired_l2_state |= 1; + } + break; + + case KBASE_SHADERS_PEND_ON_CORESTACK_ON: + if (!shaders_trans && shaders_ready == backend->shaders_avail) { + KBASE_TRACE_ADD(kbdev, + PM_CORES_CHANGE_AVAILABLE, + NULL, NULL, 0u, (u32)shaders_ready); + backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_ON_CORESTACK_ON: + backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev); + + if (!backend->shaders_desired) { + if (kbdev->pm.backend.protected_transition_override || + !stt->configured_ticks || + WARN_ON(stt->cancel_queued)) { + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + } else { + stt->remaining_ticks = stt->configured_ticks; + stt->needed = true; + + /* The shader hysteresis timer is not + * done the obvious way, which would be + * to start an hrtimer when the shader + * power off is requested. Instead, + * use a 'tick' timer, and set the + * remaining number of ticks on a power + * off request. 
This avoids the + * latency of starting, then + * immediately cancelling an hrtimer + * when the shaders are re-requested + * before the timeout expires. + */ + if (!hrtimer_active(&stt->timer)) + hrtimer_start(&stt->timer, + stt->configured_interval, + HRTIMER_MODE_REL); + + backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; + } + } else if (!platform_power_down_only) { + if (backend->shaders_avail & ~shaders_ready) { + backend->shaders_avail |= shaders_ready; + + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + backend->shaders_avail & ~shaders_ready, + ACTION_PWRON); + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; + + } + } + break; - prev_l2_available_bitmap = kbdev->l2_available_bitmap; - in_desired_state &= kbase_pm_transition_core_type(kbdev, - KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap, - &l2_available_bitmap, - &kbdev->pm.backend.powering_on_l2_state); + case KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: + if (WARN_ON(!hrtimer_active(&stt->timer))) { + stt->remaining_ticks = 0; + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + } - kbdev->l2_available_bitmap = l2_available_bitmap; + if (backend->shaders_desired) { + stt->remaining_ticks = 0; + backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON; + } else if (stt->remaining_ticks == 0) { + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + } + break; + case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: + shader_poweroff_timer_queue_cancel(kbdev); -#ifdef CONFIG_MALI_CORESTACK - if (in_desired_state) { - in_desired_state &= kbase_pm_transition_core_type(kbdev, - KBASE_PM_CORE_STACK, desired_stack_state, 0, - &kbdev->stack_available_bitmap, - &kbdev->pm.backend.powering_on_stack_state); - } -#endif /* CONFIG_MALI_CORESTACK */ - - if (in_desired_state) { - in_desired_state &= kbase_pm_transition_core_type(kbdev, - KBASE_PM_CORE_TILER, - kbdev->pm.backend.desired_tiler_state, - 0, &tiler_available_bitmap, - &kbdev->pm.backend.powering_on_tiler_state); - in_desired_state &= kbase_pm_transition_core_type(kbdev, - KBASE_PM_CORE_SHADER, - kbdev->pm.backend.desired_shader_state, - 0, &shader_available_bitmap, - &kbdev->pm.backend.powering_on_shader_state); - - if (kbdev->shader_available_bitmap != shader_available_bitmap) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, - NULL, 0u, - (u32) shader_available_bitmap); - - kbdev->shader_available_bitmap = shader_available_bitmap; - - if (kbdev->tiler_available_bitmap != tiler_available_bitmap) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, NULL, 0u, - (u32) tiler_available_bitmap); + if (!platform_power_down_only) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + shaders_ready, ACTION_PWROFF); - kbdev->tiler_available_bitmap = tiler_available_bitmap; + KBASE_TRACE_ADD(kbdev, + PM_CORES_CHANGE_AVAILABLE, + NULL, NULL, 0u, 0u); - } else if ((l2_available_bitmap & - kbdev->gpu_props.props.raw_props.tiler_present) != - kbdev->gpu_props.props.raw_props.tiler_present) { - tiler_available_bitmap = 0; + backend->shaders_state = KBASE_SHADERS_PEND_OFF_CORESTACK_ON; + break; - kbdev->tiler_available_bitmap = tiler_available_bitmap; - } + case KBASE_SHADERS_PEND_OFF_CORESTACK_ON: + if ((!shaders_trans && !shaders_ready) || platform_power_down_only) { + if (corestack_driver_control && !platform_power_down_only) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, + stacks_avail, ACTION_PWROFF); - /* State updated for slow-path waiters */ - kbdev->pm.backend.gpu_in_desired_state = in_desired_state; - - shader_ready_bitmap = 
kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_SHADER); - shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_SHADER); - - /* Determine whether the cores are now available (even if the set of - * available cores is empty). Note that they can be available even if - * we've not finished transitioning to the desired state */ - if ((kbdev->shader_available_bitmap & - kbdev->pm.backend.desired_shader_state) - == kbdev->pm.backend.desired_shader_state && - (kbdev->tiler_available_bitmap & - kbdev->pm.backend.desired_tiler_state) - == kbdev->pm.backend.desired_tiler_state) { - cores_are_available = true; - - KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u, - (u32)(kbdev->shader_available_bitmap & - kbdev->pm.backend.desired_shader_state)); - KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u, - (u32)(kbdev->tiler_available_bitmap & - kbdev->pm.backend.desired_tiler_state)); - } + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_OFF; + } + break; + + case KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: + if ((!stacks_trans && !stacks_ready) || platform_power_down_only) + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + break; + + case KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: + if (!hrtimer_active(&stt->timer) && !stt->cancel_queued) + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; + break; + + case KBASE_SHADERS_RESET_WAIT: + /* Reset complete */ + if (!backend->in_reset) + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + break; + } + } while (backend->shaders_state != prev_state); +} - if (in_desired_state) { - KBASE_DEBUG_ASSERT(cores_are_available); +static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) +{ + bool in_desired_state = true; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbase_pm_is_l2_desired(kbdev) && + kbdev->pm.backend.l2_state != KBASE_L2_ON) + in_desired_state = false; + else if (!kbase_pm_is_l2_desired(kbdev) && + kbdev->pm.backend.l2_state != KBASE_L2_OFF) + in_desired_state = false; + + if (kbdev->pm.backend.shaders_desired && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) + in_desired_state = false; + else if (!kbdev->pm.backend.shaders_desired && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + in_desired_state = false; + + return in_desired_state; +} + +static bool kbase_pm_is_in_desired_state(struct kbase_device *kbdev) +{ + bool in_desired_state; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + in_desired_state = kbase_pm_is_in_desired_state_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return in_desired_state; +} + +static bool kbase_pm_is_in_desired_state_with_l2_powered( + struct kbase_device *kbdev) +{ + bool in_desired_state = false; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (kbase_pm_is_in_desired_state_nolock(kbdev) && + (kbdev->pm.backend.l2_state == KBASE_L2_ON)) + in_desired_state = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return in_desired_state; +} + +static void kbase_pm_trace_power_state(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); #if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_pm_status(KBASE_PM_CORE_L2, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_L2)); - kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_SHADER)); - 
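
The new kbase_pm_is_in_desired_state*() helpers above follow the usual *_nolock pairing: the _nolock variant assumes hwaccess_lock is already held so the state-machine code can call it directly, while the plain variant takes the lock itself and is suitable as a wait_event() predicate. A rough userspace illustration of that split, using a pthread mutex in place of the spinlock and purely illustrative names:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t hw_lock = PTHREAD_MUTEX_INITIALIZER;
static bool l2_on, l2_desired, shaders_on, shaders_desired;

/* Caller must hold hw_lock; cheap enough to call from the state machine. */
static bool in_desired_state_nolock(void)
{
        return l2_on == l2_desired && shaders_on == shaders_desired;
}

/* Locking wrapper, usable as a standalone query or wait predicate. */
static bool in_desired_state(void)
{
        bool ret;

        pthread_mutex_lock(&hw_lock);
        ret = in_desired_state_nolock();
        pthread_mutex_unlock(&hw_lock);
        return ret;
}

int main(void)
{
        l2_desired = shaders_desired = true;
        l2_on = shaders_on = true;
        printf("in desired state: %d\n", in_desired_state());
        return 0;
}
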
kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_TILER)); -#ifdef CONFIG_MALI_CORESTACK + kbase_trace_mali_pm_status(KBASE_PM_CORE_L2, + kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_L2)); + kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER, + kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_SHADER)); + kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER, + kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_TILER)); + if (corestack_driver_control) kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK, kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK)); -#endif /* CONFIG_MALI_CORESTACK */ #endif - KBASE_TLSTREAM_AUX_PM_STATE( - KBASE_PM_CORE_L2, - kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_L2)); - KBASE_TLSTREAM_AUX_PM_STATE( - KBASE_PM_CORE_SHADER, - kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_SHADER)); - KBASE_TLSTREAM_AUX_PM_STATE( - KBASE_PM_CORE_TILER, - kbase_pm_get_ready_cores( - kbdev, - KBASE_PM_CORE_TILER)); -#ifdef CONFIG_MALI_CORESTACK + KBASE_TLSTREAM_AUX_PM_STATE( + KBASE_PM_CORE_L2, + kbase_pm_get_ready_cores( + kbdev, KBASE_PM_CORE_L2)); + KBASE_TLSTREAM_AUX_PM_STATE( + KBASE_PM_CORE_SHADER, + kbase_pm_get_ready_cores( + kbdev, KBASE_PM_CORE_SHADER)); + KBASE_TLSTREAM_AUX_PM_STATE( + KBASE_PM_CORE_TILER, + kbase_pm_get_ready_cores( + kbdev, + KBASE_PM_CORE_TILER)); + + if (corestack_driver_control) KBASE_TLSTREAM_AUX_PM_STATE( KBASE_PM_CORE_STACK, kbase_pm_get_ready_cores( kbdev, KBASE_PM_CORE_STACK)); -#endif /* CONFIG_MALI_CORESTACK */ +} + +void kbase_pm_update_state(struct kbase_device *kbdev) +{ + enum kbase_shader_core_state prev_shaders_state = + kbdev->pm.backend.shaders_state; + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!kbdev->pm.backend.gpu_powered) + return; /* Do nothing if the GPU is off */ + + kbase_pm_l2_update_state(kbdev); + kbase_pm_shaders_update_state(kbdev); + + /* If the shaders just turned off, re-invoke the L2 state machine, in + * case it was waiting for the shaders to turn off before powering down + * the L2. + */ + if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF && + kbdev->pm.backend.shaders_state == KBASE_SHADERS_OFF_CORESTACK_OFF) + kbase_pm_l2_update_state(kbdev); + + if (kbase_pm_is_in_desired_state_nolock(kbdev)) { KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL, - kbdev->pm.backend.gpu_in_desired_state, - (u32)kbdev->pm.backend.desired_shader_state); - KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u, - (u32)kbdev->pm.backend.desired_tiler_state); + true, kbdev->pm.backend.shaders_avail); - /* Wake slow-path waiters. Job scheduler does not use this. 
*/ - KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); + kbase_pm_trace_power_state(kbdev); + KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); } +} - spin_unlock(&kbdev->pm.backend.gpu_powered_lock); - - kbdev->shader_ready_bitmap = shader_ready_bitmap; - kbdev->shader_transitioning_bitmap = shader_transitioning_bitmap; - - /* The core availability policy is not allowed to keep core group 0 - * turned off (unless it was changing the l2 power state) */ - if (!((shader_ready_bitmap | shader_transitioning_bitmap) & - kbdev->gpu_props.props.coherency_info.group[0].core_mask) && - (prev_l2_available_bitmap == desired_l2_state) && - !(kbase_pm_ca_get_core_mask(kbdev) & - kbdev->gpu_props.props.coherency_info.group[0].core_mask)) - BUG(); - - /* The core availability policy is allowed to keep core group 1 off, - * but all jobs specifically targeting CG1 must fail */ - if (!((shader_ready_bitmap | shader_transitioning_bitmap) & - kbdev->gpu_props.props.coherency_info.group[1].core_mask) && - !(kbase_pm_ca_get_core_mask(kbdev) & - kbdev->gpu_props.props.coherency_info.group[1].core_mask)) - kbdev->pm.backend.cg1_disabled = true; - else - kbdev->pm.backend.cg1_disabled = false; +static enum hrtimer_restart +shader_tick_timer_callback(struct hrtimer *timer) +{ + struct kbasep_pm_tick_timer_state *stt = container_of(timer, + struct kbasep_pm_tick_timer_state, timer); + struct kbase_device *kbdev = container_of(stt, struct kbase_device, + pm.backend.shader_tick_timer); + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + unsigned long flags; + enum hrtimer_restart restart = HRTIMER_NORESTART; - return cores_are_available; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (stt->remaining_ticks && + backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) { + stt->remaining_ticks--; + + /* If the remaining ticks just changed from 1 to 0, invoke the + * PM state machine to power off the shader cores. 
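
The hysteresis described in the comments above is a countdown driven by a free-running tick timer rather than an hrtimer that is started and cancelled on every request: a power-off request arms remaining_ticks, each tick decrements it, and the shaders are only powered off if the countdown reaches zero while they are still unwanted. A small self-contained model of that behaviour (all names are stand-ins for the driver's):

#include <stdbool.h>
#include <stdio.h>

struct toy_hysteresis {
        int configured_ticks;
        int remaining_ticks;
        bool shaders_desired;
        bool shaders_powered;
};

static void toy_request_poweroff(struct toy_hysteresis *h)
{
        h->shaders_desired = false;
        h->remaining_ticks = h->configured_ticks;       /* arm the countdown */
}

static void toy_tick(struct toy_hysteresis *h)
{
        if (h->shaders_desired || !h->shaders_powered)
                return;                                 /* nothing to do */

        if (h->remaining_ticks && --h->remaining_ticks == 0)
                h->shaders_powered = false;             /* hysteresis expired */
}

int main(void)
{
        struct toy_hysteresis h = {
                .configured_ticks = 2,
                .shaders_desired = true,
                .shaders_powered = true,
        };

        toy_request_poweroff(&h);
        toy_tick(&h);                   /* 2 -> 1, still powered */
        h.shaders_desired = true;       /* re-requested before expiry: stays on */
        toy_tick(&h);                   /* ignored while desired */
        toy_request_poweroff(&h);       /* countdown re-armed */
        toy_tick(&h);
        toy_tick(&h);
        printf("powered=%d\n", h.shaders_powered);      /* 0: powered off */
        return 0;
}
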
+ */ + if (!stt->remaining_ticks && !backend->shaders_desired) + kbase_pm_update_state(kbdev); + } + + if (stt->needed) { + hrtimer_forward_now(timer, stt->configured_interval); + restart = HRTIMER_RESTART; + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return restart; } -KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock); -/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has +int kbase_pm_state_machine_init(struct kbase_device *kbdev) +{ + struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer; + + stt->wq = alloc_workqueue("kbase_pm_shader_poweroff", WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!stt->wq) + return -ENOMEM; + + INIT_WORK(&stt->work, shader_poweroff_timer_stop_callback); + + stt->needed = false; + hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stt->timer.function = shader_tick_timer_callback; + stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); + stt->configured_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; + + return 0; +} + +void kbase_pm_state_machine_term(struct kbase_device *kbdev) +{ + hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer); + destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq); +} + +void kbase_pm_reset_start_locked(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + backend->in_reset = true; + backend->l2_state = KBASE_L2_RESET_WAIT; + backend->shaders_state = KBASE_SHADERS_RESET_WAIT; + + /* We're in a reset, so hwcnt will have been synchronously disabled by + * this function's caller as part of the reset process. We therefore + * know that any call to kbase_hwcnt_context_disable_atomic, if + * required to sync the hwcnt refcount with our internal state, is + * guaranteed to succeed. + */ + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) { + WARN_ON(!kbase_hwcnt_context_disable_atomic( + kbdev->hwcnt_gpu_ctx)); + backend->hwcnt_disabled = true; + } + + shader_poweroff_timer_queue_cancel(kbdev); +} + +void kbase_pm_reset_complete(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + backend->in_reset = false; + kbase_pm_update_state(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +/* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has * aborted due to a fatal signal. If the time spent waiting has exceeded this * threshold then there is most likely a hardware issue. */ #define PM_TIMEOUT (5*HZ) /* 5s */ -void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) +static void kbase_pm_timed_out(struct kbase_device *kbdev) +{ + dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); + dev_err(kbdev->dev, "Desired state :\n"); + dev_err(kbdev->dev, "\tShader=%016llx\n", + kbdev->pm.backend.shaders_desired ? 
kbdev->pm.backend.shaders_avail : 0); + dev_err(kbdev->dev, "Current state :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_LO))); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_LO))); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_LO))); + dev_err(kbdev->dev, "Cores transitioning :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_HI)), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_LO))); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_HI)), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_LO))); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_HI)), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_LO))); + + dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); +} + +void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) { unsigned long flags; unsigned long timeout; - bool cores_are_available; - int ret; + int err; - /* Force the transition to be checked and reported - the cores may be - * 'available' (for job submission) but not fully powered up. */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + timeout = jiffies + PM_TIMEOUT; - cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + /* Wait for cores */ + err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state_with_l2_powered(kbdev)); + + if (err < 0 && time_after(jiffies, timeout)) + kbase_pm_timed_out(kbdev); +} - /* Don't need 'cores_are_available', because we don't return anything */ - CSTD_UNUSED(cores_are_available); +void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +{ + unsigned long flags; + unsigned long timeout; + int err; + + /* Let the state machine latch the most recent desired state. 
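
Both new wait helpers (kbase_pm_wait_for_l2_powered above, and kbase_pm_wait_for_desired_state whose body continues below) share the same timeout convention: wait_event_killable() only returns an error when the waiting task receives a fatal signal, and the PM_TIMEOUT deadline is then used purely as a diagnostic, so the register dump and GPU reset in kbase_pm_timed_out() fire only when a killed waiter had also been blocked for longer than the expected worst case. A stripped-down sketch of the pattern; the wait queue and condition helper are placeholders, not real kbase symbols:

/* Sketch only: assumes a wait queue 'my_wq' and a helper 'my_condition()'
 * exist; not an actual kbase function.
 */
static void my_wait_with_diagnostic(struct kbase_device *kbdev)
{
        unsigned long timeout = jiffies + PM_TIMEOUT;
        int err;

        /* Returns 0 once the condition is true, or -ERESTARTSYS if a fatal
         * signal arrived first.
         */
        err = wait_event_killable(my_wq, my_condition(kbdev));

        /* Only treat this as a hardware problem if we were killed *and* had
         * already waited longer than PM_TIMEOUT.
         */
        if (err < 0 && time_after(jiffies, timeout))
                kbase_pm_timed_out(kbdev);
}
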
*/ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); timeout = jiffies + PM_TIMEOUT; /* Wait for cores */ - ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, - kbdev->pm.backend.gpu_in_desired_state); - - if (ret < 0 && time_after(jiffies, timeout)) { - dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); - dev_err(kbdev->dev, "Desired state :\n"); - dev_err(kbdev->dev, "\tShader=%016llx\n", - kbdev->pm.backend.desired_shader_state); - dev_err(kbdev->dev, "\tTiler =%016llx\n", - kbdev->pm.backend.desired_tiler_state); - dev_err(kbdev->dev, "Current state :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_READY_HI)), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_READY_LO))); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_READY_HI)), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_READY_LO))); - dev_err(kbdev->dev, "\tL2 =%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_READY_HI)), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_READY_LO))); - dev_err(kbdev->dev, "Cores transitioning :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - SHADER_PWRTRANS_HI)), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - SHADER_PWRTRANS_LO))); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - TILER_PWRTRANS_HI)), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - TILER_PWRTRANS_LO))); - dev_err(kbdev->dev, "\tL2 =%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - L2_PWRTRANS_HI)), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - L2_PWRTRANS_LO))); -#if KBASE_GPU_RESET_EN - dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); -#endif /* KBASE_GPU_RESET_EN */ - } + err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev)); + + if (err < 0 && time_after(jiffies, timeout)) + kbase_pm_timed_out(kbdev); } -KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync); +KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); void kbase_pm_enable_interrupts(struct kbase_device *kbdev) { @@ -957,7 +1256,6 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); - /* * pmu layout: * 0x0000: PMU TAG (RO) (0xCAFECAFE) @@ -990,12 +1288,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) kbdev->pm.backend.callback_power_resume(kbdev); return; } else if (kbdev->pm.backend.callback_power_on) { - kbdev->pm.backend.callback_power_on(kbdev); - /* If your platform properly keeps the GPU state you may use the - * return value of the callback_power_on function to - * conditionally reset the GPU on power up. Currently we are - * conservative and always reset the GPU. 
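
In the kbase_pm_clock_on() change that follows, the driver stops unconditionally resetting the GPU and instead lets the platform's power-on callback report whether a reset is needed. A hypothetical platform callback is sketched below; the function name and the state-retention check are made up for illustration, only the return-value meaning is taken from the hunk itself:

/* Hypothetical platform glue: return nonzero if GPU state may have been
 * lost while powered down (kbase will then reset it), or 0 if the platform
 * retained state and the reset can be skipped.
 */
static int example_platform_power_on(struct kbase_device *kbdev)
{
        bool state_retained;

        /* ... enable regulators and clocks for the GPU here ... */

        state_retained = example_platform_kept_gpu_state();    /* made-up helper */

        return state_retained ? 0 : 1;
}
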
*/ - reset_required = true; + reset_required = kbdev->pm.backend.callback_power_on(kbdev); } spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); @@ -1014,8 +1307,14 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); - /* Lastly, enable the interrupts */ + /* Enable the interrupts */ kbase_pm_enable_interrupts(kbdev); + + /* Turn on the L2 caches */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.l2_desired = true; + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_clock_on); @@ -1028,7 +1327,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) lockdep_assert_held(&kbdev->pm.lock); /* ASSERT that the cores should now be unavailable. No lock needed. */ - KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u); + WARN_ON(kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF); kbdev->poweroff_pending = true; @@ -1252,14 +1551,31 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING)) kbdev->hw_quirks_sc |= SC_TLS_HASH_ENABLE; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { + int default_idvs_group_size = 0xF; + u32 tmp; + + if (of_property_read_u32(kbdev->dev->of_node, + "idvs-group-size", &tmp)) + tmp = default_idvs_group_size; + + if (tmp > JM_MAX_IDVS_GROUP_SIZE) { + dev_err(kbdev->dev, + "idvs-group-size of %d is too large. Maximum value is %d", + tmp, JM_MAX_IDVS_GROUP_SIZE); + tmp = default_idvs_group_size; + } + + kbdev->hw_quirks_jm |= tmp << JM_IDVS_GROUP_SIZE_SHIFT; + } + if (!kbdev->hw_quirks_jm) kbdev->hw_quirks_jm = kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)); -#ifdef CONFIG_MALI_CORESTACK #define MANUAL_POWER_CONTROL ((u32)(1 << 8)) - kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; -#endif /* CONFIG_MALI_CORESTACK */ + if (corestack_driver_control) + kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; } static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) @@ -1411,7 +1727,6 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) { unsigned long irq_flags; int err; - bool resume_vinstr = false; KBASE_DEBUG_ASSERT(NULL != kbdev); lockdep_assert_held(&kbdev->pm.lock); @@ -1438,15 +1753,9 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) /* The cores should be made unavailable due to the reset */ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - if (kbdev->shader_available_bitmap != 0u) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, - NULL, 0u, (u32)0u); - if (kbdev->tiler_available_bitmap != 0u) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, NULL, 0u, (u32)0u); - kbdev->shader_available_bitmap = 0u; - kbdev->tiler_available_bitmap = 0u; - kbdev->l2_available_bitmap = 0u; + if (kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, + NULL, 0u, (u32)0u); spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); /* Soft reset the GPU */ @@ -1457,10 +1766,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) err = kbase_pm_do_reset(kbdev); spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - if (kbdev->protected_mode) - resume_vinstr = true; kbdev->protected_mode = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); if (err) @@ -1484,9 +1790,7 @@ int 
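
The idvs-group-size handling added above reads an optional devicetree property, falls back to a default of 0xF when the property is absent or out of range, and ORs the value into the JM_CONFIG quirks word. A compilable model of that clamp-and-pack step, using placeholder shift and limit values rather than the real register layout:

#include <stdint.h>
#include <stdio.h>

#define TOY_IDVS_GROUP_SIZE_SHIFT 16    /* placeholder, not the real field */
#define TOY_MAX_IDVS_GROUP_SIZE   0x3F  /* placeholder limit */

static uint32_t toy_pack_idvs_group_size(int property_present, uint32_t value,
                                         uint32_t hw_quirks_jm)
{
        uint32_t group_size = 0xF;      /* default when absent or invalid */

        if (property_present) {
                if (value > TOY_MAX_IDVS_GROUP_SIZE)
                        fprintf(stderr, "idvs-group-size %u too large, using default\n",
                                (unsigned)value);
                else
                        group_size = value;
        }

        return hw_quirks_jm | (group_size << TOY_IDVS_GROUP_SIZE_SHIFT);
}

int main(void)
{
        printf("quirks=0x%08x\n", (unsigned)toy_pack_idvs_group_size(1, 8, 0));
        printf("quirks=0x%08x\n", (unsigned)toy_pack_idvs_group_size(0, 0, 0));
        return 0;
}
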
kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) * false when called from kbase_pm_powerup */ if (kbdev->pm.backend.gpu_cycle_counter_requests && (flags & PM_ENABLE_IRQS)) { - /* enable interrupts as the L2 may have to be powered on */ kbase_pm_enable_interrupts(kbdev); - kbase_pm_request_l2_caches(kbdev); /* Re-enable the counters if we need to */ spin_lock_irqsave( @@ -1499,10 +1803,6 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) &kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags); - spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - kbase_pm_release_l2_caches(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); - kbase_pm_disable_interrupts(kbdev); } @@ -1510,10 +1810,16 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_enable_interrupts(kbdev); exit: - /* If GPU is leaving protected mode resume vinstr operation. */ - if (kbdev->vinstr_ctx && resume_vinstr) - kbase_vinstr_resume(kbdev->vinstr_ctx); - + /* Re-enable GPU hardware counters if we're resetting from protected + * mode. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); return err; } @@ -1527,9 +1833,8 @@ exit: * kbase_pm_request_gpu_cycle_counter() or * kbase_pm_request_gpu_cycle_counter_l2_is_on() only * - * When this function is called the l2 cache must be on and the l2 cache users - * count must have been incremented by a call to ( - * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() ) + * When this function is called the l2 cache must be on - i.e., the GPU must be + * on. * * @kbdev: The kbase device structure of the device */ @@ -1561,8 +1866,6 @@ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < INT_MAX); - kbase_pm_request_l2_caches(kbdev); - kbase_pm_request_gpu_cycle_counter_do_request(kbdev); } @@ -1577,8 +1880,6 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < INT_MAX); - kbase_pm_request_l2_caches_l2_is_on(kbdev); - kbase_pm_request_gpu_cycle_counter_do_request(kbdev); } @@ -1606,8 +1907,6 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) spin_unlock_irqrestore( &kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); - - kbase_pm_release_l2_caches(kbdev); } void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index 0d3599a..e88b3a8 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -163,7 +163,7 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev); * kbase_pm_disable_interrupts - Disable interrupts on the device. * * This prevents delivery of Power Management interrupts to the CPU so that - * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler + * kbase_pm_update_state() will not be called from the IRQ handler * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. * * Interrupts are also disabled after a call to kbase_pm_clock_off(). 
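
The reset path and the kbase_pm_init_hw() exit path above keep a pair of flags for the GPU hardware counters: one recording what the driver wants (desired) and one recording what has actually been applied (disabled), because the atomic variant of the disable can be refused when it cannot complete without blocking and the two then have to be re-synced later. A toy model of that bookkeeping; the names are illustrative and this is not the kbase_hwcnt API:

#include <stdbool.h>
#include <stdio.h>

struct toy_hwcnt {
        bool desired;           /* the driver wants counters enabled */
        bool disabled;          /* counters are actually disabled */
        bool would_block;       /* stand-in for "cannot disable atomically now" */
};

static bool toy_disable_atomic(struct toy_hwcnt *h)
{
        if (h->would_block)
                return false;   /* refused; caller must retry or disable later */
        h->disabled = true;
        return true;
}

static void toy_sync(struct toy_hwcnt *h)
{
        if (!h->desired && !h->disabled)
                toy_disable_atomic(h);
        else if (h->desired && h->disabled)
                h->disabled = false;    /* re-enable */
}

int main(void)
{
        struct toy_hwcnt h = { .desired = true };

        h.desired = false;      /* e.g. a reset or protected-mode entry starts */
        toy_sync(&h);
        printf("disabled=%d\n", h.disabled);    /* 1 */

        h.desired = true;       /* e.g. the reset has finished */
        toy_sync(&h);
        printf("disabled=%d\n", h.disabled);    /* 0 */
        return 0;
}
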
@@ -206,58 +206,38 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); */ void kbase_pm_reset_done(struct kbase_device *kbdev); - /** - * kbase_pm_check_transitions_nolock - Check if there are any power transitions - * to make, and if so start them. - * - * This function will check the desired_xx_state members of - * struct kbase_pm_device_data and the actual status of the hardware to see if - * any power transitions can be made at this time to make the hardware state - * closer to the state desired by the power policy. + * kbase_pm_wait_for_desired_state - Wait for the desired power state to be + * reached * - * The return value can be used to check whether all the desired cores are - * available, and so whether it's worth submitting a job (e.g. from a Power - * Management IRQ). + * Wait for the L2 and shader power state machines to reach the states + * corresponding to the values of 'l2_desired' and 'shaders_desired'. * - * Note that this still returns true when desired_xx_state has no - * cores. That is: of the no cores desired, none were *un*available. In - * this case, the caller may still need to try submitting jobs. This is because - * the Core Availability Policy might have taken us to an intermediate state - * where no cores are powered, before powering on more cores (e.g. for core - * rotation) + * The usual use-case for this is to ensure cores are 'READY' after performing + * a GPU Reset. * - * The caller must hold kbase_device.pm.power_change_lock + * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, + * because this function will take that lock itself. * * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Return: non-zero when all desired cores are available. That is, - * it's worthwhile for the caller to submit a job. - * false otherwise */ -bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev); +void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); /** - * kbase_pm_check_transitions_sync - Synchronous and locking variant of - * kbase_pm_check_transitions_nolock() - * - * On returning, the desired state at the time of the call will have been met. + * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on * - * There is nothing to stop the core being switched off by calls to - * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the - * caller must have already made a call to - * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously. + * Wait for the L2 to be powered on, and for the L2 and shader state machines to + * stabilise by reaching the states corresponding to the values of 'l2_desired' + * and 'shaders_desired'. * - * The usual use-case for this is to ensure cores are 'READY' after performing - * a GPU Reset. + * kbdev->pm.active_count must be non-zero when calling this function. * - * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold - * kbase_device.pm.power_change_lock, because this function will take that - * lock itself. + * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, + * because this function will take that lock itself. 
* * @kbdev: The kbase device structure for the device (must be a valid pointer) */ -void kbase_pm_check_transitions_sync(struct kbase_device *kbdev); +void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); /** * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state() @@ -269,6 +249,25 @@ void kbase_pm_check_transitions_sync(struct kbase_device *kbdev); void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev); /** + * kbase_pm_update_state - Update the L2 and shader power state machines + * @kbdev: Device pointer + */ +void kbase_pm_update_state(struct kbase_device *kbdev); + +/** + * kbase_pm_state_machine_init - Initialize the state machines, primarily the + * shader poweroff timer + * @kbdev: Device pointer + */ +int kbase_pm_state_machine_init(struct kbase_device *kbdev); + +/** + * kbase_pm_state_machine_term - Clean up the PM state machines' data + * @kbdev: Device pointer + */ +void kbase_pm_state_machine_term(struct kbase_device *kbdev); + +/** * kbase_pm_update_cores_state - Update the desired state of shader cores from * the Power Policy, and begin any power * transitions. @@ -283,24 +282,6 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev); void kbase_pm_update_cores_state(struct kbase_device *kbdev); /** - * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off - * the GPU and/or shader cores. - * - * This should be called by any functions which directly power off the GPU. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev); - -/** - * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required - * and used cores. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev); - -/** * kbasep_pm_metrics_init - Initialize the metrics gathering framework. * * This must be called before other metric gathering APIs are called. @@ -577,4 +558,67 @@ void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); #endif +/** + * kbase_pm_reset_start_locked - Signal that GPU reset has started + * @kbdev: Device pointer + * + * Normal power management operation will be suspended until the reset has + * completed. + * + * Caller must hold hwaccess_lock. + */ +void kbase_pm_reset_start_locked(struct kbase_device *kbdev); + +/** + * kbase_pm_reset_complete - Signal that GPU reset has completed + * @kbdev: Device pointer + * + * Normal power management operation will be resumed. The power manager will + * re-evaluate what cores are needed and power on or off as required. + */ +void kbase_pm_reset_complete(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_override_enable - Enable the protected mode override + * @kbdev: Device pointer + * + * When the protected mode override is enabled, all shader cores are requested + * to power down, and the L2 power state can be controlled by + * kbase_pm_protected_l2_override(). + * + * Caller must hold hwaccess_lock. + */ +void kbase_pm_protected_override_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_override_disable - Disable the protected mode override + * @kbdev: Device pointer + * + * Caller must hold hwaccess_lock. 
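
For context, the two reset hooks documented above (their implementations appear earlier in this patch) are intended to bracket a GPU reset roughly as sketched below; the reset step itself is a placeholder, not the driver's actual reset sequence:

static void example_gpu_reset(struct kbase_device *kbdev)
{
        unsigned long flags;

        spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbase_pm_reset_start_locked(kbdev);     /* park the PM state machines */
        spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

        example_do_soft_reset(kbdev);           /* placeholder for the real reset */

        kbase_pm_reset_complete(kbdev);         /* PM re-evaluates and repowers cores */
}
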
+ */ +void kbase_pm_protected_override_disable(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_l2_override - Control the protected mode L2 override + * @kbdev: Device pointer + * @override: true to enable the override, false to disable + * + * When the driver is transitioning in or out of protected mode, the L2 cache is + * forced to power off. This can be overridden to force the L2 cache to power + * on. This is required to change coherency settings on some GPUs. + */ +void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override); + +/* If true, the driver should explicitly control corestack power management, + * instead of relying on the Power Domain Controller. + */ +extern bool corestack_driver_control; + +/* If true, disable powering-down of individual cores, and just power-down at + * the top-level using platform-specific code. + * If false, use the expected behaviour of controlling the individual cores + * from within the driver. + */ +extern bool platform_power_down_only; + #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index 6dd00a9..2f06a0a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -30,215 +30,51 @@ #include <mali_kbase_config_defaults.h> #include <backend/gpu/mali_kbase_pm_internal.h> -static const struct kbase_pm_policy *const policy_list[] = { +static const struct kbase_pm_policy *const all_policy_list[] = { #ifdef CONFIG_MALI_NO_MALI &kbase_pm_always_on_policy_ops, - &kbase_pm_demand_policy_ops, &kbase_pm_coarse_demand_policy_ops, #if !MALI_CUSTOMER_RELEASE - &kbase_pm_demand_always_powered_policy_ops, - &kbase_pm_fast_start_policy_ops, + &kbase_pm_always_on_demand_policy_ops, #endif #else /* CONFIG_MALI_NO_MALI */ -#if !PLATFORM_POWER_DOWN_ONLY - &kbase_pm_demand_policy_ops, -#endif /* !PLATFORM_POWER_DOWN_ONLY */ &kbase_pm_coarse_demand_policy_ops, - &kbase_pm_always_on_policy_ops, #if !MALI_CUSTOMER_RELEASE -#if !PLATFORM_POWER_DOWN_ONLY - &kbase_pm_demand_always_powered_policy_ops, - &kbase_pm_fast_start_policy_ops, -#endif /* !PLATFORM_POWER_DOWN_ONLY */ + &kbase_pm_always_on_demand_policy_ops, #endif + &kbase_pm_always_on_policy_ops #endif /* CONFIG_MALI_NO_MALI */ }; -/* The number of policies available in the system. - * This is derived from the number of functions listed in policy_get_functions. +/* A filtered list of policies available in the system, calculated by filtering + * all_policy_list based on the flags provided by each policy. */ -#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) - - -/* Function IDs for looking up Timeline Trace codes in - * kbase_pm_change_state_trace_code */ -enum kbase_pm_func_id { - KBASE_PM_FUNC_ID_REQUEST_CORES_START, - KBASE_PM_FUNC_ID_REQUEST_CORES_END, - KBASE_PM_FUNC_ID_RELEASE_CORES_START, - KBASE_PM_FUNC_ID_RELEASE_CORES_END, - /* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither - * expect to hit it nor tend to hit it very much anyway. We can detect - * whether we need more instrumentation by a difference between - * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. 
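
Likewise, a hypothetical sketch of how the protected-mode overrides declared above could be sequenced; this is not the driver's actual protected-mode entry code, and the coherency/mode-switch step is a placeholder:

static void example_enter_protected_mode(struct kbase_device *kbdev)
{
        unsigned long flags;

        spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbase_pm_protected_override_enable(kbdev);      /* request all shaders off */
        spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

        kbase_pm_wait_for_desired_state(kbdev);         /* wait for them to power down */

        /* Some GPUs need the L2 powered while coherency is reprogrammed. */
        kbase_pm_protected_l2_override(kbdev, true);
        /* ... change coherency settings / switch into protected mode ... */
        kbase_pm_protected_l2_override(kbdev, false);

        spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbase_pm_protected_override_disable(kbdev);
        spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
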
*/ - - /* Must be the last */ - KBASE_PM_FUNC_ID_COUNT -}; - - -/* State changes during request/unrequest/release-ing cores */ -enum { - KBASE_PM_CHANGE_STATE_SHADER = (1u << 0), - KBASE_PM_CHANGE_STATE_TILER = (1u << 1), - - /* These two must be last */ - KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER | - KBASE_PM_CHANGE_STATE_SHADER), - KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1 -}; -typedef u32 kbase_pm_change_state; +static const struct kbase_pm_policy *enabled_policy_list[ARRAY_SIZE(all_policy_list)]; +static size_t enabled_policy_count; -/** - * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any - * requested shader cores - * @kbdev: Device pointer - */ -static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) +static void generate_filtered_policy_list(void) { - u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; - u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbdev->pm.backend.desired_shader_state &= - ~kbdev->pm.backend.shader_poweroff_pending; - kbdev->pm.backend.desired_tiler_state &= - ~kbdev->pm.backend.tiler_poweroff_pending; - - kbdev->pm.backend.shader_poweroff_pending = 0; - kbdev->pm.backend.tiler_poweroff_pending = 0; - - if (prev_shader_state != kbdev->pm.backend.desired_shader_state || - prev_tiler_state != - kbdev->pm.backend.desired_tiler_state || - kbdev->pm.backend.ca_in_transition) { - bool cores_are_available; - - cores_are_available = kbase_pm_check_transitions_nolock(kbdev); - - /* Don't need 'cores_are_available', - * because we don't return anything */ - CSTD_UNUSED(cores_are_available); - } -} - -static enum hrtimer_restart -kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) -{ - struct kbase_device *kbdev; - unsigned long flags; - - kbdev = container_of(timer, struct kbase_device, - pm.backend.gpu_poweroff_timer); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* It is safe for this call to do nothing if the work item is already - * queued. The worker function will read the must up-to-date state of - * kbdev->pm.backend.gpu_poweroff_pending under lock. - * - * If a state change occurs while the worker function is processing, - * this call will succeed as a work item can be requeued once it has - * started processing. 
- */ - if (kbdev->pm.backend.gpu_poweroff_pending) - queue_work(kbdev->pm.backend.gpu_poweroff_wq, - &kbdev->pm.backend.gpu_poweroff_work); - - if (kbdev->pm.backend.shader_poweroff_pending || - kbdev->pm.backend.tiler_poweroff_pending) { - kbdev->pm.backend.shader_poweroff_pending_time--; - - KBASE_DEBUG_ASSERT( - kbdev->pm.backend.shader_poweroff_pending_time - >= 0); - - if (!kbdev->pm.backend.shader_poweroff_pending_time) - kbasep_pm_do_poweroff_cores(kbdev); - } + size_t i; - if (kbdev->pm.backend.poweroff_timer_needed) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + for (i = 0; i < ARRAY_SIZE(all_policy_list); ++i) { + const struct kbase_pm_policy *pol = all_policy_list[i]; - hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time); + if (platform_power_down_only && + (pol->flags & KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY)) + continue; - return HRTIMER_RESTART; + enabled_policy_list[enabled_policy_count++] = pol; } - - kbdev->pm.backend.poweroff_timer_running = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return HRTIMER_NORESTART; -} - -static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) -{ - unsigned long flags; - struct kbase_device *kbdev; - bool do_poweroff = false; - - kbdev = container_of(data, struct kbase_device, - pm.backend.gpu_poweroff_work); - - mutex_lock(&kbdev->pm.lock); - - if (kbdev->pm.backend.gpu_poweroff_pending == 0) { - mutex_unlock(&kbdev->pm.lock); - return; - } - - kbdev->pm.backend.gpu_poweroff_pending--; - - if (kbdev->pm.backend.gpu_poweroff_pending > 0) { - mutex_unlock(&kbdev->pm.lock); - return; - } - - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* Only power off the GPU if a request is still pending */ - if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev)) - do_poweroff = true; - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - if (do_poweroff) { - kbdev->pm.backend.poweroff_timer_needed = false; - hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); - kbdev->pm.backend.poweroff_timer_running = false; - - /* Power off the GPU */ - kbase_pm_do_poweroff(kbdev, false); - } - - mutex_unlock(&kbdev->pm.lock); } int kbase_pm_policy_init(struct kbase_device *kbdev) { - struct workqueue_struct *wq; - - wq = alloc_workqueue("kbase_pm_do_poweroff", - WQ_HIGHPRI | WQ_UNBOUND, 1); - if (!wq) - return -ENOMEM; - - kbdev->pm.backend.gpu_poweroff_wq = wq; - INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work, - kbasep_pm_do_gpu_poweroff_wq); - hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); - kbdev->pm.backend.gpu_poweroff_timer.function = - kbasep_pm_do_gpu_poweroff_callback; - kbdev->pm.backend.pm_current_policy = policy_list[0]; + generate_filtered_policy_list(); + if (enabled_policy_count == 0) + return -EINVAL; + + kbdev->pm.backend.pm_current_policy = enabled_policy_list[0]; kbdev->pm.backend.pm_current_policy->init(kbdev); - kbdev->pm.gpu_poweroff_time = - HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); - kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; - kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU; return 0; } @@ -246,29 +82,6 @@ int kbase_pm_policy_init(struct kbase_device *kbdev) void kbase_pm_policy_term(struct kbase_device *kbdev) { kbdev->pm.backend.pm_current_policy->term(kbdev); - destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq); -} - -void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) -{ - 
unsigned long flags; - - lockdep_assert_held(&kbdev->pm.lock); - - kbdev->pm.backend.poweroff_timer_needed = false; - hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->pm.backend.poweroff_timer_running = false; - - /* If wq is already running but is held off by pm.lock, make sure it has - * no effect */ - kbdev->pm.backend.gpu_poweroff_pending = 0; - - kbdev->pm.backend.shader_poweroff_pending = 0; - kbdev->pm.backend.tiler_poweroff_pending = 0; - kbdev->pm.backend.shader_poweroff_pending_time = 0; - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_pm_update_active(struct kbase_device *kbdev) @@ -291,35 +104,24 @@ void kbase_pm_update_active(struct kbase_device *kbdev) kbdev->pm.backend.pm_current_policy->name); if (active) { - if (backend->gpu_poweroff_pending) { - /* Cancel any pending power off request */ - backend->gpu_poweroff_pending = 0; - - /* If a request was pending then the GPU was still - * powered, so no need to continue */ - if (!kbdev->poweroff_pending) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); - return; - } - } - - if (!backend->poweroff_timer_running && !backend->gpu_powered && - (pm->poweroff_gpu_ticks || - pm->poweroff_shader_ticks)) { - backend->poweroff_timer_needed = true; - backend->poweroff_timer_running = true; - hrtimer_start(&backend->gpu_poweroff_timer, - pm->gpu_poweroff_time, - HRTIMER_MODE_REL); - } - /* Power on the GPU and any cores requested by the policy */ - if (pm->backend.poweroff_wait_in_progress) { + if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off && + pm->backend.poweroff_wait_in_progress) { KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); pm->backend.poweron_required = true; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } else { + /* Cancel the the invocation of + * kbase_pm_gpu_poweroff_wait_wq() from the L2 state + * machine. This is safe - it + * invoke_poweroff_wait_wq_when_l2_off is true, then + * the poweroff work hasn't even been queued yet, + * meaning we can go straight to powering on. + */ + pm->backend.invoke_poweroff_wait_wq_when_l2_off = false; + pm->backend.poweroff_wait_in_progress = false; + pm->backend.l2_desired = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_pm_do_poweron(kbdev, false); } @@ -328,89 +130,21 @@ void kbase_pm_update_active(struct kbase_device *kbdev) * when there are contexts active */ KBASE_DEBUG_ASSERT(pm->active_count == 0); - if (backend->shader_poweroff_pending || - backend->tiler_poweroff_pending) { - backend->shader_poweroff_pending = 0; - backend->tiler_poweroff_pending = 0; - backend->shader_poweroff_pending_time = 0; - } - /* Request power off */ if (pm->backend.gpu_powered) { - if (pm->poweroff_gpu_ticks) { - backend->gpu_poweroff_pending = - pm->poweroff_gpu_ticks; - backend->poweroff_timer_needed = true; - if (!backend->poweroff_timer_running) { - /* Start timer if not running (eg if - * power policy has been changed from - * always_on to something else). 
This - * will ensure the GPU is actually - * powered off */ - backend->poweroff_timer_running - = true; - hrtimer_start( - &backend->gpu_poweroff_timer, - pm->gpu_poweroff_time, - HRTIMER_MODE_REL); - } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); - } else { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); - - /* Power off the GPU immediately */ - kbase_pm_do_poweroff(kbdev, false); - } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Power off the GPU immediately */ + kbase_pm_do_poweroff(kbdev, false); } else { spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } } } -/** - * get_desired_shader_bitmap - Get the desired shader bitmap, based on the - * current power policy - * - * @kbdev: The kbase device structure for the device - * - * Queries the current power policy to determine if shader cores will be - * required in the current state, and apply any HW workarounds. - * - * Return: bitmap of desired shader cores - */ - -static u64 get_desired_shader_bitmap(struct kbase_device *kbdev) -{ - u64 desired_bitmap = 0u; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev)) - desired_bitmap = kbase_pm_ca_get_core_mask(kbdev); - - WARN(!desired_bitmap && kbdev->shader_needed_cnt, - "Shader cores are needed but policy '%s' did not make them needed", - kbdev->pm.backend.pm_current_policy->name); - - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { - /* Unless XAFFINITY is supported, enable core 0 if tiler - * required, regardless of core availability - */ - if (kbdev->tiler_needed_cnt > 0) - desired_bitmap |= 1; - } - - return desired_bitmap; -} - void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) { - u64 desired_bitmap; - u64 desired_tiler_bitmap; - bool cores_are_available; - bool do_poweroff = false; + bool shaders_desired; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -419,105 +153,20 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) if (kbdev->pm.backend.poweroff_wait_in_progress) return; - if (kbdev->protected_mode_transition && !kbdev->shader_needed_cnt && - !kbdev->tiler_needed_cnt) { + if (kbdev->pm.backend.protected_transition_override) /* We are trying to change in/out of protected mode - force all * cores off so that the L2 powers down */ - desired_bitmap = 0; - desired_tiler_bitmap = 0; - } else { - desired_bitmap = get_desired_shader_bitmap(kbdev); - - if (kbdev->tiler_needed_cnt > 0) - desired_tiler_bitmap = 1; - else - desired_tiler_bitmap = 0; - } + shaders_desired = false; + else + shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); - if (kbdev->pm.backend.desired_shader_state != desired_bitmap) + if (kbdev->pm.backend.shaders_desired != shaders_desired) { KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, - (u32)desired_bitmap); - /* Are any cores being powered on? 
*/ - if (~kbdev->pm.backend.desired_shader_state & desired_bitmap || - ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap || - kbdev->pm.backend.ca_in_transition) { - /* Check if we are powering off any cores before updating shader - * state */ - if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || - kbdev->pm.backend.desired_tiler_state & - ~desired_tiler_bitmap) { - /* Start timer to power off cores */ - kbdev->pm.backend.shader_poweroff_pending |= - (kbdev->pm.backend.desired_shader_state & - ~desired_bitmap); - kbdev->pm.backend.tiler_poweroff_pending |= - (kbdev->pm.backend.desired_tiler_state & - ~desired_tiler_bitmap); - - if (kbdev->pm.poweroff_shader_ticks && - !kbdev->protected_mode_transition) - kbdev->pm.backend.shader_poweroff_pending_time = - kbdev->pm.poweroff_shader_ticks; - else - do_poweroff = true; - } - - kbdev->pm.backend.desired_shader_state = desired_bitmap; - kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap; - - /* If any cores are being powered on, transition immediately */ - cores_are_available = kbase_pm_check_transitions_nolock(kbdev); - } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || - kbdev->pm.backend.desired_tiler_state & - ~desired_tiler_bitmap) { - /* Start timer to power off cores */ - kbdev->pm.backend.shader_poweroff_pending |= - (kbdev->pm.backend.desired_shader_state & - ~desired_bitmap); - kbdev->pm.backend.tiler_poweroff_pending |= - (kbdev->pm.backend.desired_tiler_state & - ~desired_tiler_bitmap); - if (kbdev->pm.poweroff_shader_ticks && - !kbdev->protected_mode_transition) - kbdev->pm.backend.shader_poweroff_pending_time = - kbdev->pm.poweroff_shader_ticks; - else - kbasep_pm_do_poweroff_cores(kbdev); - } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && - desired_tiler_bitmap != 0 && - kbdev->pm.backend.poweroff_timer_needed) { - /* If power policy is keeping cores on despite there being no - * active contexts then disable poweroff timer as it isn't - * required. 
- * Only reset poweroff_timer_needed if we're not in the middle - * of the power off callback */ - kbdev->pm.backend.poweroff_timer_needed = false; - } + (u32)kbdev->pm.backend.shaders_desired); - /* Ensure timer does not power off wanted cores and make sure to power - * off unwanted cores */ - if (kbdev->pm.backend.shader_poweroff_pending || - kbdev->pm.backend.tiler_poweroff_pending) { - kbdev->pm.backend.shader_poweroff_pending &= - ~(kbdev->pm.backend.desired_shader_state & - desired_bitmap); - kbdev->pm.backend.tiler_poweroff_pending &= - ~(kbdev->pm.backend.desired_tiler_state & - desired_tiler_bitmap); - - if (!kbdev->pm.backend.shader_poweroff_pending && - !kbdev->pm.backend.tiler_poweroff_pending) - kbdev->pm.backend.shader_poweroff_pending_time = 0; + kbdev->pm.backend.shaders_desired = shaders_desired; + kbase_pm_update_state(kbdev); } - - /* Shader poweroff is deferred to the end of the function, to eliminate - * issues caused by the core availability policy recursing into this - * function */ - if (do_poweroff) - kbasep_pm_do_poweroff_cores(kbdev); - - /* Don't need 'cores_are_available', because we don't return anything */ - CSTD_UNUSED(cores_are_available); } void kbase_pm_update_cores_state(struct kbase_device *kbdev) @@ -533,12 +182,11 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev) int kbase_pm_list_policies(const struct kbase_pm_policy * const **list) { - if (!list) - return POLICY_COUNT; - - *list = policy_list; + WARN_ON(enabled_policy_count == 0); + if (list) + *list = enabled_policy_list; - return POLICY_COUNT; + return enabled_policy_count; } KBASE_EXPORT_TEST_API(kbase_pm_list_policies); @@ -607,171 +255,3 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, } KBASE_EXPORT_TEST_API(kbase_pm_set_policy); - -void kbase_pm_request_cores(struct kbase_device *kbdev, - bool tiler_required, bool shader_required) -{ - kbase_pm_change_state change_gpu_state = 0u; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (shader_required) { - int cnt = ++kbdev->shader_needed_cnt; - - if (cnt == 1) - change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; - - KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt != 0); - } - - if (tiler_required) { - int cnt = ++kbdev->tiler_needed_cnt; - - if (cnt == 1) - change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; - - KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0); - } - - if (change_gpu_state) { - KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL, - NULL, 0u, kbdev->shader_needed_cnt); - KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_TILER_NEEDED, NULL, - NULL, 0u, kbdev->tiler_needed_cnt); - - kbase_pm_update_cores_state_nolock(kbdev); - } -} - -KBASE_EXPORT_TEST_API(kbase_pm_request_cores); - -void kbase_pm_release_cores(struct kbase_device *kbdev, - bool tiler_required, bool shader_required) -{ - kbase_pm_change_state change_gpu_state = 0u; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (shader_required) { - int cnt; - - KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt > 0); - - cnt = --kbdev->shader_needed_cnt; - - if (0 == cnt) { - change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; - } - } - - if (tiler_required) { - int cnt; - - KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0); - - cnt = --kbdev->tiler_needed_cnt; - - if (0 == cnt) - change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; - } - - if (change_gpu_state) { - KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_NEEDED, NULL, - NULL, 0u, kbdev->shader_needed_cnt); - KBASE_TRACE_ADD(kbdev, 
PM_RELEASE_CHANGE_TILER_NEEDED, NULL, - NULL, 0u, kbdev->tiler_needed_cnt); - - kbase_pm_update_cores_state_nolock(kbdev); - } -} - -KBASE_EXPORT_TEST_API(kbase_pm_release_cores); - -void kbase_pm_request_cores_sync(struct kbase_device *kbdev, - bool tiler_required, bool shader_required) -{ - unsigned long flags; - - kbase_pm_wait_for_poweroff_complete(kbdev); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_request_cores(kbdev, tiler_required, shader_required); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - kbase_pm_check_transitions_sync(kbdev); -} - -KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync); - -static void kbase_pm_l2_caches_ref(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbdev->l2_users_count++; - - KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0); - - /* Check for the required L2 transitions. - * Caller would block here for the L2 caches of all core groups to be - * powered on, so need to inform the Hw to power up all the L2 caches. - * Can't rely on the l2_users_count value being non-zero previously to - * avoid checking for the transition, as the count could be non-zero - * even if not all the instances of L2 cache are powered up since - * currently the power status of L2 is not tracked separately for each - * core group. Also if the GPU is reset while the L2 is on, L2 will be - * off but the count will be non-zero. - */ - kbase_pm_check_transitions_nolock(kbdev); -} - -void kbase_pm_request_l2_caches(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* Take the reference on l2_users_count and check core transitions. - */ - kbase_pm_l2_caches_ref(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - wait_event(kbdev->pm.backend.l2_powered_wait, - kbdev->pm.backend.l2_powered == 1); -} - -KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches); - -void kbase_pm_request_l2_caches_nolock(struct kbase_device *kbdev) -{ - /* Take the reference on l2_users_count and check core transitions. - */ - kbase_pm_l2_caches_ref(kbdev); -} - -void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbdev->l2_users_count++; -} - -KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on); - -void kbase_pm_release_l2_caches(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0); - - --kbdev->l2_users_count; - - if (!kbdev->l2_users_count) - kbase_pm_check_transitions_nolock(kbdev); -} - -KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.h b/mali_kbase/backend/gpu/mali_kbase_pm_policy.h index 2e86929..28d258f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.h @@ -64,61 +64,10 @@ void kbase_pm_update_active(struct kbase_device *kbdev); */ void kbase_pm_update_cores(struct kbase_device *kbdev); - -enum kbase_pm_cores_ready { - KBASE_CORES_NOT_READY = 0, - KBASE_NEW_AFFINITY = 1, - KBASE_CORES_READY = 2 -}; - - -/** - * kbase_pm_request_cores - Request the desired cores to be powered up. - * @kbdev: Kbase device - * @tiler_required: true if tiler is required - * @shader_required: true if shaders are required - * - * Called by the scheduler to request power to the desired cores. - * - * There is no guarantee that the HW will be powered up on return. 
Use - * kbase_pm_cores_requested()/kbase_pm_cores_ready() to verify that cores are - * now powered, or instead call kbase_pm_request_cores_sync(). - */ -void kbase_pm_request_cores(struct kbase_device *kbdev, bool tiler_required, - bool shader_required); - -/** - * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores() - * @kbdev: Kbase device - * @tiler_required: true if tiler is required - * @shader_required: true if shaders are required - * - * When this function returns, the @shader_cores will be in the READY state. - * - * This is safe variant of kbase_pm_check_transitions_sync(): it handles the - * work of ensuring the requested cores will remain powered until a matching - * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate) - * is made. - */ -void kbase_pm_request_cores_sync(struct kbase_device *kbdev, - bool tiler_required, bool shader_required); - -/** - * kbase_pm_release_cores - Request the desired cores to be powered down. - * @kbdev: Kbase device - * @tiler_required: true if tiler is required - * @shader_required: true if shaders are required - * - * Called by the scheduler to release its power reference on the desired cores. - */ -void kbase_pm_release_cores(struct kbase_device *kbdev, bool tiler_required, - bool shader_required); - /** * kbase_pm_cores_requested - Check that a power request has been locked into * the HW. * @kbdev: Kbase device - * @tiler_required: true if tiler is required * @shader_required: true if shaders are required * * Called by the scheduler to check if a power on request has been locked into @@ -136,112 +85,23 @@ void kbase_pm_release_cores(struct kbase_device *kbdev, bool tiler_required, * request is still pending. */ static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, - bool tiler_required, bool shader_required) + bool shader_required) { lockdep_assert_held(&kbdev->hwaccess_lock); - if ((shader_required && !kbdev->shader_available_bitmap) || - (tiler_required && !kbdev->tiler_available_bitmap)) + /* If the L2 & tiler are not on or pending, then the tiler is not yet + * available, and shaders are definitely not powered. + */ + if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON && + kbdev->pm.backend.l2_state != KBASE_L2_ON) return false; - return true; -} - -/** - * kbase_pm_cores_ready - Check that the required cores have been powered on by - * the HW. - * @kbdev: Kbase device - * @tiler_required: true if tiler is required - * @shader_required: true if shaders are required - * - * Called by the scheduler to check if cores are ready. - * - * Note that the caller should ensure that they have first requested cores - * before calling this function. - * - * Caller must hold the hwaccess_lock. - * - * Return: true if the cores are ready. - */ -static inline bool kbase_pm_cores_ready(struct kbase_device *kbdev, - bool tiler_required, bool shader_required) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - if ((shader_required && !kbdev->shader_ready_bitmap) || - (tiler_required && !kbdev->tiler_available_bitmap)) + if (shader_required && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) return false; return true; } -/** - * kbase_pm_request_l2_caches - Request l2 caches - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Request the use of l2 caches for all core groups, power up, wait and prevent - * the power manager from powering down the l2 caches. 
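For illustration, the reworked kbase_pm_cores_requested() above can be read as a pure predicate over the new L2 and shader state machines. The standalone C sketch below models only that decision with stand-in enum values; the KBASE_L2_* / KBASE_SHADERS_* names and the surrounding driver structures are simplified here, so this is an illustrative model rather than driver code:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in state values modelled on the kbase L2/shader state machines. */
enum l2_state { L2_OFF, L2_PEND_ON, L2_ON };
enum shaders_state {
	SHADERS_OFF_CORESTACK_OFF,
	SHADERS_PEND_ON_CORESTACK_ON,
	SHADERS_ON_CORESTACK_ON
};

struct pm_backend {
	enum l2_state l2_state;
	enum shaders_state shaders_state;
};

/* Mirrors kbase_pm_cores_requested(): a request is "locked in" once the L2
 * (and therefore the tiler) is on or pending on, and, when shaders were
 * asked for, once the shader cores are on or pending on as well.
 */
static bool cores_requested(const struct pm_backend *pm, bool shader_required)
{
	if (pm->l2_state != L2_PEND_ON && pm->l2_state != L2_ON)
		return false;

	if (shader_required &&
	    pm->shaders_state != SHADERS_PEND_ON_CORESTACK_ON &&
	    pm->shaders_state != SHADERS_ON_CORESTACK_ON)
		return false;

	return true;
}

int main(void)
{
	struct pm_backend pm = { L2_PEND_ON, SHADERS_OFF_CORESTACK_OFF };

	printf("tiler-only request locked in: %d\n", cores_requested(&pm, false));
	printf("shader request locked in:     %d\n", cores_requested(&pm, true));
	return 0;
}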
- * - * This tells the power management that the caches should be powered up, and - * they should remain powered, irrespective of the usage of shader cores. This - * does not return until the l2 caches are powered up. - * - * The caller must call kbase_pm_release_l2_caches() when they are finished - * to allow normal power management of the l2 caches to resume. - * - * This should only be used when power management is active. - */ -void kbase_pm_request_l2_caches(struct kbase_device *kbdev); - -/** - * kbase_pm_request_l2_caches_nolock - Request l2 caches, nolock version - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Request the use of l2 caches for all core groups and power up without - * waiting for power manager to actually power up the cores. This is done - * because the call to this function is done from within the atomic context - * and the actual l2 caches being powered up is checked at a later stage. - * The reference taken on l2 caches is removed when the protected mode atom - * is released so there is no need to make a call to a matching - * release_l2_caches(). - * - * This function is used specifically for the case when l2 caches are - * to be powered up as part of the sequence for entering protected mode. - * - * This should only be used when power management is active. - */ -void kbase_pm_request_l2_caches_nolock(struct kbase_device *kbdev); - -/** - * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Increment the count of l2 users but do not attempt to power on the l2 - * - * It is the callers responsibility to ensure that the l2 is already powered up - * and to eventually call kbase_pm_release_l2_caches() - */ -void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev); - -/** - * kbase_pm_release_l2_caches - Release l2 caches - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Release the use of l2 caches for all core groups and allow the power manager - * to power them down when necessary. - * - * This tells the power management that the caches can be powered down if - * necessary, with respect to the usage of shader cores. - * - * The caller must have called kbase_pm_request_l2_caches() prior to a call - * to this. - * - * This should only be used when power management is active. - */ -void kbase_pm_release_l2_caches(struct kbase_device *kbdev); - #endif /* _KBASE_PM_POLICY_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_time.h b/mali_kbase/backend/gpu/mali_kbase_time.h deleted file mode 100644 index ece7009..0000000 --- a/mali_kbase/backend/gpu/mali_kbase_time.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_BACKEND_TIME_H_ -#define _KBASE_BACKEND_TIME_H_ - -/** - * kbase_backend_get_gpu_time() - Get current GPU time - * @kbdev: Device pointer - * @cycle_counter: Pointer to u64 to store cycle counter in - * @system_time: Pointer to u64 to store system time in - * @ts: Pointer to struct timespec to store current monotonic - * time in - */ -void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, - u64 *system_time, struct timespec *ts); - -/** - * kbase_wait_write_flush() - Wait for GPU write flush - * @kbdev: Kbase device - * - * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush - * its write buffer. - * - * If GPU resets occur then the counters are reset to zero, the delay may not be - * as expected. - * - * This function is only in use for BASE_HW_ISSUE_6367 - */ -#ifdef CONFIG_MALI_NO_MALI -static inline void kbase_wait_write_flush(struct kbase_device *kbdev) -{ -} -#else -void kbase_wait_write_flush(struct kbase_device *kbdev); -#endif - -#endif /* _KBASE_BACKEND_TIME_H_ */ diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp index ba3a25c..2cf685c 100644 --- a/mali_kbase/build.bp +++ b/mali_kbase/build.bp @@ -19,9 +19,6 @@ bob_defaults { no_mali: { kbuild_options: ["CONFIG_MALI_NO_MALI=y"], }, - mali_corestack: { - kbuild_options: ["CONFIG_MALI_CORESTACK=y"], - }, mali_devfreq: { kbuild_options: ["CONFIG_MALI_DEVFREQ=y"], }, @@ -84,8 +81,15 @@ bob_kernel_module { "CONFIG_MALI_MIDGARD=m", "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", - "MALI_MOCK_TEST={{.mali_mock_test}}", ], + mali_fpga_bus_logger: { + extra_symbols: [ + "bus_logger", + ], + }, + mali_corestack: { + kbuild_options: ["CONFIG_MALI_CORESTACK=y"], + }, mali_error_inject: { kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"], }, @@ -98,9 +102,6 @@ bob_kernel_module { mali_2mb_alloc: { kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], }, - mali_mock_test: { - srcs: ["tests/internal/src/mock/mali_kbase_pm_driver_mock.c"], - }, gpu_has_csf: { srcs: [ "csf/*.c", diff --git a/mali_kbase/ipa/mali_kbase_ipa.c b/mali_kbase/ipa/mali_kbase_ipa.c index 520f8fc..9da2878 100644 --- a/mali_kbase/ipa/mali_kbase_ipa.c +++ b/mali_kbase/ipa/mali_kbase_ipa.c @@ -38,16 +38,15 @@ #endif #define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model" -#define KBASE_IPA_G71_MODEL_NAME "mali-g71-power-model" -#define KBASE_IPA_G72_MODEL_NAME "mali-g72-power-model" -#define KBASE_IPA_TNOX_MODEL_NAME "mali-tnox-power-model" -#define KBASE_IPA_TGOX_R1_MODEL_NAME "mali-tgox_r1-power-model" -static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = { +static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = { &kbase_simple_ipa_model_ops, &kbase_g71_ipa_model_ops, &kbase_g72_ipa_model_ops, - &kbase_tnox_ipa_model_ops + &kbase_g76_ipa_model_ops, + &kbase_g52_ipa_model_ops, + &kbase_g52_r1_ipa_model_ops, + &kbase_g51_ipa_model_ops }; int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) @@ -68,13 +67,13 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) return err; } -static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, +const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, const char *name) { int i; for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) { - struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i]; + const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i]; 
if (!strcmp(ops->name, name)) return ops; @@ -84,6 +83,7 @@ static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device return NULL; } +KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find); const char *kbase_ipa_model_name_from_id(u32 gpu_id) { @@ -93,18 +93,20 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id) if (GPU_ID_IS_NEW_FORMAT(prod_id)) { switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { case GPU_ID2_PRODUCT_TMIX: - return KBASE_IPA_G71_MODEL_NAME; + return "mali-g71-power-model"; case GPU_ID2_PRODUCT_THEX: - return KBASE_IPA_G72_MODEL_NAME; + return "mali-g72-power-model"; case GPU_ID2_PRODUCT_TNOX: - return KBASE_IPA_TNOX_MODEL_NAME; + return "mali-g76-power-model"; + case GPU_ID2_PRODUCT_TSIX: + return "mali-g51-power-model"; case GPU_ID2_PRODUCT_TGOX: if ((gpu_id & GPU_ID2_VERSION_MAJOR) == (0 << GPU_ID2_VERSION_MAJOR_SHIFT)) - /* TGOX r0 shares a power model with TNOX */ - return KBASE_IPA_TNOX_MODEL_NAME; + /* g52 aliased to g76 power-model's ops */ + return "mali-g52-power-model"; else - return KBASE_IPA_TGOX_R1_MODEL_NAME; + return "mali-g52_r1-power-model"; default: return KBASE_IPA_FALLBACK_MODEL_NAME; } @@ -112,6 +114,7 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id) return KBASE_IPA_FALLBACK_MODEL_NAME; } +KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); static struct device_node *get_model_dt_node(struct kbase_ipa_model *model) { @@ -244,7 +247,7 @@ void kbase_ipa_term_model(struct kbase_ipa_model *model) KBASE_EXPORT_TEST_API(kbase_ipa_term_model); struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, - struct kbase_ipa_model_ops *ops) + const struct kbase_ipa_model_ops *ops) { struct kbase_ipa_model *model; int err; @@ -298,7 +301,7 @@ int kbase_ipa_init(struct kbase_device *kbdev) { const char *model_name; - struct kbase_ipa_model_ops *ops; + const struct kbase_ipa_model_ops *ops; struct kbase_ipa_model *default_model = NULL; int err; @@ -371,6 +374,8 @@ void kbase_ipa_term(struct kbase_device *kbdev) mutex_lock(&kbdev->ipa.lock); kbase_ipa_term_locked(kbdev); mutex_unlock(&kbdev->ipa.lock); + + mutex_destroy(&kbdev->ipa.lock); } KBASE_EXPORT_TEST_API(kbase_ipa_term); @@ -517,6 +522,9 @@ static unsigned long kbase_get_static_power(unsigned long voltage) struct kbase_device *kbdev = kbase_find_device(-1); #endif + if (!kbdev) + return 0ul; + mutex_lock(&kbdev->ipa.lock); model = get_current_model(kbdev); @@ -552,6 +560,9 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq, struct kbase_device *kbdev = kbase_find_device(-1); #endif + if (!kbdev) + return 0ul; + mutex_lock(&kbdev->ipa.lock); model = kbdev->ipa.fallback_model; @@ -627,6 +638,9 @@ int kbase_get_real_power(struct devfreq *df, u32 *power, int ret; struct kbase_device *kbdev = dev_get_drvdata(&df->dev); + if (!kbdev) + return -ENODEV; + mutex_lock(&kbdev->ipa.lock); ret = kbase_get_real_power_locked(kbdev, power, freq, voltage); mutex_unlock(&kbdev->ipa.lock); diff --git a/mali_kbase/ipa/mali_kbase_ipa.h b/mali_kbase/ipa/mali_kbase_ipa.h index 4656ded..7462048 100644 --- a/mali_kbase/ipa/mali_kbase_ipa.h +++ b/mali_kbase/ipa/mali_kbase_ipa.h @@ -40,7 +40,7 @@ struct devfreq; struct kbase_ipa_model { struct kbase_device *kbdev; void *model_data; - struct kbase_ipa_model_ops *ops; + const struct kbase_ipa_model_ops *ops; struct list_head params; bool missing_dt_node_warning; }; @@ -154,6 +154,25 @@ void kbase_ipa_term(struct kbase_device *kbdev); int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); /** + * kbase_ipa_model_ops_find - 
Lookup an IPA model using its name + * @kbdev: pointer to kbase device + * @name: name of model to lookup + * + * Return: Pointer to model's 'ops' structure, or NULL if the lookup failed. + */ +const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, + const char *name); + +/** + * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID + * @gpu_id: GPU ID of GPU the model will be used for + * + * Return: The name of the appropriate counter-based model, or the name of the + * fallback model if no counter model exists. + */ +const char *kbase_ipa_model_name_from_id(u32 gpu_id); + +/** * kbase_ipa_init_model - Initilaize the particular IPA model * @kbdev: pointer to kbase device * @ops: pointer to object containing model specific methods. @@ -164,7 +183,7 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); * Return: pointer to kbase_ipa_model on success, NULL on error */ struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, - struct kbase_ipa_model_ops *ops); + const struct kbase_ipa_model_ops *ops); /** * kbase_ipa_term_model - Terminate the particular IPA model * @model: pointer to the IPA model object, already initialized @@ -183,10 +202,12 @@ void kbase_ipa_term_model(struct kbase_ipa_model *model); */ void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev); -extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops; -extern struct kbase_ipa_model_ops kbase_g72_ipa_model_ops; -extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops; -extern struct kbase_ipa_model_ops kbase_tgox_r1_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops; /** * kbase_get_real_power() - get the real power consumption of the GPU diff --git a/mali_kbase/ipa/mali_kbase_ipa_simple.c b/mali_kbase/ipa/mali_kbase_ipa_simple.c index e684df4..c8399ab 100644 --- a/mali_kbase/ipa/mali_kbase_ipa_simple.c +++ b/mali_kbase/ipa/mali_kbase_ipa_simple.c @@ -268,8 +268,9 @@ static int kbase_simple_power_model_init(struct kbase_ipa_model *model) (void *) model_data, "mali-simple-power-model-temp-poll"); if (IS_ERR(model_data->poll_temperature_thread)) { + err = PTR_ERR(model_data->poll_temperature_thread); kfree(model_data); - return PTR_ERR(model_data->poll_temperature_thread); + return err; } err = add_params(model); diff --git a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c b/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c index 69c3230..1a6ba01 100644 --- a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c +++ b/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c @@ -44,7 +44,7 @@ static inline u32 kbase_ipa_read_hwcnt( struct kbase_ipa_model_vinstr_data *model_data, u32 offset) { - u8 *p = model_data->vinstr_buffer; + u8 *p = (u8 *)model_data->dump_buf.dump_buf; return *(u32 *)&p[offset]; } @@ -118,125 +118,69 @@ s64 kbase_ipa_single_counter( return counter_value * (s64) coeff; } -#ifndef CONFIG_MALI_NO_MALI -/** - * kbase_ipa_gpu_active - Inform IPA that GPU is now active - * @model_data: Pointer to model data - * - * This function may cause vinstr to become active. 
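For illustration, IPA model selection after this patch is a two-step lookup: kbase_ipa_model_name_from_id() maps the GPU ID to a model name string, and kbase_ipa_model_ops_find() resolves that name against the registered ops table, with unknown parts falling back to the simple model. The self-contained sketch below mirrors that flow; the registry contents, the ID values and the init stub are placeholders, not the driver's real tables:

#include <stdio.h>
#include <string.h>

struct ipa_model_ops {
	const char *name;
	int (*init)(void);   /* placeholder for the real init/term/coeff hooks */
};

static int dummy_init(void) { return 0; }

/* Illustrative registry, analogous to kbase_ipa_all_model_ops[]. */
static const struct ipa_model_ops all_ops[] = {
	{ "mali-simple-power-model", dummy_init },
	{ "mali-g71-power-model",    dummy_init },
	{ "mali-g76-power-model",    dummy_init },
};

/* Analogous to kbase_ipa_model_ops_find(): match by name, NULL if unknown. */
static const struct ipa_model_ops *ops_find(const char *name)
{
	size_t i;

	for (i = 0; i < sizeof(all_ops) / sizeof(all_ops[0]); ++i)
		if (!strcmp(all_ops[i].name, name))
			return &all_ops[i];
	return NULL;
}

/* Analogous to kbase_ipa_model_name_from_id(): unknown IDs fall back to the
 * simple model. The ID values used here are invented for the example.
 */
static const char *model_name_from_id(unsigned int product_id)
{
	switch (product_id) {
	case 0x6000: return "mali-g71-power-model";
	case 0x7001: return "mali-g76-power-model";
	default:     return "mali-simple-power-model";
	}
}

int main(void)
{
	const char *name = model_name_from_id(0x7001);
	const struct ipa_model_ops *ops = ops_find(name);

	printf("selected %s (%sfound)\n", name, ops ? "" : "not ");
	return 0;
}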
- */ -static void kbase_ipa_gpu_active(struct kbase_ipa_model_vinstr_data *model_data) +int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) { + int errcode; struct kbase_device *kbdev = model_data->kbdev; + struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt; + struct kbase_hwcnt_enable_map enable_map; + const struct kbase_hwcnt_metadata *metadata = + kbase_hwcnt_virtualizer_metadata(hvirt); - lockdep_assert_held(&kbdev->pm.lock); + if (!metadata) + return -1; - if (!kbdev->ipa.vinstr_active) { - kbdev->ipa.vinstr_active = true; - kbase_vinstr_resume_client(model_data->vinstr_cli); + errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map); + if (errcode) { + dev_err(kbdev->dev, "Failed to allocate IPA enable map"); + return errcode; } -} -/** - * kbase_ipa_gpu_idle - Inform IPA that GPU is now idle - * @model_data: Pointer to model data - * - * This function may cause vinstr to become idle. - */ -static void kbase_ipa_gpu_idle(struct kbase_ipa_model_vinstr_data *model_data) -{ - struct kbase_device *kbdev = model_data->kbdev; + kbase_hwcnt_enable_map_enable_all(&enable_map); - lockdep_assert_held(&kbdev->pm.lock); - - if (kbdev->ipa.vinstr_active) { - kbase_vinstr_suspend_client(model_data->vinstr_cli); - kbdev->ipa.vinstr_active = false; + errcode = kbase_hwcnt_virtualizer_client_create( + hvirt, &enable_map, &model_data->hvirt_cli); + kbase_hwcnt_enable_map_free(&enable_map); + if (errcode) { + dev_err(kbdev->dev, "Failed to register IPA with virtualizer"); + model_data->hvirt_cli = NULL; + return errcode; } -} -#endif - -int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) -{ - struct kbase_device *kbdev = model_data->kbdev; - struct kbase_ioctl_hwcnt_reader_setup setup; - size_t dump_size; - dump_size = kbase_vinstr_dump_size(kbdev); - model_data->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL); - if (!model_data->vinstr_buffer) { + errcode = kbase_hwcnt_dump_buffer_alloc( + metadata, &model_data->dump_buf); + if (errcode) { dev_err(kbdev->dev, "Failed to allocate IPA dump buffer"); - return -1; - } - - setup.jm_bm = ~0u; - setup.shader_bm = ~0u; - setup.tiler_bm = ~0u; - setup.mmu_l2_bm = ~0u; - model_data->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx, - &setup, model_data->vinstr_buffer); - if (!model_data->vinstr_cli) { - dev_err(kbdev->dev, "Failed to register IPA with vinstr core"); - kfree(model_data->vinstr_buffer); - model_data->vinstr_buffer = NULL; - return -1; + kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); + model_data->hvirt_cli = NULL; + return errcode; } - kbase_vinstr_hwc_clear(model_data->vinstr_cli); - -#ifndef CONFIG_MALI_NO_MALI - kbdev->ipa.gpu_active_callback = kbase_ipa_gpu_active; - kbdev->ipa.gpu_idle_callback = kbase_ipa_gpu_idle; - kbdev->ipa.model_data = model_data; - kbdev->ipa.vinstr_active = false; - /* Suspend vinstr, to ensure that the GPU is powered off until there is - * something to execute. 
- */ - kbase_vinstr_suspend_client(model_data->vinstr_cli); -#else - kbdev->ipa.gpu_active_callback = NULL; - kbdev->ipa.gpu_idle_callback = NULL; - kbdev->ipa.vinstr_active = true; -#endif - return 0; } void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) { - struct kbase_device *kbdev = model_data->kbdev; - - kbdev->ipa.gpu_active_callback = NULL; - kbdev->ipa.gpu_idle_callback = NULL; - kbdev->ipa.model_data = NULL; - kbdev->ipa.vinstr_active = false; - - if (model_data->vinstr_cli) - kbase_vinstr_detach_client(model_data->vinstr_cli); - - model_data->vinstr_cli = NULL; - kfree(model_data->vinstr_buffer); - model_data->vinstr_buffer = NULL; + if (model_data->hvirt_cli) { + kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); + kbase_hwcnt_dump_buffer_free(&model_data->dump_buf); + model_data->hvirt_cli = NULL; + } } int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) { struct kbase_ipa_model_vinstr_data *model_data = (struct kbase_ipa_model_vinstr_data *)model->model_data; - struct kbase_device *kbdev = model_data->kbdev; s64 energy = 0; size_t i; u64 coeff = 0, coeff_mul = 0; + u64 start_ts_ns, end_ts_ns; u32 active_cycles; int err = 0; - if (!kbdev->ipa.vinstr_active) { - err = -ENODATA; - goto err0; /* GPU powered off - no counters to collect */ - } - - err = kbase_vinstr_hwc_dump(model_data->vinstr_cli, - BASE_HWCNT_READER_EVENT_MANUAL); + err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli, + &start_ts_ns, &end_ts_ns, &model_data->dump_buf); if (err) goto err0; diff --git a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h b/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h index 0deafae..46e3cd4 100644 --- a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h +++ b/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h @@ -24,6 +24,8 @@ #define _KBASE_IPA_VINSTR_COMMON_H_ #include "mali_kbase.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_types.h" /* Maximum number of IPA groups for an IPA model. */ #define KBASE_IPA_MAX_GROUP_DEF_NUM 16 @@ -49,8 +51,8 @@ typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinst * @groups_def_num: Number of elements in the array of IPA groups. * @get_active_cycles: Callback to return number of active cycles during * counter sample period - * @vinstr_cli: vinstr client handle - * @vinstr_buffer: buffer to dump hardware counters onto + * @hvirt_cli: hardware counter virtualizer client handle + * @dump_buf: buffer to dump hardware counters onto * @reference_voltage: voltage, in mV, of the operating point used when * deriving the power model coefficients. 
Range approx * 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13 @@ -72,8 +74,8 @@ struct kbase_ipa_model_vinstr_data { const struct kbase_ipa_group *groups_def; size_t groups_def_num; kbase_ipa_get_active_cycles_callback get_active_cycles; - struct kbase_vinstr_client *vinstr_cli; - void *vinstr_buffer; + struct kbase_hwcnt_virtualizer_client *hvirt_cli; + struct kbase_hwcnt_dump_buffer dump_buf; s32 reference_voltage; s32 scaling_factor; s32 min_sample_cycles; diff --git a/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c b/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c index 8366033..6365d2f 100644 --- a/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c +++ b/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c @@ -248,7 +248,7 @@ static const struct kbase_ipa_group ipa_groups_def_g72[] = { }, }; -static const struct kbase_ipa_group ipa_groups_def_tnox[] = { +static const struct kbase_ipa_group ipa_groups_def_g76[] = { { .name = "gpu_active", .default_value = 122000, @@ -281,7 +281,7 @@ static const struct kbase_ipa_group ipa_groups_def_tnox[] = { }, }; -static const struct kbase_ipa_group ipa_groups_def_tgox_r1[] = { +static const struct kbase_ipa_group ipa_groups_def_g52_r1[] = { { .name = "gpu_active", .default_value = 224200, @@ -314,6 +314,48 @@ static const struct kbase_ipa_group ipa_groups_def_tgox_r1[] = { }, }; +static const struct kbase_ipa_group ipa_groups_def_g51[] = { + { + .name = "gpu_active", + .default_value = 201400, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, + { + .name = "exec_instr_count", + .default_value = 392700, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "vary_instr", + .default_value = 274000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_VARY_INSTR, + }, + { + .name = "tex_tfch_num_operations", + .default_value = 528000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, + }, + { + .name = "l2_access", + .default_value = 506400, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, +}; + +#define IPA_POWER_MODEL_OPS(gpu, init_token) \ + const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ + .name = "mali-" #gpu "-power-model", \ + .init = kbase_ ## init_token ## _power_model_init, \ + .term = kbase_ipa_vinstr_common_model_term, \ + .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ + }; \ + KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) + #define STANDARD_POWER_MODEL(gpu, reference_voltage) \ static int kbase_ ## gpu ## _power_model_init(\ struct kbase_ipa_model *model) \ @@ -326,15 +368,16 @@ static const struct kbase_ipa_group ipa_groups_def_tgox_r1[] = { kbase_g7x_get_active_cycles, \ (reference_voltage)); \ } \ - struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ - .name = "mali-" #gpu "-power-model", \ - .init = kbase_ ## gpu ## _power_model_init, \ - .term = kbase_ipa_vinstr_common_model_term, \ - .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ - }; \ - KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) + IPA_POWER_MODEL_OPS(gpu, gpu) + +#define ALIAS_POWER_MODEL(gpu, as_gpu) \ + IPA_POWER_MODEL_OPS(gpu, as_gpu) STANDARD_POWER_MODEL(g71, 800); STANDARD_POWER_MODEL(g72, 800); -STANDARD_POWER_MODEL(tnox, 800); -STANDARD_POWER_MODEL(tgox_r1, 1000); +STANDARD_POWER_MODEL(g76, 800); +STANDARD_POWER_MODEL(g52_r1, 1000); +STANDARD_POWER_MODEL(g51, 1000); + +/* g52 is an alias of g76 (TNOX) for IPA */ +ALIAS_POWER_MODEL(g52, 
g76); diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h index 69c22f2..5571f84 100644 --- a/mali_kbase/mali_base_hwconfig_features.h +++ b/mali_kbase/mali_base_hwconfig_features.h @@ -59,6 +59,8 @@ enum base_hw_feature { BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -203,6 +205,7 @@ static const enum base_hw_feature base_hw_features_tMIx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -230,6 +233,7 @@ static const enum base_hw_feature base_hw_features_tHEx[] = { BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -257,6 +261,7 @@ static const enum base_hw_feature base_hw_features_tSIx[] = { BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -284,6 +289,7 @@ static const enum base_hw_feature base_hw_features_tDVx[] = { BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -314,6 +320,7 @@ static const enum base_hw_feature base_hw_features_tNOx[] = { BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -344,6 +351,7 @@ static const enum base_hw_feature base_hw_features_tGOx[] = { BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -398,6 +406,8 @@ static const enum base_hw_feature base_hw_features_tTRx[] = { BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -425,10 +435,12 @@ static const enum base_hw_feature base_hw_features_tNAx[] = { BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tULx[] = { +static const enum base_hw_feature base_hw_features_tBEx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_XAFFINITY, @@ -452,10 +464,12 @@ static const enum base_hw_feature base_hw_features_tULx[] = { BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tDUx[] = { +static const enum base_hw_feature base_hw_features_tULx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_XAFFINITY, @@ -479,6 +493,7 @@ static const enum base_hw_feature base_hw_features_tDUx[] = { BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -506,6 +521,7 @@ static const enum base_hw_feature base_hw_features_tBOx[] = { BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, 
BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -533,6 +549,7 @@ static const enum base_hw_feature base_hw_features_tIDx[] = { BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -560,6 +577,7 @@ static const enum base_hw_feature base_hw_features_tVAx[] = { BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index b8bd3d0..d7c40ef 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ b/mali_kbase/mali_base_hwconfig_issues.h @@ -1222,7 +1222,6 @@ static const enum base_hw_issue base_hw_issues_model_tKAx[] = { static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END @@ -1232,7 +1231,6 @@ static const enum base_hw_issue base_hw_issues_model_tTRx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END @@ -1241,7 +1239,6 @@ static const enum base_hw_issue base_hw_issues_model_tTRx[] = { static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END @@ -1251,45 +1248,40 @@ static const enum base_hw_issue base_hw_issues_model_tNAx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tULx_r0p0[] = { +static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tULx[] = { +static const enum base_hw_issue base_hw_issues_model_tBEx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { +static const enum base_hw_issue base_hw_issues_tULx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tDUx[] = { +static const enum base_hw_issue base_hw_issues_model_tULx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END @@ -1317,7 +1309,6 @@ static const enum base_hw_issue base_hw_issues_model_tBOx[] = { static const enum base_hw_issue base_hw_issues_tIDx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END @@ -1327,7 +1318,6 @@ static const enum base_hw_issue base_hw_issues_model_tIDx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END @@ -1336,7 
+1326,6 @@ static const enum base_hw_issue base_hw_issues_model_tIDx[] = { static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END @@ -1346,7 +1335,6 @@ static const enum base_hw_issue base_hw_issues_model_tVAx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END diff --git a/mali_kbase/mali_base_kernel.h b/mali_kbase/mali_base_kernel.h index 297df8b..70dc3c5 100644 --- a/mali_kbase/mali_base_kernel.h +++ b/mali_kbase/mali_base_kernel.h @@ -348,15 +348,6 @@ struct base_mem_import_user_buffer { /** - * @brief Result codes of changing the size of the backing store allocated to a tmem region - */ -typedef enum base_backing_threshold_status { - BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */ - BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */ - BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */ -} base_backing_threshold_status; - -/** * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs * @{ */ @@ -797,24 +788,6 @@ typedef u32 base_jd_core_req; ((core_req & BASE_JD_REQ_SOFT_JOB) || \ (core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) -/** - * enum kbase_atom_coreref_state - States to model state machine processed by - * kbasep_js_job_check_ref_cores(), which handles retaining cores for power - * management. - * - * @KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: Starting state: Cores must be - * requested. - * @KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: Cores requested, but - * waiting for them to be powered - * @KBASE_ATOM_COREREF_STATE_READY: Cores are powered, atom can be submitted to - * HW - */ -enum kbase_atom_coreref_state { - KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED, - KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES, - KBASE_ATOM_COREREF_STATE_READY -}; - /* * Base Atom priority * @@ -822,15 +795,16 @@ enum kbase_atom_coreref_state { * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority * level that is not one of those defined below. * - * Priority levels only affect scheduling between atoms of the same type within - * a base context, and only after the atoms have had dependencies resolved. - * Fragment atoms does not affect non-frament atoms with lower priorities, and - * the other way around. For example, a low priority atom that has had its - * dependencies resolved might run before a higher priority atom that has not - * had its dependencies resolved. + * Priority levels only affect scheduling after the atoms have had dependencies + * resolved. For example, a low priority atom that has had its dependencies + * resolved might run before a higher priority atom that has not had its + * dependencies resolved. * - * The scheduling between base contexts/processes and between atoms from - * different base contexts/processes is unaffected by atom priority. + * In general, fragment atoms do not affect non-fragment atoms with + * lower priorities, and vice versa. One exception is that there is only one + * priority value for each context. So a high-priority (e.g.) fragment atom + * could increase its context priority, causing its non-fragment atoms to also + * be scheduled sooner. 
* * The atoms are scheduled as follows with respect to their priorities: * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies @@ -842,6 +816,14 @@ enum kbase_atom_coreref_state { * - Any two atoms that have the same priority could run in any order with * respect to each other. That is, there is no ordering constraint between * atoms of the same priority. + * + * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are + * scheduled between contexts. The default value, 0, will cause higher-priority + * atoms to be scheduled first, regardless of their context. The value 1 will + * use a round-robin algorithm when deciding which context's atoms to schedule + * next, so higher-priority atoms can only preempt lower priority atoms within + * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and + * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. */ typedef u8 base_jd_prio; diff --git a/mali_kbase/mali_base_vendor_specific_func.h b/mali_kbase/mali_base_vendor_specific_func.h deleted file mode 100644 index 5e8add8..0000000 --- a/mali_kbase/mali_base_vendor_specific_func.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -#ifndef _BASE_VENDOR_SPEC_FUNC_H_ -#define _BASE_VENDOR_SPEC_FUNC_H_ - -int kbase_get_vendor_specific_cpu_clock_speed(u32 * const); - -#endif /*_BASE_VENDOR_SPEC_FUNC_H_*/ diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h index cdd9ecc..24a021d 100644 --- a/mali_kbase/mali_kbase.h +++ b/mali_kbase/mali_kbase.h @@ -68,7 +68,7 @@ #include "mali_kbase_jd_debugfs.h" #include "mali_kbase_gpuprops.h" #include "mali_kbase_jm.h" -#include "mali_kbase_vinstr.h" +#include "mali_kbase_ioctl.h" #include "ipa/mali_kbase_ipa.h" @@ -353,22 +353,13 @@ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) * * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * This takes into account the following - * - * - whether there is an active context reference - * - * - whether any of the shader cores or the tiler are needed - * - * It should generally be preferred against checking just - * kbdev->pm.active_count on its own, because some code paths drop their - * reference on this whilst still having the shader cores/tiler in use. + * This takes into account whether there is an active context reference. 
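The base_jd_prio documentation above mentions the js_ctx_scheduling_mode sysfs file for choosing between system-wide priority scheduling (0) and per-context round-robin scheduling (1). A minimal userspace write is sketched below; the sysfs path shown is an assumption and varies with how the Mali device node is exposed on a given platform:

#include <stdio.h>

int main(void)
{
	/* Hypothetical path; on other systems the kbase attributes may sit
	 * under the GPU's platform device directory instead.
	 */
	const char *path =
		"/sys/class/misc/mali0/device/js_ctx_scheduling_mode";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("js_ctx_scheduling_mode");
		return 1;
	}
	/* 1 selects the round-robin (process-local priority) mode. */
	if (fputs("1", f) < 0)
		perror("write");
	fclose(f);
	return 0;
}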
* * Return: true if the GPU is active, false otherwise */ static inline bool kbase_pm_is_active(struct kbase_device *kbdev) { - return (kbdev->pm.active_count > 0 || kbdev->shader_needed_cnt || - kbdev->tiler_needed_cnt); + return kbdev->pm.active_count > 0; } /** @@ -713,6 +704,3 @@ int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size); #endif - - - diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h index 9d918a8..bb2ab53 100644 --- a/mali_kbase/mali_kbase_config_defaults.h +++ b/mali_kbase/mali_kbase_config_defaults.h @@ -171,11 +171,6 @@ enum { #define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */ /** - * Power Manager number of ticks before GPU is powered off - */ -#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */ - -/** * Default scheduling tick granuality */ #define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ diff --git a/mali_kbase/mali_kbase_context.c b/mali_kbase/mali_kbase_context.c index 628f89a..59609d7 100644 --- a/mali_kbase/mali_kbase_context.c +++ b/mali_kbase/mali_kbase_context.c @@ -149,7 +149,7 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1; - mutex_init(&kctx->vinstr_cli_lock); + mutex_init(&kctx->legacy_hwcnt_lock); kbase_timer_setup(&kctx->soft_job_timeout, kbasep_soft_job_timeout_worker); @@ -325,9 +325,6 @@ int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags) if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); - /* Latch the initial attributes into the Job Scheduler */ - kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); out: diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index d101d97..382285f 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -48,7 +48,12 @@ #include <mali_kbase_hwaccess_jm.h> #include <mali_kbase_ctx_sched.h> #include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> #include "mali_kbase_ioctl.h" +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_legacy.h" +#include "mali_kbase_vinstr.h" #ifdef CONFIG_MALI_CINSTR_GWT #include "mali_kbase_gwt.h" @@ -161,22 +166,25 @@ enum { #endif /* CONFIG_MALI_DEVFREQ */ inited_tlstream = (1u << 4), inited_backend_early = (1u << 5), - inited_backend_late = (1u << 6), - inited_device = (1u << 7), - inited_vinstr = (1u << 8), - inited_job_fault = (1u << 10), - inited_sysfs_group = (1u << 11), - inited_misc_register = (1u << 12), - inited_get_device = (1u << 13), - inited_dev_list = (1u << 14), - inited_debugfs = (1u << 15), - inited_gpu_device = (1u << 16), - inited_registers_map = (1u << 17), - inited_io_history = (1u << 18), - inited_power_control = (1u << 19), - inited_buslogger = (1u << 20), - inited_protected = (1u << 21), - inited_ctx_sched = (1u << 22) + inited_hwcnt_gpu_iface = (1u << 6), + inited_hwcnt_gpu_ctx = (1u << 7), + inited_hwcnt_gpu_virt = (1u << 8), + inited_vinstr = (1u << 9), + inited_backend_late = (1u << 10), + inited_device = (1u << 11), + inited_job_fault = (1u << 13), + inited_sysfs_group = (1u << 14), + inited_misc_register = (1u << 15), + inited_get_device = (1u << 16), + inited_dev_list = (1u << 17), + inited_debugfs = (1u << 18), + inited_gpu_device = 
(1u << 19), + inited_registers_map = (1u << 20), + inited_io_history = (1u << 21), + inited_power_control = (1u << 22), + inited_buslogger = (1u << 23), + inited_protected = (1u << 24), + inited_ctx_sched = (1u << 25) }; static struct kbase_device *to_kbase_device(struct device *dev) @@ -494,17 +502,13 @@ static int kbase_release(struct inode *inode, struct file *filp) filp->private_data = NULL; - mutex_lock(&kctx->vinstr_cli_lock); + mutex_lock(&kctx->legacy_hwcnt_lock); /* If this client was performing hwcnt dumping and did not explicitly - * detach itself, remove it from the vinstr core now */ - if (kctx->vinstr_cli) { - struct kbase_ioctl_hwcnt_enable enable; - - enable.dump_buffer = 0llu; - kbase_vinstr_legacy_hwc_setup( - kbdev->vinstr_ctx, &kctx->vinstr_cli, &enable); - } - mutex_unlock(&kctx->vinstr_cli_lock); + * detach itself, destroy it now + */ + kbase_hwcnt_legacy_client_destroy(kctx->legacy_hwcnt_cli); + kctx->legacy_hwcnt_cli = NULL; + mutex_unlock(&kctx->legacy_hwcnt_lock); kbase_destroy_context(kctx); @@ -592,10 +596,15 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx, if (flags & BASE_MEM_FLAGS_KERNEL_ONLY) return -ENOMEM; + /* Force SAME_VA if a 64-bit client. + * The only exception is GPU-executable memory if an EXEC_VA zone + * has been initialized. In that case, GPU-executable memory may + * or may not be SAME_VA. + */ if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) { - /* force SAME_VA if a 64-bit client */ - flags |= BASE_MEM_SAME_VA; + if (!(flags & BASE_MEM_PROT_GPU_EX) || !kbase_has_exec_va_zone(kctx)) + flags |= BASE_MEM_SAME_VA; } @@ -629,13 +638,7 @@ static int kbase_api_mem_free(struct kbase_context *kctx, static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, struct kbase_ioctl_hwcnt_reader_setup *setup) { - int ret; - - mutex_lock(&kctx->vinstr_cli_lock); - ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup); - mutex_unlock(&kctx->vinstr_cli_lock); - - return ret; + return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup); } static int kbase_api_hwcnt_enable(struct kbase_context *kctx, @@ -643,10 +646,31 @@ static int kbase_api_hwcnt_enable(struct kbase_context *kctx, { int ret; - mutex_lock(&kctx->vinstr_cli_lock); - ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx, - &kctx->vinstr_cli, enable); - mutex_unlock(&kctx->vinstr_cli_lock); + mutex_lock(&kctx->legacy_hwcnt_lock); + if (enable->dump_buffer != 0) { + /* Non-zero dump buffer, so user wants to create the client */ + if (kctx->legacy_hwcnt_cli == NULL) { + ret = kbase_hwcnt_legacy_client_create( + kctx->kbdev->hwcnt_gpu_virt, + enable, + &kctx->legacy_hwcnt_cli); + } else { + /* This context already has a client */ + ret = -EBUSY; + } + } else { + /* Zero dump buffer, so user wants to destroy the client */ + if (kctx->legacy_hwcnt_cli != NULL) { + kbase_hwcnt_legacy_client_destroy( + kctx->legacy_hwcnt_cli); + kctx->legacy_hwcnt_cli = NULL; + ret = 0; + } else { + /* This context has no client to destroy */ + ret = -EINVAL; + } + } + mutex_unlock(&kctx->legacy_hwcnt_lock); return ret; } @@ -655,10 +679,9 @@ static int kbase_api_hwcnt_dump(struct kbase_context *kctx) { int ret; - mutex_lock(&kctx->vinstr_cli_lock); - ret = kbase_vinstr_hwc_dump(kctx->vinstr_cli, - BASE_HWCNT_READER_EVENT_MANUAL); - mutex_unlock(&kctx->vinstr_cli_lock); + mutex_lock(&kctx->legacy_hwcnt_lock); + ret = kbase_hwcnt_legacy_client_dump(kctx->legacy_hwcnt_cli); + mutex_unlock(&kctx->legacy_hwcnt_lock); return ret; } @@ 
-667,9 +690,9 @@ static int kbase_api_hwcnt_clear(struct kbase_context *kctx) { int ret; - mutex_lock(&kctx->vinstr_cli_lock); - ret = kbase_vinstr_hwc_clear(kctx->vinstr_cli); - mutex_unlock(&kctx->vinstr_cli_lock); + mutex_lock(&kctx->legacy_hwcnt_lock); + ret = kbase_hwcnt_legacy_client_clear(kctx->legacy_hwcnt_cli); + mutex_unlock(&kctx->legacy_hwcnt_lock); return ret; } @@ -749,6 +772,12 @@ static int kbase_api_mem_jit_init(struct kbase_context *kctx, jit_init->max_allocations, jit_init->trim_level); } +static int kbase_api_mem_exec_init(struct kbase_context *kctx, + struct kbase_ioctl_mem_exec_init *exec_init) +{ + return kbase_region_tracker_init_exec(kctx, exec_init->va_pages); +} + static int kbase_api_mem_sync(struct kbase_context *kctx, struct kbase_ioctl_mem_sync *sync) { @@ -1169,6 +1198,11 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) kbase_api_mem_jit_init, struct kbase_ioctl_mem_jit_init); break; + case KBASE_IOCTL_MEM_EXEC_INIT: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_EXEC_INIT, + kbase_api_mem_exec_init, + struct kbase_ioctl_mem_exec_init); + break; case KBASE_IOCTL_MEM_SYNC: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC, kbase_api_mem_sync, @@ -1550,7 +1584,10 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, { struct kbase_device *kbdev; u64 new_core_mask[3]; - int items; + int items, i; + ssize_t err = count; + unsigned long flags; + u64 shader_present, group0_core_mask; kbdev = to_kbase_device(dev); @@ -1561,50 +1598,59 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, &new_core_mask[0], &new_core_mask[1], &new_core_mask[2]); + if (items != 1 && items != 3) { + dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" + "Use format <core_mask>\n" + "or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n"); + err = -EINVAL; + goto end; + } + if (items == 1) new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; - if (items == 1 || items == 3) { - u64 shader_present = - kbdev->gpu_props.props.raw_props.shader_present; - u64 group0_core_mask = - kbdev->gpu_props.props.coherency_info.group[0]. 
- core_mask; - - if ((new_core_mask[0] & shader_present) != new_core_mask[0] || - !(new_core_mask[0] & group0_core_mask) || - (new_core_mask[1] & shader_present) != - new_core_mask[1] || - !(new_core_mask[1] & group0_core_mask) || - (new_core_mask[2] & shader_present) != - new_core_mask[2] || - !(new_core_mask[2] & group0_core_mask)) { - dev_err(dev, "power_policy: invalid core specification\n"); - return -EINVAL; - } - - if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || - kbdev->pm.debug_core_mask[1] != - new_core_mask[1] || - kbdev->pm.debug_core_mask[2] != - new_core_mask[2]) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], - new_core_mask[1], new_core_mask[2]); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + shader_present = kbdev->gpu_props.props.raw_props.shader_present; + group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + + for (i = 0; i < 3; ++i) { + if ((new_core_mask[i] & shader_present) != new_core_mask[i]) { + dev_err(dev, "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)", + new_core_mask[i], i, shader_present); + err = -EINVAL; + goto unlock; + + } else if (!(new_core_mask[i] & shader_present & kbdev->pm.backend.ca_cores_enabled)) { + dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", + new_core_mask[i], i, + kbdev->gpu_props.props.raw_props.shader_present, + kbdev->pm.backend.ca_cores_enabled); + err = -EINVAL; + goto unlock; + + } else if (!(new_core_mask[i] & group0_core_mask)) { + dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n", + new_core_mask[i], i, group0_core_mask); + err = -EINVAL; + goto unlock; } + } - return count; + if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || + kbdev->pm.debug_core_mask[1] != + new_core_mask[1] || + kbdev->pm.debug_core_mask[2] != + new_core_mask[2]) { + + kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], + new_core_mask[1], new_core_mask[2]); } - dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n" - "Use format <core_mask>\n" - "or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n"); - return -EINVAL; +unlock: + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +end: + return err; } /* @@ -2438,9 +2484,11 @@ static ssize_t set_pm_poweroff(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kbase_device *kbdev; + struct kbasep_pm_tick_timer_state *stt; int items; - s64 gpu_poweroff_time; - int poweroff_shader_ticks, poweroff_gpu_ticks; + u64 gpu_poweroff_time; + unsigned int poweroff_shader_ticks, poweroff_gpu_ticks; + unsigned long flags; kbdev = to_kbase_device(dev); if (!kbdev) @@ -2455,9 +2503,16 @@ static ssize_t set_pm_poweroff(struct device *dev, return -EINVAL; } - kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); - kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks; - kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + stt = &kbdev->pm.backend.shader_tick_timer; + stt->configured_interval = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); + stt->configured_ticks = poweroff_shader_ticks; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (poweroff_gpu_ticks != 0) + dev_warn(kbdev->dev, "Separate GPU poweroff delay no longer 
supported.\n"); return count; } @@ -2477,16 +2532,22 @@ static ssize_t show_pm_poweroff(struct device *dev, struct device_attribute *attr, char * const buf) { struct kbase_device *kbdev; + struct kbasep_pm_tick_timer_state *stt; ssize_t ret; + unsigned long flags; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n", - ktime_to_ns(kbdev->pm.gpu_poweroff_time), - kbdev->pm.poweroff_shader_ticks, - kbdev->pm.poweroff_gpu_ticks); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + stt = &kbdev->pm.backend.shader_tick_timer; + ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n", + ktime_to_ns(stt->configured_interval), + stt->configured_ticks); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return ret; } @@ -2958,6 +3019,45 @@ static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { #endif /* CONFIG_DEBUG_FS */ +static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + protected_mode_hwcnt_disable_work); + unsigned long flags; + + bool do_disable; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !kbdev->protected_mode_hwcnt_desired && + !kbdev->protected_mode_hwcnt_disabled; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!do_disable) + return; + + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !kbdev->protected_mode_hwcnt_desired && + !kbdev->protected_mode_hwcnt_disabled; + + if (do_disable) { + /* Protected mode state did not change while we were doing the + * disable, so commit the work we just performed and continue + * the state machine. + */ + kbdev->protected_mode_hwcnt_disabled = true; + kbase_backend_slot_update(kbdev); + } else { + /* Protected mode state was updated while we were doing the + * disable, so we need to undo the disable we just performed. 
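The rewritten set_core_mask() handler above accepts a per-slot mask only if it passes three checks: it is a subset of shader_present, it intersects the currently enabled core-availability mask, and it intersects coherency group 0. The standalone sketch below reproduces just that validation; the bitmaps describe an imaginary 4-core GPU and are purely illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the three checks performed on each new_core_mask[i]. */
static bool core_mask_is_valid(uint64_t mask, uint64_t shader_present,
			       uint64_t ca_enabled, uint64_t group0_mask)
{
	if ((mask & shader_present) != mask)
		return false;            /* includes non-existent cores */
	if (!(mask & shader_present & ca_enabled))
		return false;            /* no currently available core */
	if (!(mask & group0_mask))
		return false;            /* no core from coherency group 0 */
	return true;
}

int main(void)
{
	/* Example bitmaps for an imaginary 4-core GPU, group 0 = cores 0-1. */
	const uint64_t shader_present = 0xF, ca_enabled = 0xF, group0 = 0x3;

	printf("mask 0x3 valid: %d\n",
	       core_mask_is_valid(0x3, shader_present, ca_enabled, group0));
	printf("mask 0xC valid: %d\n",
	       core_mask_is_valid(0xC, shader_present, ca_enabled, group0));
	return 0;
}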
+ */ + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + static int kbasep_protected_mode_init(struct kbase_device *kbdev) { #ifdef CONFIG_OF @@ -2975,6 +3075,10 @@ static int kbasep_protected_mode_init(struct kbase_device *kbdev) kbdev->protected_dev->data = kbdev; kbdev->protected_ops = &kbase_native_protected_ops; kbdev->protected_mode_support = true; + INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work, + kbasep_protected_mode_hwcnt_disable_worker); + kbdev->protected_mode_hwcnt_desired = true; + kbdev->protected_mode_hwcnt_disabled = false; return 0; } @@ -3024,8 +3128,10 @@ static int kbasep_protected_mode_init(struct kbase_device *kbdev) static void kbasep_protected_mode_term(struct kbase_device *kbdev) { - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { + cancel_work_sync(&kbdev->protected_mode_hwcnt_disable_work); kfree(kbdev->protected_dev); + } } #ifdef CONFIG_MALI_NO_MALI @@ -3211,7 +3317,6 @@ static void power_control_term(struct kbase_device *kbdev) #ifdef MALI_KBASE_BUILD #ifdef CONFIG_DEBUG_FS -#if KBASE_GPU_RESET_EN #include <mali_kbase_hwaccess_jm.h> static void trigger_quirks_reload(struct kbase_device *kbdev) @@ -3247,7 +3352,6 @@ MAKE_QUIRK_ACCESSORS(tiler); MAKE_QUIRK_ACCESSORS(mmu); MAKE_QUIRK_ACCESSORS(jm); -#endif /* KBASE_GPU_RESET_EN */ /** * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read @@ -3328,7 +3432,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbase_debug_job_fault_debugfs_init(kbdev); kbasep_gpu_memory_debugfs_init(kbdev); kbase_as_fault_debugfs_init(kbdev); -#if KBASE_GPU_RESET_EN /* fops_* variables created by invocations of macro * MAKE_QUIRK_ACCESSORS() above. */ debugfs_create_file("quirks_sc", 0644, @@ -3343,7 +3446,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) debugfs_create_file("quirks_jm", 0644, kbdev->mali_debugfs_directory, kbdev, &fops_jm_quirks); -#endif /* KBASE_GPU_RESET_EN */ debugfs_create_bool("infinite_cache", 0644, debugfs_ctx_defaults_directory, @@ -3558,14 +3660,29 @@ static int kbase_platform_device_remove(struct platform_device *pdev) #endif + if (kbdev->inited_subsys & inited_backend_late) { + kbase_backend_late_term(kbdev); + kbdev->inited_subsys &= ~inited_backend_late; + } + if (kbdev->inited_subsys & inited_vinstr) { kbase_vinstr_term(kbdev->vinstr_ctx); kbdev->inited_subsys &= ~inited_vinstr; } - if (kbdev->inited_subsys & inited_backend_late) { - kbase_backend_late_term(kbdev); - kbdev->inited_subsys &= ~inited_backend_late; + if (kbdev->inited_subsys & inited_hwcnt_gpu_virt) { + kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); + kbdev->inited_subsys &= ~inited_hwcnt_gpu_virt; + } + + if (kbdev->inited_subsys & inited_hwcnt_gpu_ctx) { + kbase_hwcnt_context_term(kbdev->hwcnt_gpu_ctx); + kbdev->inited_subsys &= ~inited_hwcnt_gpu_ctx; + } + + if (kbdev->inited_subsys & inited_hwcnt_gpu_iface) { + kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface); + kbdev->inited_subsys &= ~inited_hwcnt_gpu_iface; } if (kbdev->inited_subsys & inited_tlstream) { @@ -3790,20 +3907,40 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->inited_subsys |= inited_tlstream; - err = kbase_backend_late_init(kbdev); + /* Initialize the kctx list. This is used by vinstr. 
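kbasep_protected_mode_hwcnt_disable_worker() above follows a check, unlocked work, re-check pattern: it samples the desired/disabled flags under hwaccess_lock, drops the lock for the potentially sleeping counter disable, then re-checks before committing, re-enabling if the desired state changed in the meantime. The sketch below models that shape with an ordinary mutex and stub enable/disable calls; it is a simplified illustration, not the driver code:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool hwcnt_desired = false;   /* cleared when entering protected mode */
static bool hwcnt_disabled = false;  /* committed state */

/* Stand-ins for kbase_hwcnt_context_disable()/_enable(), which may sleep. */
static void slow_disable(void) { }
static void slow_enable(void)  { }

static void disable_worker(void)
{
	bool do_disable;

	pthread_mutex_lock(&lock);
	do_disable = !hwcnt_desired && !hwcnt_disabled;
	pthread_mutex_unlock(&lock);

	if (!do_disable)
		return;

	slow_disable();                  /* done outside the lock */

	pthread_mutex_lock(&lock);
	do_disable = !hwcnt_desired && !hwcnt_disabled;
	if (do_disable)
		hwcnt_disabled = true;   /* state unchanged: commit */
	else
		slow_enable();           /* state changed underneath us: undo */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	disable_worker();
	printf("hwcnt disabled: %d\n", hwcnt_disabled);
	return 0;
}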
*/ + mutex_init(&kbdev->kctx_list_lock); + INIT_LIST_HEAD(&kbdev->kctx_list); + + err = kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface); if (err) { - dev_err(kbdev->dev, "Late backend initialization failed\n"); + dev_err(kbdev->dev, "GPU hwcnt backend creation failed\n"); kbase_platform_device_remove(pdev); return err; } - kbdev->inited_subsys |= inited_backend_late; + kbdev->inited_subsys |= inited_hwcnt_gpu_iface; - /* Initialize the kctx list. This is used by vinstr. */ - mutex_init(&kbdev->kctx_list_lock); - INIT_LIST_HEAD(&kbdev->kctx_list); + err = kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, + &kbdev->hwcnt_gpu_ctx); + if (err) { + dev_err(kbdev->dev, + "GPU hwcnt context initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_hwcnt_gpu_ctx; - kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); - if (!kbdev->vinstr_ctx) { + err = kbase_hwcnt_virtualizer_init( + kbdev->hwcnt_gpu_ctx, &kbdev->hwcnt_gpu_virt); + if (err) { + dev_err(kbdev->dev, + "GPU hwcnt virtualizer initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_hwcnt_gpu_virt; + + err = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); + if (err) { dev_err(kbdev->dev, "Virtual instrumentation initialization failed\n"); kbase_platform_device_remove(pdev); @@ -3811,9 +3948,18 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->inited_subsys |= inited_vinstr; + err = kbase_backend_late_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Late backend initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_backend_late; + + #ifdef CONFIG_MALI_DEVFREQ - /* Devfreq uses vinstr, so must be initialized after it. */ + /* Devfreq uses hardware counters, so must be initialized after it. */ err = kbase_devfreq_init(kbdev); if (!err) kbdev->inited_subsys |= inited_devfreq; diff --git a/mali_kbase/mali_kbase_debug_job_fault.c b/mali_kbase/mali_kbase_debug_job_fault.c index 0029fe3..88bb0d3 100644 --- a/mali_kbase/mali_kbase_debug_job_fault.c +++ b/mali_kbase/mali_kbase_debug_job_fault.c @@ -364,7 +364,7 @@ static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) * job done but we delayed it. Now we should clean cache * earlier. Then the GPU memory dump should be correct. */ - kbase_backend_cacheclean(kbdev, event->katom); + kbase_backend_cache_clean(kbdev, event->katom); } else return NULL; diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index 07ef140..a135742 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -40,6 +40,7 @@ #include <mali_kbase_instr_defs.h> #include <mali_kbase_pm.h> #include <mali_kbase_gpuprops_types.h> +#include <mali_kbase_hwcnt_backend_gpu.h> #include <protected_mode_switcher.h> @@ -143,8 +144,6 @@ #define BASE_MAX_NR_AS 16 /* mmu */ -#define MIDGARD_MMU_VA_BITS 48 - #define MIDGARD_MMU_LEVEL(x) (x) #define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0) @@ -425,8 +424,8 @@ enum kbase_atom_gpu_rb_state { * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any atoms * currently submitted to GPU and protected mode transition is * not already in progress. - * @KBASE_ATOM_ENTER_PROTECTED_VINSTR: Wait for vinstr to suspend before entry into - * protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to + * become disabled before entry into protected mode. 
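A summary of the layering that the reordered probe and remove paths above encode (a sketch, not code from the patch):

/*
 * hwcnt_gpu_iface    - raw GPU counter backend, created first
 *   hwcnt_gpu_ctx    - counter context/accumulator built on the backend
 *     hwcnt_gpu_virt - virtualizer that multiplexes multiple clients
 *       vinstr_ctx   - userspace instrumentation clients
 *       devfreq/IPA  - in-kernel counter users, initialised last
 *
 * kbase_platform_device_remove() tears the layers down in reverse order.
 */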
* @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation * for the coherency change. L2 shall be powered down and GPU shall * come out of fully coherent mode before entering protected mode. @@ -442,7 +441,7 @@ enum kbase_atom_enter_protected_state { * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK. */ KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, - KBASE_ATOM_ENTER_PROTECTED_VINSTR, + KBASE_ATOM_ENTER_PROTECTED_HWCNT, KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY, KBASE_ATOM_ENTER_PROTECTED_FINISHED, @@ -513,8 +512,6 @@ struct kbase_ext_res { * @jc: GPU address of the job-chain. * @softjob_data: Copy of data read from the user space buffer that @jc * points to. - * @coreref_state: state of the atom with respect to retention of shader - * cores for affinity & power management. * @fence: Stores either an input or output sync fence, depending * on soft-job type * @sync_waiter: Pointer to the sync fence waiter structure passed to the @@ -607,7 +604,6 @@ struct kbase_jd_atom { u32 device_nr; u64 jc; void *softjob_data; - enum kbase_atom_coreref_state coreref_state; #if defined(CONFIG_SYNC) struct sync_fence *fence; struct sync_fence_waiter sync_waiter; @@ -1073,15 +1069,6 @@ struct kbase_pm_device_data { /* Time in milliseconds between each dvfs sample */ u32 dvfs_period; - /* Period of GPU poweroff timer */ - ktime_t gpu_poweroff_time; - - /* Number of ticks of GPU poweroff timer before shader is powered off */ - int poweroff_shader_ticks; - - /* Number of ticks of GPU poweroff timer before GPU is powered off */ - int poweroff_gpu_ticks; - struct kbase_pm_backend_data backend; }; @@ -1254,34 +1241,19 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * configuration/properties of GPU HW device in use. * @hw_issues_mask: List of SW workarounds for HW issues * @hw_features_mask: List of available HW features. - * @shader_needed_cnt: Count for the 64 shader cores, incremented when - * shaders are requested for use and decremented later - * when they are no longer required. - * @tiler_needed_cnt: Count for the Tiler block shader cores, incremented - * when Tiler is requested for use and decremented - * later when the Tiler is no longer required. * @disjoint_event: struct for keeping track of the disjoint information, * that whether the GPU is in a disjoint state and the * number of disjoint events that have occurred on GPU. - * @l2_users_count: Refcount for tracking users of the l2 cache, e.g. - * when using hardware counter instrumentation. - * @shader_available_bitmap: Bitmap of shader cores that are currently available, - * powered up and the power policy is happy for jobs - * to be submitted to these cores. These are updated - * by the power management code. The job scheduler - * should avoid submitting new jobs to any cores - * that are not marked as available. - * @tiler_available_bitmap: Bitmap of tiler units that are currently available. - * @l2_available_bitmap: Bitmap of the currently available Level 2 caches. - * @stack_available_bitmap: Bitmap of the currently available Core stacks. - * @shader_ready_bitmap: Bitmap of shader cores that are ready (powered on) - * @shader_transitioning_bitmap: Bitmap of shader cores that are currently changing - * power state. * @nr_hw_address_spaces: Number of address spaces actually available in the * GPU, remains constant after driver initialisation. 
* @nr_user_address_spaces: Number of address spaces available to user contexts * @hwcnt: Structure used for instrumentation and HW counters * dumping + * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access. + * @hwcnt_gpu_ctx: Context for GPU hardware counter access. + * @hwaccess_lock must be held when calling + * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx. + * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters. * @vinstr_ctx: vinstr context created per device * @trace_lock: Lock to serialize the access to trace buffer. * @trace_first_out: Index/offset in the trace buffer at which the first @@ -1294,8 +1266,14 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * @reset_timeout_ms: Number of milliseconds to wait for the soft stop to * complete for the GPU jobs before proceeding with the * GPU reset. - * @cacheclean_lock: Lock to serialize the clean & invalidation of GPU caches, - * between Job Manager backend & Instrumentation code. + * @cache_clean_in_progress: Set when a cache clean has been started, and + * cleared when it has finished. This prevents multiple + * cache cleans being done simultaneously. + * @cache_clean_queued: Set if a cache clean is invoked while another is in + * progress. If this happens, another cache clean needs + * to be triggered immediately after completion of the + * current one. + * @cache_clean_wait: Signalled when a cache clean has finished. * @platform_context: Platform specific private data to be accessed by * platform specific config files only. * @kctx_list: List of kbase_contexts created for the device, including @@ -1398,6 +1376,13 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * @protected_mode: set to TRUE when GPU is put into protected mode * @protected_mode_transition: set to TRUE when GPU is transitioning into or * out of protected mode. + * @protected_mode_hwcnt_desired: True if we want GPU hardware counters to be + * enabled. Counters must be disabled before transition + * into protected mode. + * @protected_mode_hwcnt_disabled: True if GPU hardware counters are not + * enabled. + * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware + * counters, used if atomic disable is not possible. * @protected_mode_support: set to true if protected mode is supported. 
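The three cache_clean_* fields documented above replace the old cacheclean_lock mutex; a minimal sketch, using a hypothetical helper name, of how a caller could wait for an in-flight clean to finish (the real wait and completion paths live in the job manager backend):

static void kbase_gpu_wait_cache_clean_sketch(struct kbase_device *kbdev)
{
	/* cache_clean_in_progress is cleared, and cache_clean_wait signalled,
	 * by the GPU IRQ handler once the cache-clean-completed interrupt is
	 * raised; a queued follow-up clean (cache_clean_queued) is started
	 * before the flag is finally cleared.
	 */
	wait_event(kbdev->cache_clean_wait,
		   !kbdev->cache_clean_in_progress);
}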
* @buslogger: Pointer to the structure required for interfacing * with the bus logger module to set the size of buffer @@ -1471,24 +1456,11 @@ struct kbase_device { unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; - u32 tiler_needed_cnt; - u32 shader_needed_cnt; - struct { atomic_t count; atomic_t state; } disjoint_event; - u32 l2_users_count; - - u64 shader_available_bitmap; - u64 tiler_available_bitmap; - u64 l2_available_bitmap; - u64 stack_available_bitmap; - - u64 shader_ready_bitmap; - u64 shader_transitioning_bitmap; - s8 nr_hw_address_spaces; s8 nr_user_address_spaces; @@ -1498,10 +1470,14 @@ struct kbase_device { struct kbase_context *kctx; u64 addr; + u64 addr_bytes; struct kbase_instr_backend backend; } hwcnt; + struct kbase_hwcnt_backend_interface hwcnt_gpu_iface; + struct kbase_hwcnt_context *hwcnt_gpu_ctx; + struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt; struct kbase_vinstr_context *vinstr_ctx; #if KBASE_TRACE_ENABLE @@ -1513,7 +1489,9 @@ struct kbase_device { u32 reset_timeout_ms; - struct mutex cacheclean_lock; + bool cache_clean_in_progress; + bool cache_clean_queued; + wait_queue_head_t cache_clean_wait; void *platform_context; @@ -1548,27 +1526,9 @@ struct kbase_device { * the difference between last_metrics and the current values. */ struct kbasep_pm_metrics last_metrics; - - /* - * gpu_active_callback - Inform IPA that GPU is now active - * @model_data: Pointer to model data - */ - void (*gpu_active_callback)( - struct kbase_ipa_model_vinstr_data *model_data); - - /* - * gpu_idle_callback - Inform IPA that GPU is now idle - * @model_data: Pointer to model data - */ - void (*gpu_idle_callback)( - struct kbase_ipa_model_vinstr_data *model_data); - /* Model data to pass to ipa_gpu_active/idle() */ struct kbase_ipa_model_vinstr_data *model_data; - /* true if IPA is currently using vinstr */ - bool vinstr_active; - /* true if use of fallback model has been forced by the User */ bool force_fallback_model; } ipa; @@ -1642,6 +1602,12 @@ struct kbase_device { bool protected_mode_transition; + bool protected_mode_hwcnt_desired; + + bool protected_mode_hwcnt_disabled; + + struct work_struct protected_mode_hwcnt_disable_work; + bool protected_mode_support; #ifdef CONFIG_MALI_FPGA_BUS_LOGGER @@ -1824,6 +1790,9 @@ struct kbase_sub_alloc { * having the same value for GPU & CPU virtual address. * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA * zone of the GPU virtual address space. + * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA + * zone of the GPU virtual address space. Used for GPU-executable + * allocations which don't need the SAME_VA property. * @cookies: Bitmask containing of BITS_PER_LONG bits, used mainly for * SAME_VA allocations to defer the reservation of memory region * (from the GPU virtual address space) from base_mem_alloc @@ -1896,6 +1865,10 @@ struct kbase_sub_alloc { * pages used for GPU allocations, done for the context, * to the memory consumed by the process. * @same_va_end: End address of the SAME_VA zone (in 4KB page units) + * @exec_va_start: Start address of the EXEC_VA zone (in 4KB page units) + * or U64_MAX if the EXEC_VA zone is uninitialized. + * @gpu_va_end: End address of the GPU va space (in 4KB page units) + * @jit_va: Indicates if a JIT_VA zone has been created. 
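Per the exec_va_start description above, a context only gains an EXEC_VA zone on demand; a hypothetical helper illustrating the sentinel check (not from the patch):

static bool kctx_has_exec_va_zone_sketch(struct kbase_context *kctx)
{
	/* Callers would normally hold kctx->reg_lock; exec_va_start keeps
	 * its U64_MAX sentinel until an EXEC_VA zone is actually created.
	 */
	return kctx->exec_va_start != U64_MAX;
}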
* @timeline: Object tracking the number of atoms currently in flight for * the context and thread group id of the process, i.e. @tgid. * @mem_profile_data: Buffer containing the profiling information provided by @@ -1930,9 +1903,11 @@ struct kbase_sub_alloc { * @slots_pullable: Bitmask of slots, indicating the slots for which the * context has pullable atoms in the runnable tree. * @work: Work structure used for deferred ASID assignment. - * @vinstr_cli: Pointer to the legacy userspace vinstr client, there can - * be only such client per kbase context. - * @vinstr_cli_lock: Lock used for the vinstr ioctl calls made for @vinstr_cli. + * @legacy_hwcnt_cli: Pointer to the legacy userspace hardware counters + * client, there can be only such client per kbase + * context. + * @legacy_hwcnt_lock: Lock used to prevent concurrent access to + * @legacy_hwcnt_cli. * @completed_jobs: List containing completed atoms for which base_jd_event is * to be posted. * @work_count: Number of work items, corresponding to atoms, currently @@ -2017,6 +1992,7 @@ struct kbase_context { struct mutex reg_lock; struct rb_root reg_rbtree_same; struct rb_root reg_rbtree_custom; + struct rb_root reg_rbtree_exec; unsigned long cookies; @@ -2060,6 +2036,9 @@ struct kbase_context { spinlock_t mm_update_lock; struct mm_struct __rcu *process_mm; u64 same_va_end; + u64 exec_va_start; + u64 gpu_va_end; + bool jit_va; #ifdef CONFIG_DEBUG_FS char *mem_profile_data; @@ -2087,8 +2066,8 @@ struct kbase_context { struct work_struct work; - struct kbase_vinstr_client *vinstr_cli; - struct mutex vinstr_cli_lock; + struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli; + struct mutex legacy_hwcnt_lock; struct list_head completed_jobs; atomic_t work_count; diff --git a/mali_kbase/mali_kbase_device.c b/mali_kbase/mali_kbase_device.c index 44d16a7..530bb45 100644 --- a/mali_kbase/mali_kbase_device.c +++ b/mali_kbase/mali_kbase_device.c @@ -222,7 +222,7 @@ int kbase_device_init(struct kbase_device * const kbdev) if (err) goto term_as; - mutex_init(&kbdev->cacheclean_lock); + init_waitqueue_head(&kbdev->cache_clean_wait); kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev); diff --git a/mali_kbase/mali_kbase_gator_api.c b/mali_kbase/mali_kbase_gator_api.c index 7077c3a..1719edf 100644 --- a/mali_kbase/mali_kbase_gator_api.c +++ b/mali_kbase/mali_kbase_gator_api.c @@ -25,6 +25,9 @@ #include "mali_kbase_mem_linux.h" #include "mali_kbase_gator_api.h" #include "mali_kbase_gator_hwcnt_names.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_hwcnt_virtualizer.h" #define MALI_MAX_CORES_PER_GROUP 4 #define MALI_MAX_NUM_BLOCKS_PER_GROUP 8 @@ -33,8 +36,9 @@ struct kbase_gator_hwcnt_handles { struct kbase_device *kbdev; - struct kbase_vinstr_client *vinstr_cli; - void *vinstr_buffer; + struct kbase_hwcnt_virtualizer_client *hvcli; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer dump_buf; struct work_struct dump_work; int dump_complete; spinlock_t dump_lock; @@ -173,8 +177,10 @@ KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names); struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info) { + int errcode; struct kbase_gator_hwcnt_handles *hand; - struct kbase_ioctl_hwcnt_reader_setup setup; + const struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_physical_enable_map phys_map; uint32_t dump_size = 0, i = 0; if (!in_out_info) @@ -192,11 +198,20 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct 
kbase_gator_hwcn if (!hand->kbdev) goto free_hand; - dump_size = kbase_vinstr_dump_size(hand->kbdev); - hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL); - if (!hand->vinstr_buffer) + metadata = kbase_hwcnt_virtualizer_metadata( + hand->kbdev->hwcnt_gpu_virt); + if (!metadata) goto release_device; - in_out_info->kernel_dump_buffer = hand->vinstr_buffer; + + errcode = kbase_hwcnt_enable_map_alloc(metadata, &hand->enable_map); + if (errcode) + goto release_device; + + errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hand->dump_buf); + if (errcode) + goto free_enable_map; + + in_out_info->kernel_dump_buffer = hand->dump_buf.dump_buf; in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores; in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups; @@ -213,7 +228,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn in_out_info->nr_core_groups, GFP_KERNEL); if (!in_out_info->hwc_layout) - goto free_vinstr_buffer; + goto free_dump_buf; dump_size = in_out_info->nr_core_groups * MALI_MAX_NUM_BLOCKS_PER_GROUP * @@ -256,7 +271,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL); if (!in_out_info->hwc_layout) - goto free_vinstr_buffer; + goto free_dump_buf; dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; @@ -275,17 +290,23 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn } } + /* Calculated dump size must be the same as real dump size */ + if (WARN_ON(dump_size != metadata->dump_buf_bytes)) + goto free_layout; + in_out_info->nr_hwc_blocks = i; in_out_info->size = dump_size; - setup.jm_bm = in_out_info->bitmask[0]; - setup.tiler_bm = in_out_info->bitmask[1]; - setup.shader_bm = in_out_info->bitmask[2]; - setup.mmu_l2_bm = in_out_info->bitmask[3]; - hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx, - &setup, hand->vinstr_buffer); - if (!hand->vinstr_cli) { - dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core"); + phys_map.jm_bm = in_out_info->bitmask[JM_BLOCK]; + phys_map.tiler_bm = in_out_info->bitmask[TILER_BLOCK]; + phys_map.shader_bm = in_out_info->bitmask[SHADER_BLOCK]; + phys_map.mmu_l2_bm = in_out_info->bitmask[MMU_L2_BLOCK]; + kbase_hwcnt_gpu_enable_map_from_physical(&hand->enable_map, &phys_map); + errcode = kbase_hwcnt_virtualizer_client_create( + hand->kbdev->hwcnt_gpu_virt, &hand->enable_map, &hand->hvcli); + if (errcode) { + dev_err(hand->kbdev->dev, + "Failed to register gator with hwcnt virtualizer core"); goto free_layout; } @@ -293,13 +314,12 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn free_layout: kfree(in_out_info->hwc_layout); - -free_vinstr_buffer: - kfree(hand->vinstr_buffer); - +free_dump_buf: + kbase_hwcnt_dump_buffer_free(&hand->dump_buf); +free_enable_map: + kbase_hwcnt_enable_map_free(&hand->enable_map); release_device: kbase_release_device(hand->kbdev); - free_hand: kfree(hand); return NULL; @@ -313,8 +333,9 @@ void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct k if (opaque_handles) { cancel_work_sync(&opaque_handles->dump_work); - kbase_vinstr_detach_client(opaque_handles->vinstr_cli); - kfree(opaque_handles->vinstr_buffer); + kbase_hwcnt_virtualizer_client_destroy(opaque_handles->hvcli); + kbase_hwcnt_dump_buffer_free(&opaque_handles->dump_buf); + kbase_hwcnt_enable_map_free(&opaque_handles->enable_map); 
kbase_release_device(opaque_handles->kbdev); kfree(opaque_handles); } @@ -323,11 +344,21 @@ KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term); static void dump_worker(struct work_struct *work) { + int errcode; + u64 ts_start_ns; + u64 ts_end_ns; struct kbase_gator_hwcnt_handles *hand; hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work); - if (!kbase_vinstr_hwc_dump(hand->vinstr_cli, - BASE_HWCNT_READER_EVENT_MANUAL)) { + errcode = kbase_hwcnt_virtualizer_client_dump( + hand->hvcli, &ts_start_ns, &ts_end_ns, &hand->dump_buf); + if (!errcode) { + /* Patch the header to hide other client's counter choices */ + kbase_hwcnt_gpu_patch_dump_headers( + &hand->dump_buf, &hand->enable_map); + /* Zero all non-enabled counters (currently undefined values) */ + kbase_hwcnt_dump_buffer_zero_non_enabled( + &hand->dump_buf, &hand->enable_map); spin_lock_bh(&hand->dump_lock); hand->dump_complete = 1; spin_unlock_bh(&hand->dump_lock); diff --git a/mali_kbase/mali_kbase_gpu_id.h b/mali_kbase/mali_kbase_gpu_id.h index 5f84ba9..d432f8e 100644 --- a/mali_kbase/mali_kbase_gpu_id.h +++ b/mali_kbase/mali_kbase_gpu_id.h @@ -114,8 +114,8 @@ #define GPU_ID2_PRODUCT_TEGX GPU_ID2_MODEL_MAKE(8, 3) #define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0) #define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1) +#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) #define GPU_ID2_PRODUCT_TULX GPU_ID2_MODEL_MAKE(10, 0) -#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1) #define GPU_ID2_PRODUCT_TIDX GPU_ID2_MODEL_MAKE(10, 3) #define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c index fc6b644..450926c 100644 --- a/mali_kbase/mali_kbase_hw.c +++ b/mali_kbase/mali_kbase_hw.c @@ -74,12 +74,12 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TNAX: features = base_hw_features_tNAx; break; + case GPU_ID2_PRODUCT_TBEX: + features = base_hw_features_tBEx; + break; case GPU_ID2_PRODUCT_TULX: features = base_hw_features_tULx; break; - case GPU_ID2_PRODUCT_TDUX: - features = base_hw_features_tDUx; - break; case GPU_ID2_PRODUCT_TBOX: features = base_hw_features_tBOx; break; @@ -213,12 +213,12 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0}, {U32_MAX, NULL} } }, - {GPU_ID2_PRODUCT_TULX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tULx_r0p0}, + {GPU_ID2_PRODUCT_TBEX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0}, {U32_MAX, NULL} } }, - {GPU_ID2_PRODUCT_TDUX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0}, + {GPU_ID2_PRODUCT_TULX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tULx_r0p0}, {U32_MAX, NULL} } }, {GPU_ID2_PRODUCT_TBOX, @@ -250,10 +250,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( if (product != NULL) { /* Found a matching product. */ const u32 version = gpu_id & GPU_ID2_VERSION; -#if !MALI_CUSTOMER_RELEASE u32 fallback_version = 0; const enum base_hw_issue *fallback_issues = NULL; -#endif size_t v; /* Stop when we reach the end of the map. */ @@ -265,25 +263,34 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( break; } -#if !MALI_CUSTOMER_RELEASE /* Check whether this is a candidate for most recent known version not later than the actual version. 
*/ if ((version > product->map[v].version) && (product->map[v].version >= fallback_version)) { - fallback_version = product->map[v].version; - fallback_issues = product->map[v].issues; - } +#if MALI_CUSTOMER_RELEASE + /* Match on version's major and minor fields */ + if (((version ^ product->map[v].version) >> + GPU_ID2_VERSION_MINOR_SHIFT) == 0) #endif + { + fallback_version = product->map[v].version; + fallback_issues = product->map[v].issues; + } + } } -#if !MALI_CUSTOMER_RELEASE if ((issues == NULL) && (fallback_issues != NULL)) { /* Fall back to the issue set of the most recent known version not later than the actual version. */ issues = fallback_issues; +#if MALI_CUSTOMER_RELEASE + dev_warn(kbdev->dev, + "GPU hardware issue table may need updating:\n" +#else dev_info(kbdev->dev, +#endif "r%dp%d status %d is unknown; treating as r%dp%d status %d", (gpu_id & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT, @@ -305,7 +312,6 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( kbase_gpuprops_update_core_props_gpu_id( &kbdev->gpu_props.props); } -#endif } return issues; } @@ -467,12 +473,12 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TNAX: issues = base_hw_issues_model_tNAx; break; + case GPU_ID2_PRODUCT_TBEX: + issues = base_hw_issues_model_tBEx; + break; case GPU_ID2_PRODUCT_TULX: issues = base_hw_issues_model_tULx; break; - case GPU_ID2_PRODUCT_TDUX: - issues = base_hw_issues_model_tDUx; - break; case GPU_ID2_PRODUCT_TBOX: issues = base_hw_issues_model_tBOx; break; diff --git a/mali_kbase/mali_kbase_hwaccess_instr.h b/mali_kbase/mali_kbase_hwaccess_instr.h index 0c5ceff..d5b9099 100644 --- a/mali_kbase/mali_kbase_hwaccess_instr.h +++ b/mali_kbase/mali_kbase_hwaccess_instr.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2017-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,7 +32,28 @@ #include <mali_kbase_instr_defs.h> /** - * kbase_instr_hwcnt_enable_internal - Enable HW counters collection + * struct kbase_instr_hwcnt_enable - Enable hardware counter collection. + * @dump_buffer: GPU address to write counters to. + * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer. + * @jm_bm: counters selection bitmask (JM). + * @shader_bm: counters selection bitmask (Shader). + * @tiler_bm: counters selection bitmask (Tiler). + * @mmu_l2_bm: counters selection bitmask (MMU_L2). + * @use_secondary: use secondary performance counters set for applicable + * counter blocks. 
+ */ +struct kbase_instr_hwcnt_enable { + u64 dump_buffer; + u64 dump_buffer_bytes; + u32 jm_bm; + u32 shader_bm; + u32 tiler_bm; + u32 mmu_l2_bm; + bool use_secondary; +}; + +/** + * kbase_instr_hwcnt_enable_internal() - Enable HW counters collection * @kbdev: Kbase device * @kctx: Kbase context * @enable: HW counter setup parameters @@ -43,10 +64,10 @@ */ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbase_ioctl_hwcnt_enable *enable); + struct kbase_instr_hwcnt_enable *enable); /** - * kbase_instr_hwcnt_disable_internal - Disable HW counters collection + * kbase_instr_hwcnt_disable_internal() - Disable HW counters collection * @kctx: Kbase context * * Context: might sleep, waiting for an ongoing dump to complete diff --git a/mali_kbase/mali_kbase_hwaccess_jm.h b/mali_kbase/mali_kbase_hwaccess_jm.h index 580ac98..e2798eb 100644 --- a/mali_kbase/mali_kbase_hwaccess_jm.h +++ b/mali_kbase/mali_kbase_hwaccess_jm.h @@ -128,7 +128,7 @@ void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, struct kbase_context *kctx); /** - * kbase_backend_cacheclean - Perform a cache clean if the given atom requires + * kbase_backend_cache_clean - Perform a cache clean if the given atom requires * one * @kbdev: Device pointer * @katom: Pointer to the failed atom @@ -136,7 +136,7 @@ void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, * On some GPUs, the GPU cache must be cleaned following a failed atom. This * function performs a clean if it is required by @katom. */ -void kbase_backend_cacheclean(struct kbase_device *kbdev, +void kbase_backend_cache_clean(struct kbase_device *kbdev, struct kbase_jd_atom *katom); @@ -160,14 +160,12 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, * any scheduling has taken place. * @kbdev: Device pointer * @core_req: Core requirements of atom - * @coreref_state: Coreref state of atom * * This function should only be called from kbase_jd_done_worker() or * js_return_worker(). */ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, - base_jd_core_req core_req, - enum kbase_atom_coreref_state coreref_state); + base_jd_core_req core_req); /** * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU @@ -277,7 +275,6 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx); */ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); -#if KBASE_GPU_RESET_EN /** * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. * @kbdev: Device pointer @@ -345,8 +342,11 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev); * of the GPU as part of normal processing (e.g. exiting protected mode) where * the driver will have ensured the scheduler has been idled and all other * users of the GPU (e.g. instrumentation) have been suspended. + * + * Return: 0 if the reset was started successfully + * -EAGAIN if another reset is currently in progress */ -void kbase_reset_gpu_silent(struct kbase_device *kbdev); +int kbase_reset_gpu_silent(struct kbase_device *kbdev); /** * kbase_reset_gpu_active - Reports if the GPU is being reset @@ -355,7 +355,6 @@ void kbase_reset_gpu_silent(struct kbase_device *kbdev); * Return: True if the GPU is in the process of being reset. 
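kbase_instr_hwcnt_enable_internal() now takes the in-kernel structure above instead of the ioctl argument struct; a short sketch of a caller populating it, with the buffer address, size, kbdev and kctx as placeholders:

struct kbase_instr_hwcnt_enable enable = {
	.dump_buffer       = dump_gpu_va,   /* GPU VA the counters are written to */
	.dump_buffer_bytes = dump_size,     /* size of that GPU buffer */
	.jm_bm             = 0xffffffff,    /* all Job Manager counters */
	.tiler_bm          = 0xffffffff,
	.shader_bm         = 0xffffffff,
	.mmu_l2_bm         = 0xffffffff,
	.use_secondary     = false,         /* primary counter set */
};
int err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);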
*/ bool kbase_reset_gpu_active(struct kbase_device *kbdev); -#endif /** * kbase_job_slot_hardstop - Hard-stop the specified job slot diff --git a/mali_kbase/mali_kbase_hwaccess_pm.h b/mali_kbase/mali_kbase_hwaccess_pm.h index 4598d80..5bb3887 100644 --- a/mali_kbase/mali_kbase_hwaccess_pm.h +++ b/mali_kbase/mali_kbase_hwaccess_pm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -44,13 +44,23 @@ struct kbase_device; * * Must be called before any other power management function * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if the power management framework was successfully initialized. + */ +int kbase_hwaccess_pm_early_init(struct kbase_device *kbdev); + +/** + * Initialize the power management framework. + * + * Must be called before any other power management function (except + * @ref kbase_hwaccess_pm_early_init) + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * @return 0 if the power management framework was successfully - * initialized. + * Return: 0 if the power management framework was successfully initialized. */ -int kbase_hwaccess_pm_init(struct kbase_device *kbdev); +int kbase_hwaccess_pm_late_init(struct kbase_device *kbdev); /** * Terminate the power management framework. @@ -58,10 +68,19 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev); * No power management functions may be called after this (except * @ref kbase_pm_init) * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_hwaccess_pm_early_term(struct kbase_device *kbdev); + +/** + * Terminate the power management framework. + * + * No power management functions may be called after this (except + * @ref kbase_hwaccess_pm_early_term or @ref kbase_hwaccess_pm_late_init) + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ -void kbase_hwaccess_pm_term(struct kbase_device *kbdev); +void kbase_hwaccess_pm_late_term(struct kbase_device *kbdev); /** * kbase_hwaccess_pm_powerup - Power up the GPU. diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h index 9b86b51..f7539f5 100644 --- a/mali_kbase/mali_kbase_hwaccess_time.h +++ b/mali_kbase/mali_kbase_hwaccess_time.h @@ -51,7 +51,11 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, * * This function is only in use for BASE_HW_ISSUE_6367 */ -#ifndef CONFIG_MALI_NO_MALI +#ifdef CONFIG_MALI_NO_MALI +static inline void kbase_wait_write_flush(struct kbase_device *kbdev) +{ +} +#else void kbase_wait_write_flush(struct kbase_device *kbdev); #endif diff --git a/mali_kbase/mali_kbase_hwcnt.c b/mali_kbase/mali_kbase_hwcnt.c new file mode 100644 index 0000000..efbac6f --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt.c @@ -0,0 +1,796 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. 
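The old single-stage kbase_hwaccess_pm_init()/term() pair is split into early and late stages; a sketch of the expected bracketing, with the surrounding backend setup elided and the exact call sites assumed:

int err;

err = kbase_hwaccess_pm_early_init(kbdev);
if (err)
	return err;

/* ... remaining backend initialisation ... */

err = kbase_hwaccess_pm_late_init(kbdev);
if (err) {
	kbase_hwaccess_pm_early_term(kbdev);
	return err;
}

/* Teardown mirrors the init order: */
kbase_hwaccess_pm_late_term(kbdev);
/* ... */
kbase_hwaccess_pm_early_term(kbdev);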
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Implementation of hardware counter context and accumulator APIs. + */ + +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_accumulator.h" +#include "mali_kbase_hwcnt_backend.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_malisw.h" +#include "mali_kbase_debug.h" +#include "mali_kbase_linux.h" + +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/slab.h> + +/** + * enum kbase_hwcnt_accum_state - Hardware counter accumulator states. + * @ACCUM_STATE_ERROR: Error state, where all accumulator operations fail. + * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled. + * @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are + * any enabled counters. + */ +enum kbase_hwcnt_accum_state { + ACCUM_STATE_ERROR, + ACCUM_STATE_DISABLED, + ACCUM_STATE_ENABLED +}; + +/** + * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure. + * @backend: Pointer to created counter backend. + * @state: The current state of the accumulator. + * - State transition from disabled->enabled or + * disabled->error requires state_lock. + * - State transition from enabled->disabled or + * enabled->error requires both accum_lock and + * state_lock. + * - Error state persists until next disable. + * @enable_map: The current set of enabled counters. + * - Must only be modified while holding both + * accum_lock and state_lock. + * - Can be read while holding either lock. + * - Must stay in sync with enable_map_any_enabled. + * @enable_map_any_enabled: True if any counters in the map are enabled, else + * false. If true, and state is ACCUM_STATE_ENABLED, + * then the counter backend will be enabled. + * - Must only be modified while holding both + * accum_lock and state_lock. + * - Can be read while holding either lock. + * - Must stay in sync with enable_map. + * @scratch_map: Scratch enable map, used as temporary enable map + * storage during dumps. + * - Must only be read or modified while holding + * accum_lock. + * @accum_buf: Accumulation buffer, where dumps will be accumulated + * into on transition to a disable state. + * - Must only be read or modified while holding + * accum_lock. + * @accumulated: True if the accumulation buffer has been accumulated + * into and not subsequently read from yet, else false. + * - Must only be read or modified while holding + * accum_lock. + * @ts_last_dump_ns: Timestamp (ns) of the end time of the most recent + * dump that was requested by the user. + * - Must only be read or modified while holding + * accum_lock. 
+ */ +struct kbase_hwcnt_accumulator { + struct kbase_hwcnt_backend *backend; + enum kbase_hwcnt_accum_state state; + struct kbase_hwcnt_enable_map enable_map; + bool enable_map_any_enabled; + struct kbase_hwcnt_enable_map scratch_map; + struct kbase_hwcnt_dump_buffer accum_buf; + bool accumulated; + u64 ts_last_dump_ns; +}; + +/** + * struct kbase_hwcnt_context - Hardware counter context structure. + * @iface: Pointer to hardware counter backend interface. + * @state_lock: Spinlock protecting state. + * @disable_count: Disable count of the context. Initialised to 1. + * Decremented when the accumulator is acquired, and incremented + * on release. Incremented on calls to + * kbase_hwcnt_context_disable[_atomic], and decremented on + * calls to kbase_hwcnt_context_enable. + * - Must only be read or modified while holding state_lock. + * @accum_lock: Mutex protecting accumulator. + * @accum_inited: Flag to prevent concurrent accumulator initialisation and/or + * termination. Set to true before accumulator initialisation, + * and false after accumulator termination. + * - Must only be modified while holding both accum_lock and + * state_lock. + * - Can be read while holding either lock. + * @accum: Hardware counter accumulator structure. + */ +struct kbase_hwcnt_context { + const struct kbase_hwcnt_backend_interface *iface; + spinlock_t state_lock; + size_t disable_count; + struct mutex accum_lock; + bool accum_inited; + struct kbase_hwcnt_accumulator accum; +}; + +int kbase_hwcnt_context_init( + const struct kbase_hwcnt_backend_interface *iface, + struct kbase_hwcnt_context **out_hctx) +{ + struct kbase_hwcnt_context *hctx = NULL; + + if (!iface || !out_hctx) + return -EINVAL; + + hctx = kzalloc(sizeof(*hctx), GFP_KERNEL); + if (!hctx) + return -ENOMEM; + + hctx->iface = iface; + spin_lock_init(&hctx->state_lock); + hctx->disable_count = 1; + mutex_init(&hctx->accum_lock); + hctx->accum_inited = false; + + *out_hctx = hctx; + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_init); + +void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx) +{ + if (!hctx) + return; + + /* Make sure we didn't leak the accumulator */ + WARN_ON(hctx->accum_inited); + kfree(hctx); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_term); + +/** + * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context. + * @hctx: Non-NULL pointer to hardware counter context. + */ +static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx) +{ + WARN_ON(!hctx); + WARN_ON(!hctx->accum_inited); + + kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map); + kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf); + kbase_hwcnt_enable_map_free(&hctx->accum.enable_map); + hctx->iface->term(hctx->accum.backend); + memset(&hctx->accum, 0, sizeof(hctx->accum)); +} + +/** + * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context. + * @hctx: Non-NULL pointer to hardware counter context. + * + * Return: 0 on success, else error code. 
+ */ +static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx) +{ + int errcode; + + WARN_ON(!hctx); + WARN_ON(!hctx->accum_inited); + + errcode = hctx->iface->init( + hctx->iface->info, &hctx->accum.backend); + if (errcode) + goto error; + + hctx->accum.state = ACCUM_STATE_ERROR; + + errcode = kbase_hwcnt_enable_map_alloc( + hctx->iface->metadata, &hctx->accum.enable_map); + if (errcode) + goto error; + + hctx->accum.enable_map_any_enabled = false; + + errcode = kbase_hwcnt_dump_buffer_alloc( + hctx->iface->metadata, &hctx->accum.accum_buf); + if (errcode) + goto error; + + errcode = kbase_hwcnt_enable_map_alloc( + hctx->iface->metadata, &hctx->accum.scratch_map); + if (errcode) + goto error; + + hctx->accum.accumulated = false; + + hctx->accum.ts_last_dump_ns = + hctx->iface->timestamp_ns(hctx->accum.backend); + + return 0; + +error: + kbasep_hwcnt_accumulator_term(hctx); + return errcode; +} + +/** + * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the + * disabled state, from the enabled or + * error states. + * @hctx: Non-NULL pointer to hardware counter context. + * @accumulate: True if we should accumulate before disabling, else false. + */ +static void kbasep_hwcnt_accumulator_disable( + struct kbase_hwcnt_context *hctx, bool accumulate) +{ + int errcode = 0; + bool backend_enabled = false; + struct kbase_hwcnt_accumulator *accum; + unsigned long flags; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->accum_lock); + WARN_ON(!hctx->accum_inited); + + accum = &hctx->accum; + + spin_lock_irqsave(&hctx->state_lock, flags); + + WARN_ON(hctx->disable_count != 0); + WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED); + + if ((hctx->accum.state == ACCUM_STATE_ENABLED) && + (accum->enable_map_any_enabled)) + backend_enabled = true; + + if (!backend_enabled) + hctx->accum.state = ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + /* Early out if the backend is not already enabled */ + if (!backend_enabled) + return; + + if (!accumulate) + goto disable; + + /* Try and accumulate before disabling */ + errcode = hctx->iface->dump_request(accum->backend); + if (errcode) + goto disable; + + errcode = hctx->iface->dump_wait(accum->backend); + if (errcode) + goto disable; + + errcode = hctx->iface->dump_get(accum->backend, + &accum->accum_buf, &accum->enable_map, accum->accumulated); + if (errcode) + goto disable; + + accum->accumulated = true; + +disable: + hctx->iface->dump_disable(accum->backend); + + /* Regardless of any errors during the accumulate, put the accumulator + * in the disabled state. + */ + spin_lock_irqsave(&hctx->state_lock, flags); + + hctx->accum.state = ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); +} + +/** + * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the + * enabled state, from the disabled state. + * @hctx: Non-NULL pointer to hardware counter context. 
+ */ +static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx) +{ + int errcode = 0; + struct kbase_hwcnt_accumulator *accum; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->state_lock); + WARN_ON(!hctx->accum_inited); + WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED); + + accum = &hctx->accum; + + /* The backend only needs enabling if any counters are enabled */ + if (accum->enable_map_any_enabled) + errcode = hctx->iface->dump_enable_nolock( + accum->backend, &accum->enable_map); + + if (!errcode) + accum->state = ACCUM_STATE_ENABLED; + else + accum->state = ACCUM_STATE_ERROR; +} + +/** + * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date + * values of enabled counters possible, and + * optionally update the set of enabled + * counters. + * @hctx : Non-NULL pointer to the hardware counter context + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * @new_map: Pointer to the new counter enable map. If non-NULL, must have + * the same metadata as the accumulator. If NULL, the set of + * enabled counters will be unchanged. + */ +static int kbasep_hwcnt_accumulator_dump( + struct kbase_hwcnt_context *hctx, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf, + const struct kbase_hwcnt_enable_map *new_map) +{ + int errcode = 0; + unsigned long flags; + enum kbase_hwcnt_accum_state state; + bool dump_requested = false; + bool dump_written = false; + bool cur_map_any_enabled; + struct kbase_hwcnt_enable_map *cur_map; + bool new_map_any_enabled = false; + u64 dump_time_ns; + struct kbase_hwcnt_accumulator *accum; + + WARN_ON(!hctx); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(dump_buf && (dump_buf->metadata != hctx->iface->metadata)); + WARN_ON(new_map && (new_map->metadata != hctx->iface->metadata)); + WARN_ON(!hctx->accum_inited); + lockdep_assert_held(&hctx->accum_lock); + + accum = &hctx->accum; + cur_map = &accum->scratch_map; + + /* Save out info about the current enable map */ + cur_map_any_enabled = accum->enable_map_any_enabled; + kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map); + + if (new_map) + new_map_any_enabled = + kbase_hwcnt_enable_map_any_enabled(new_map); + + /* + * We're holding accum_lock, so the accumulator state might transition + * from disabled to enabled during this function (as enabling is lock + * free), but it will never disable (as disabling needs to hold the + * accum_lock), nor will it ever transition from enabled to error (as + * an enable while we're already enabled is impossible). + * + * If we're already disabled, we'll only look at the accumulation buffer + * rather than do a real dump, so a concurrent enable does not affect + * us. + * + * If a concurrent enable fails, we might transition to the error + * state, but again, as we're only looking at the accumulation buffer, + * it's not an issue. + */ + spin_lock_irqsave(&hctx->state_lock, flags); + + state = accum->state; + + /* + * Update the new map now, such that if an enable occurs during this + * dump then that enable will set the new map. If we're already enabled, + * then we'll do it ourselves after the dump. 
+ */ + if (new_map) { + kbase_hwcnt_enable_map_copy( + &accum->enable_map, new_map); + accum->enable_map_any_enabled = new_map_any_enabled; + } + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + /* Error state, so early out. No need to roll back any map updates */ + if (state == ACCUM_STATE_ERROR) + return -EIO; + + /* Initiate the dump if the backend is enabled. */ + if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) { + /* Disable pre-emption, to make the timestamp as accurate as + * possible. + */ + preempt_disable(); + { + dump_time_ns = hctx->iface->timestamp_ns( + accum->backend); + if (dump_buf) { + errcode = hctx->iface->dump_request( + accum->backend); + dump_requested = true; + } else { + errcode = hctx->iface->dump_clear( + accum->backend); + } + } + preempt_enable(); + if (errcode) + goto error; + } else { + dump_time_ns = hctx->iface->timestamp_ns(accum->backend); + } + + /* Copy any accumulation into the dest buffer */ + if (accum->accumulated && dump_buf) { + kbase_hwcnt_dump_buffer_copy( + dump_buf, &accum->accum_buf, cur_map); + dump_written = true; + } + + /* Wait for any requested dumps to complete */ + if (dump_requested) { + WARN_ON(state != ACCUM_STATE_ENABLED); + errcode = hctx->iface->dump_wait(accum->backend); + if (errcode) + goto error; + } + + /* If we're enabled and there's a new enable map, change the enabled set + * as soon after the dump has completed as possible. + */ + if ((state == ACCUM_STATE_ENABLED) && new_map) { + /* Backend is only enabled if there were any enabled counters */ + if (cur_map_any_enabled) + hctx->iface->dump_disable(accum->backend); + + /* (Re-)enable the backend if the new map has enabled counters. + * No need to acquire the spinlock, as concurrent enable while + * we're already enabled and holding accum_lock is impossible. + */ + if (new_map_any_enabled) { + errcode = hctx->iface->dump_enable( + accum->backend, new_map); + if (errcode) + goto error; + } + } + + /* Copy, accumulate, or zero into the dest buffer to finish */ + if (dump_buf) { + /* If we dumped, copy or accumulate it into the destination */ + if (dump_requested) { + WARN_ON(state != ACCUM_STATE_ENABLED); + errcode = hctx->iface->dump_get( + accum->backend, + dump_buf, + cur_map, + dump_written); + if (errcode) + goto error; + dump_written = true; + } + + /* If we've not written anything into the dump buffer so far, it + * means there was nothing to write. Zero any enabled counters. + */ + if (!dump_written) + kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map); + } + + /* Write out timestamps */ + *ts_start_ns = accum->ts_last_dump_ns; + *ts_end_ns = dump_time_ns; + + accum->accumulated = false; + accum->ts_last_dump_ns = dump_time_ns; + + return 0; +error: + /* An error was only physically possible if the backend was enabled */ + WARN_ON(state != ACCUM_STATE_ENABLED); + + /* Disable the backend, and transition to the error state */ + hctx->iface->dump_disable(accum->backend); + spin_lock_irqsave(&hctx->state_lock, flags); + + accum->state = ACCUM_STATE_ERROR; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + return errcode; +} + +/** + * kbasep_hwcnt_context_disable() - Increment the disable count of the context. + * @hctx: Non-NULL pointer to hardware counter context. + * @accumulate: True if we should accumulate before disabling, else false. 
+ */ +static void kbasep_hwcnt_context_disable( + struct kbase_hwcnt_context *hctx, bool accumulate) +{ + unsigned long flags; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->accum_lock); + + if (!kbase_hwcnt_context_disable_atomic(hctx)) { + kbasep_hwcnt_accumulator_disable(hctx, accumulate); + + spin_lock_irqsave(&hctx->state_lock, flags); + + /* Atomic disable failed and we're holding the mutex, so current + * disable count must be 0. + */ + WARN_ON(hctx->disable_count != 0); + hctx->disable_count++; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + } +} + +int kbase_hwcnt_accumulator_acquire( + struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_accumulator **accum) +{ + int errcode = 0; + unsigned long flags; + + if (!hctx || !accum) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); + spin_lock_irqsave(&hctx->state_lock, flags); + + if (!hctx->accum_inited) + /* Set accum initing now to prevent concurrent init */ + hctx->accum_inited = true; + else + /* Already have an accum, or already being inited */ + errcode = -EBUSY; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + mutex_unlock(&hctx->accum_lock); + + if (errcode) + return errcode; + + errcode = kbasep_hwcnt_accumulator_init(hctx); + + if (errcode) { + mutex_lock(&hctx->accum_lock); + spin_lock_irqsave(&hctx->state_lock, flags); + + hctx->accum_inited = false; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + mutex_unlock(&hctx->accum_lock); + + return errcode; + } + + spin_lock_irqsave(&hctx->state_lock, flags); + + WARN_ON(hctx->disable_count == 0); + WARN_ON(hctx->accum.enable_map_any_enabled); + + /* Decrement the disable count to allow the accumulator to be accessible + * now that it's fully constructed. + */ + hctx->disable_count--; + + /* + * Make sure the accumulator is initialised to the correct state. + * Regardless of initial state, counters don't need to be enabled via + * the backend, as the initial enable map has no enabled counters. + */ + hctx->accum.state = (hctx->disable_count == 0) ? + ACCUM_STATE_ENABLED : + ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + *accum = &hctx->accum; + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_acquire); + +void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum) +{ + unsigned long flags; + struct kbase_hwcnt_context *hctx; + + if (!accum) + return; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + + mutex_lock(&hctx->accum_lock); + + /* Double release is a programming error */ + WARN_ON(!hctx->accum_inited); + + /* Disable the context to ensure the accumulator is inaccesible while + * we're destroying it. This performs the corresponding disable count + * increment to the decrement done during acquisition. + */ + kbasep_hwcnt_context_disable(hctx, false); + + mutex_unlock(&hctx->accum_lock); + + kbasep_hwcnt_accumulator_term(hctx); + + mutex_lock(&hctx->accum_lock); + spin_lock_irqsave(&hctx->state_lock, flags); + + hctx->accum_inited = false; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + mutex_unlock(&hctx->accum_lock); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_release); + +void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx) +{ + if (WARN_ON(!hctx)) + return; + + /* Try and atomically disable first, so we can avoid locking the mutex + * if we don't need to. 
+ */ + if (kbase_hwcnt_context_disable_atomic(hctx)) + return; + + mutex_lock(&hctx->accum_lock); + + kbasep_hwcnt_context_disable(hctx, true); + + mutex_unlock(&hctx->accum_lock); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable); + +bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) +{ + unsigned long flags; + bool atomic_disabled = false; + + if (WARN_ON(!hctx)) + return false; + + spin_lock_irqsave(&hctx->state_lock, flags); + + if (!WARN_ON(hctx->disable_count == SIZE_MAX)) { + /* + * If disable count is non-zero or no counters are enabled, we + * can just bump the disable count. + * + * Otherwise, we can't disable in an atomic context. + */ + if (hctx->disable_count != 0) { + hctx->disable_count++; + atomic_disabled = true; + } else { + WARN_ON(!hctx->accum_inited); + if (!hctx->accum.enable_map_any_enabled) { + hctx->disable_count++; + hctx->accum.state = ACCUM_STATE_DISABLED; + atomic_disabled = true; + } + } + } + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + return atomic_disabled; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable_atomic); + +void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx) +{ + unsigned long flags; + + if (WARN_ON(!hctx)) + return; + + spin_lock_irqsave(&hctx->state_lock, flags); + + if (!WARN_ON(hctx->disable_count == 0)) { + if (hctx->disable_count == 1) + kbasep_hwcnt_accumulator_enable(hctx); + + hctx->disable_count--; + } + + spin_unlock_irqrestore(&hctx->state_lock, flags); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_enable); + +const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( + struct kbase_hwcnt_context *hctx) +{ + if (!hctx) + return NULL; + + return hctx->iface->metadata; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_metadata); + +int kbase_hwcnt_accumulator_set_counters( + struct kbase_hwcnt_accumulator *accum, + const struct kbase_hwcnt_enable_map *new_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_context *hctx; + + if (!accum || !new_map || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + + if ((new_map->metadata != hctx->iface->metadata) || + (dump_buf && (dump_buf->metadata != hctx->iface->metadata))) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); + + errcode = kbasep_hwcnt_accumulator_dump( + hctx, ts_start_ns, ts_end_ns, dump_buf, new_map); + + mutex_unlock(&hctx->accum_lock); + + return errcode; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_set_counters); + +int kbase_hwcnt_accumulator_dump( + struct kbase_hwcnt_accumulator *accum, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_context *hctx; + + if (!accum || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + + if (dump_buf && (dump_buf->metadata != hctx->iface->metadata)) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); + + errcode = kbasep_hwcnt_accumulator_dump( + hctx, ts_start_ns, ts_end_ns, dump_buf, NULL); + + mutex_unlock(&hctx->accum_lock); + + return errcode; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_dump); diff --git a/mali_kbase/mali_kbase_hwcnt_accumulator.h b/mali_kbase/mali_kbase_hwcnt_accumulator.h new file mode 100644 index 0000000..fc45743 --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_accumulator.h @@ -0,0 +1,137 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. 
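A sketch of how a caller might bracket a critical section with the context disable/enable pair above; per the kbase_device documentation earlier in this patch, re-enabling kbdev->hwcnt_gpu_ctx is done under the hwaccess_lock (the caller shown here is an assumption):

unsigned long flags;

/* May sleep if a dump has to be accumulated before disabling. */
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);

/* ... work that must not race with counter collection ... */

spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);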
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter accumulator API. + */ + +#ifndef _KBASE_HWCNT_ACCUMULATOR_H_ +#define _KBASE_HWCNT_ACCUMULATOR_H_ + +#include <linux/types.h> + +struct kbase_hwcnt_context; +struct kbase_hwcnt_accumulator; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator + * for a hardware counter context. + * @hctx: Non-NULL pointer to a hardware counter context. + * @accum: Non-NULL pointer to where the pointer to the created accumulator + * will be stored on success. + * + * There can exist at most one instance of the hardware counter accumulator per + * context at a time. + * + * If multiple clients need access to the hardware counters at the same time, + * then an abstraction built on top of the single instance to the hardware + * counter accumulator is required. + * + * No counters will be enabled with the returned accumulator. A subsequent call + * to kbase_hwcnt_accumulator_set_counters must be used to turn them on. + * + * There are four components to a hardware counter dump: + * - A set of enabled counters + * - A start time + * - An end time + * - A dump buffer containing the accumulated counter values for all enabled + * counters between the start and end times. + * + * For each dump, it is guaranteed that all enabled counters were active for the + * entirety of the period between the start and end times. + * + * It is also guaranteed that the start time of dump "n" is always equal to the + * end time of dump "n - 1". + * + * For all dumps, the values of any counters that were not enabled is undefined. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_accumulator_acquire( + struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_accumulator **accum); + +/** + * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * + * The accumulator must be released before the context the accumulator was + * created from is terminated. + */ +void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum); + +/** + * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently + * enabled counters, and enable a new + * set of counters that will be used + * for subsequent dumps. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * @new_map: Non-NULL pointer to the new counter enable map. Must have the + * same metadata as the accumulator. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. 
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * If this function fails for some unexpected reason (i.e. anything other than + * invalid args), then the accumulator will be put into the error state until + * the parent context is next disabled. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_accumulator_set_counters( + struct kbase_hwcnt_accumulator *accum, + const struct kbase_hwcnt_enable_map *new_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled + * counters. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * If this function fails for some unexpected reason (i.e. anything other than + * invalid args), then the accumulator will be put into the error state until + * the parent context is next disabled. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_accumulator_dump( + struct kbase_hwcnt_accumulator *accum, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_backend.h b/mali_kbase/mali_kbase_hwcnt_backend.h new file mode 100644 index 0000000..b7aa0e1 --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_backend.h @@ -0,0 +1,217 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Virtual interface for hardware counter backends. + */ + +#ifndef _KBASE_HWCNT_BACKEND_H_ +#define _KBASE_HWCNT_BACKEND_H_ + +#include <linux/types.h> + +struct kbase_hwcnt_metadata; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/* + * struct kbase_hwcnt_backend_info - Opaque pointer to information used to + * create an instance of a hardware counter + * backend. + */ +struct kbase_hwcnt_backend_info; + +/* + * struct kbase_hwcnt_backend_info - Opaque pointer to a hardware counter + * backend, used to perform dumps. + */ +struct kbase_hwcnt_backend; + +/** + * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend. + * @info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. 
+ * + * All uses of the created hardware counter backend must be externally + * synchronised. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_init_fn)( + const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend); + +/** + * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend. + * @backend: Pointer to backend to be terminated. + */ +typedef void (*kbase_hwcnt_backend_term_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend + * timestamp. + * @backend: Non-NULL pointer to backend. + * + * Return: Backend timestamp in nanoseconds. + */ +typedef u64 (*kbase_hwcnt_backend_timestamp_ns_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the + * backend. + * @backend: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * + * The enable_map must have been created using the interface's metadata. + * If the backend has already been enabled, an error is returned. + * + * May be called in an atomic context. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_enable_fn)( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); + +/** + * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping + * with the backend. + * @backend: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * + * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be + * called in an atomic context with the spinlock documented by the specific + * backend interface held. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_enable_nolock_fn)( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); + +/** + * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with + * the backend. + * @backend: Non-NULL pointer to backend. + * + * If the backend is already disabled, does nothing. + * Any undumped counter values since the last dump get will be lost. + */ +typedef void (*kbase_hwcnt_backend_dump_disable_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped + * counters. + * @backend: Non-NULL pointer to backend. + * + * If the backend is not enabled, returns an error. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_clear_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter + * dump. + * @backend: Non-NULL pointer to backend. + * + * If the backend is not enabled or another dump is already in progress, + * returns an error. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_request_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested + * counter dump has completed. + * @backend: Non-NULL pointer to backend. + * + * If the backend is not enabled, returns an error. + * + * Return: 0 on success, else error code. 
+ */ +typedef int (*kbase_hwcnt_backend_dump_wait_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the + * counters dumped after the last dump + * request into the dump buffer. + * @backend: Non-NULL pointer to backend. + * @dump_buffer: Non-NULL pointer to destination dump buffer. + * @enable_map: Non-NULL pointer to enable map specifying enabled values. + * @accumulate: True if counters should be accumulated into dump_buffer, rather + * than copied. + * + * If the backend is not enabled, returns an error. + * If a dump is in progress (i.e. dump_wait has not yet returned successfully) + * then the resultant contents of the dump buffer will be undefined. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_get_fn)( + struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map, + bool accumulate); + +/** + * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual + * interface. + * @metadata: Immutable hardware counter metadata. + * @info: Immutable info used to initialise an instance of the + * backend. + * @init: Function ptr to initialise an instance of the backend. + * @term: Function ptr to terminate an instance of the backend. + * @timestamp_ns: Function ptr to get the current backend timestamp. + * @dump_enable: Function ptr to enable dumping. + * @dump_enable_nolock: Function ptr to enable dumping while the + * backend-specific spinlock is already held. + * @dump_disable: Function ptr to disable dumping. + * @dump_clear: Function ptr to clear counters. + * @dump_request: Function ptr to request a dump. + * @dump_wait: Function ptr to wait until dump to complete. + * @dump_get: Function ptr to copy or accumulate dump into a dump + * buffer. + */ +struct kbase_hwcnt_backend_interface { + const struct kbase_hwcnt_metadata *metadata; + const struct kbase_hwcnt_backend_info *info; + kbase_hwcnt_backend_init_fn init; + kbase_hwcnt_backend_term_fn term; + kbase_hwcnt_backend_timestamp_ns_fn timestamp_ns; + kbase_hwcnt_backend_dump_enable_fn dump_enable; + kbase_hwcnt_backend_dump_enable_nolock_fn dump_enable_nolock; + kbase_hwcnt_backend_dump_disable_fn dump_disable; + kbase_hwcnt_backend_dump_clear_fn dump_clear; + kbase_hwcnt_backend_dump_request_fn dump_request; + kbase_hwcnt_backend_dump_wait_fn dump_wait; + kbase_hwcnt_backend_dump_get_fn dump_get; +}; + +#endif /* _KBASE_HWCNT_BACKEND_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_backend_gpu.c b/mali_kbase/mali_kbase_hwcnt_backend_gpu.c new file mode 100644 index 0000000..4bc8916 --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_backend_gpu.c @@ -0,0 +1,538 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
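 *
 * As an illustrative sketch (not taken from this patch) of how a user of the
 * virtual interface above is expected to drive a backend, where iface, map
 * and buf are assumed to already exist and error handling is omitted:
 *
 *   struct kbase_hwcnt_backend *backend;
 *
 *   iface->init(iface->info, &backend);
 *   iface->dump_enable(backend, map);
 *   iface->dump_request(backend);
 *   iface->dump_wait(backend);
 *   iface->dump_get(backend, buf, map, false);
 *   iface->dump_disable(backend);
 *   iface->term(backend);
 *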
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_backend_gpu.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" +#include "mali_kbase_pm_policy.h" +#include "mali_kbase_hwaccess_instr.h" +#include "mali_kbase_tlstream.h" +#ifdef CONFIG_MALI_NO_MALI +#include "backend/gpu/mali_kbase_model_dummy.h" +#endif + +/** + * struct kbase_hwcnt_backend_gpu_info - Information used to create an instance + * of a GPU hardware counter backend. + * @kbdev: KBase device. + * @use_secondary: True if secondary performance counters should be used, + * else false. Ignored if secondary counters are not supported. + * @metadata: Hardware counter metadata. + * @dump_bytes: Bytes of GPU memory required to perform a + * hardware counter dump. + */ +struct kbase_hwcnt_backend_gpu_info { + struct kbase_device *kbdev; + bool use_secondary; + const struct kbase_hwcnt_metadata *metadata; + size_t dump_bytes; +}; + +/** + * struct kbase_hwcnt_backend_gpu - Instance of a GPU hardware counter backend. + * @info: Info used to create the backend. + * @kctx: KBase context used for GPU memory allocation and + * counter dumping. + * @kctx_element: List element used to add kctx to device context list. + * @gpu_dump_va: GPU hardware counter dump buffer virtual address. + * @cpu_dump_va: CPU mapping of gpu_dump_va. + * @vmap: Dump buffer vmap. + * @enabled: True if dumping has been enabled, else false. + */ +struct kbase_hwcnt_backend_gpu { + const struct kbase_hwcnt_backend_gpu_info *info; + struct kbase_context *kctx; + struct kbasep_kctx_list_element *kctx_element; + u64 gpu_dump_va; + void *cpu_dump_va; + struct kbase_vmap_struct *vmap; + bool enabled; +}; + +/* GPU backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ +static u64 kbasep_hwcnt_backend_gpu_timestamp_ns( + struct kbase_hwcnt_backend *backend) +{ + struct timespec ts; + + (void)backend; + getrawmonotonic(&ts); + return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ +static int kbasep_hwcnt_backend_gpu_dump_enable_nolock( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + int errcode; + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct kbase_hwcnt_physical_enable_map phys; + struct kbase_instr_hwcnt_enable enable; + + if (!backend_gpu || !enable_map || backend_gpu->enabled || + (enable_map->metadata != backend_gpu->info->metadata)) + return -EINVAL; + + kctx = backend_gpu->kctx; + kbdev = backend_gpu->kctx->kbdev; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map); + + enable.jm_bm = phys.jm_bm; + enable.shader_bm = phys.shader_bm; + enable.tiler_bm = phys.tiler_bm; + enable.mmu_l2_bm = phys.mmu_l2_bm; + enable.use_secondary = backend_gpu->info->use_secondary; + enable.dump_buffer = backend_gpu->gpu_dump_va; + enable.dump_buffer_bytes = backend_gpu->info->dump_bytes; + + errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); + if (errcode) + goto error; + + backend_gpu->enabled = true; + + return 0; +error: + return errcode; +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_fn */ +static int kbasep_hwcnt_backend_gpu_dump_enable( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + unsigned long flags; + int 
errcode; + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + struct kbase_device *kbdev; + + if (!backend_gpu) + return -EINVAL; + + kbdev = backend_gpu->kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + errcode = kbasep_hwcnt_backend_gpu_dump_enable_nolock( + backend, enable_map); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return errcode; +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_disable_fn */ +static void kbasep_hwcnt_backend_gpu_dump_disable( + struct kbase_hwcnt_backend *backend) +{ + int errcode; + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + + if (WARN_ON(!backend_gpu) || !backend_gpu->enabled) + return; + + errcode = kbase_instr_hwcnt_disable_internal(backend_gpu->kctx); + WARN_ON(errcode); + + backend_gpu->enabled = false; +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_clear_fn */ +static int kbasep_hwcnt_backend_gpu_dump_clear( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + + if (!backend_gpu || !backend_gpu->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_clear(backend_gpu->kctx); +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_request_fn */ +static int kbasep_hwcnt_backend_gpu_dump_request( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + + if (!backend_gpu || !backend_gpu->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_request_dump(backend_gpu->kctx); +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_wait_fn */ +static int kbasep_hwcnt_backend_gpu_dump_wait( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + + if (!backend_gpu || !backend_gpu->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_wait_for_dump(backend_gpu->kctx); +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_get_fn */ +static int kbasep_hwcnt_backend_gpu_dump_get( + struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) +{ + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + + if (!backend_gpu || !dst || !dst_enable_map || + (backend_gpu->info->metadata != dst->metadata) || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + /* Invalidate the kernel buffer before reading from it. */ + kbase_sync_mem_regions( + backend_gpu->kctx, backend_gpu->vmap, KBASE_SYNC_TO_CPU); + + return kbase_hwcnt_gpu_dump_get( + dst, backend_gpu->cpu_dump_va, dst_enable_map, accumulate); +} + +/** + * kbasep_hwcnt_backend_gpu_dump_alloc() - Allocate a GPU dump buffer. + * @info: Non-NULL pointer to GPU backend info. + * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address + * is stored on success. + * + * Return: 0 on success, else error code. 
+ */ +static int kbasep_hwcnt_backend_gpu_dump_alloc( + const struct kbase_hwcnt_backend_gpu_info *info, + struct kbase_context *kctx, + u64 *gpu_dump_va) +{ + struct kbase_va_region *reg; + u64 flags; + u64 nr_pages; + + WARN_ON(!info); + WARN_ON(!kctx); + WARN_ON(!gpu_dump_va); + + flags = BASE_MEM_PROT_CPU_RD | + BASE_MEM_PROT_GPU_WR | + BASE_MEM_PERMANENT_KERNEL_MAPPING | + BASE_MEM_CACHED_CPU; + + if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) + flags |= BASE_MEM_UNCACHED_GPU; + + nr_pages = PFN_UP(info->dump_bytes); + + reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); + + if (!reg) + return -ENOMEM; + + return 0; +} + +/** + * kbasep_hwcnt_backend_gpu_dump_free() - Free an allocated GPU dump buffer. + * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: GPU dump buffer virtual address. + */ +static void kbasep_hwcnt_backend_gpu_dump_free( + struct kbase_context *kctx, + u64 gpu_dump_va) +{ + WARN_ON(!kctx); + if (gpu_dump_va) + kbase_mem_free(kctx, gpu_dump_va); +} + +/** + * kbasep_hwcnt_backend_gpu_destroy() - Destroy a GPU backend. + * @backend: Pointer to GPU backend to destroy. + * + * Can be safely called on a backend in any state of partial construction. + */ +static void kbasep_hwcnt_backend_gpu_destroy( + struct kbase_hwcnt_backend_gpu *backend) +{ + if (!backend) + return; + + if (backend->kctx) { + struct kbase_context *kctx = backend->kctx; + struct kbase_device *kbdev = kctx->kbdev; + + if (backend->cpu_dump_va) + kbase_phy_alloc_mapping_put(kctx, backend->vmap); + + if (backend->gpu_dump_va) + kbasep_hwcnt_backend_gpu_dump_free( + kctx, backend->gpu_dump_va); + + if (backend->kctx_element) { + mutex_lock(&kbdev->kctx_list_lock); + + KBASE_TLSTREAM_TL_DEL_CTX(kctx); + list_del(&backend->kctx_element->link); + + mutex_unlock(&kbdev->kctx_list_lock); + kfree(backend->kctx_element); + } + + kbasep_js_release_privileged_ctx(kbdev, kctx); + kbase_destroy_context(kctx); + } + + kfree(backend); +} + +/** + * kbasep_hwcnt_backend_gpu_create() - Create a GPU backend. + * @info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_gpu_create( + const struct kbase_hwcnt_backend_gpu_info *info, + struct kbase_hwcnt_backend_gpu **out_backend) +{ + int errcode; + struct kbase_device *kbdev; + struct kbase_hwcnt_backend_gpu *backend = NULL; + + WARN_ON(!info); + WARN_ON(!out_backend); + + kbdev = info->kbdev; + + backend = kzalloc(sizeof(*backend), GFP_KERNEL); + if (!backend) + goto alloc_error; + + backend->info = info; + + backend->kctx = kbase_create_context(kbdev, true); + if (!backend->kctx) + goto alloc_error; + + kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); + + backend->kctx_element = kzalloc( + sizeof(*backend->kctx_element), GFP_KERNEL); + if (!backend->kctx_element) + goto alloc_error; + + backend->kctx_element->kctx = backend->kctx; + + /* Add kernel context to list of contexts associated with device. 
*/ + mutex_lock(&kbdev->kctx_list_lock); + + list_add(&backend->kctx_element->link, &kbdev->kctx_list); + /* Fire tracepoint while lock is held, to ensure tracepoint is not + * created in both body and summary stream + */ + KBASE_TLSTREAM_TL_NEW_CTX( + backend->kctx, backend->kctx->id, (u32)(backend->kctx->tgid)); + + mutex_unlock(&kbdev->kctx_list_lock); + + errcode = kbasep_hwcnt_backend_gpu_dump_alloc( + info, backend->kctx, &backend->gpu_dump_va); + if (errcode) + goto error; + + backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx, + backend->gpu_dump_va, &backend->vmap); + if (!backend->cpu_dump_va) + goto alloc_error; + +#ifdef CONFIG_MALI_NO_MALI + /* The dummy model needs the CPU mapping. */ + gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va); +#endif + + *out_backend = backend; + return 0; + +alloc_error: + errcode = -ENOMEM; +error: + kbasep_hwcnt_backend_gpu_destroy(backend); + return errcode; +} + +/* GPU backend implementation of kbase_hwcnt_backend_init_fn */ +static int kbasep_hwcnt_backend_gpu_init( + const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend) +{ + int errcode; + struct kbase_hwcnt_backend_gpu *backend = NULL; + + if (!info || !out_backend) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_gpu_create( + (const struct kbase_hwcnt_backend_gpu_info *) info, &backend); + if (errcode) + return errcode; + + *out_backend = (struct kbase_hwcnt_backend *)backend; + + return 0; +} + +/* GPU backend implementation of kbase_hwcnt_backend_term_fn */ +static void kbasep_hwcnt_backend_gpu_term(struct kbase_hwcnt_backend *backend) +{ + if (!backend) + return; + + kbasep_hwcnt_backend_gpu_dump_disable(backend); + kbasep_hwcnt_backend_gpu_destroy( + (struct kbase_hwcnt_backend_gpu *)backend); +} + +/** + * kbasep_hwcnt_backend_gpu_info_destroy() - Destroy a GPU backend info. + * @info: Pointer to info to destroy. + * + * Can be safely called on a backend info in any state of partial construction. + */ +static void kbasep_hwcnt_backend_gpu_info_destroy( + const struct kbase_hwcnt_backend_gpu_info *info) +{ + if (!info) + return; + + kbase_hwcnt_gpu_metadata_destroy(info->metadata); + kfree(info); +} + +/** + * kbasep_hwcnt_backend_gpu_info_create() - Create a GPU backend info. + * @kbdev: Non_NULL pointer to kbase device. + * @out_info: Non-NULL pointer to where info is stored on success. + * + * Return 0 on success, else error code. 
+ */ +static int kbasep_hwcnt_backend_gpu_info_create( + struct kbase_device *kbdev, + const struct kbase_hwcnt_backend_gpu_info **out_info) +{ + int errcode = -ENOMEM; + struct kbase_hwcnt_gpu_info hwcnt_gpu_info; + struct kbase_hwcnt_backend_gpu_info *info = NULL; + + WARN_ON(!kbdev); + WARN_ON(!out_info); + + errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info); + if (errcode) + return errcode; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + goto error; + + info->kbdev = kbdev; + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + info->use_secondary = true; +#else + info->use_secondary = false; +#endif + + errcode = kbase_hwcnt_gpu_metadata_create( + &hwcnt_gpu_info, info->use_secondary, + &info->metadata, + &info->dump_bytes); + if (errcode) + goto error; + + *out_info = info; + + return 0; +error: + kbasep_hwcnt_backend_gpu_info_destroy(info); + return errcode; +} + +int kbase_hwcnt_backend_gpu_create( + struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface) +{ + int errcode; + const struct kbase_hwcnt_backend_gpu_info *info = NULL; + + if (!kbdev || !iface) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_gpu_info_create(kbdev, &info); + + if (errcode) + return errcode; + + iface->metadata = info->metadata; + iface->info = (struct kbase_hwcnt_backend_info *)info; + iface->init = kbasep_hwcnt_backend_gpu_init; + iface->term = kbasep_hwcnt_backend_gpu_term; + iface->timestamp_ns = kbasep_hwcnt_backend_gpu_timestamp_ns; + iface->dump_enable = kbasep_hwcnt_backend_gpu_dump_enable; + iface->dump_enable_nolock = kbasep_hwcnt_backend_gpu_dump_enable_nolock; + iface->dump_disable = kbasep_hwcnt_backend_gpu_dump_disable; + iface->dump_clear = kbasep_hwcnt_backend_gpu_dump_clear; + iface->dump_request = kbasep_hwcnt_backend_gpu_dump_request; + iface->dump_wait = kbasep_hwcnt_backend_gpu_dump_wait; + iface->dump_get = kbasep_hwcnt_backend_gpu_dump_get; + + return 0; +} + +void kbase_hwcnt_backend_gpu_destroy( + struct kbase_hwcnt_backend_interface *iface) +{ + if (!iface) + return; + + kbasep_hwcnt_backend_gpu_info_destroy( + (const struct kbase_hwcnt_backend_gpu_info *)iface->info); + memset(iface, 0, sizeof(*iface)); +} diff --git a/mali_kbase/mali_kbase_hwcnt_backend_gpu.h b/mali_kbase/mali_kbase_hwcnt_backend_gpu.h new file mode 100644 index 0000000..7712f14 --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_backend_gpu.h @@ -0,0 +1,61 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Concrete implementation of mali_kbase_hwcnt_backend interface for GPU + * backend. 
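 *
 * A minimal creation and teardown sketch (illustrative only; kbdev is an
 * assumed, already probed device pointer and error handling is omitted):
 *
 *   struct kbase_hwcnt_backend_interface iface;
 *
 *   kbase_hwcnt_backend_gpu_create(kbdev, &iface);
 *   (hand iface to a hardware counter context, see the context API)
 *   kbase_hwcnt_backend_gpu_destroy(&iface);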
+ */ + +#ifndef _KBASE_HWCNT_BACKEND_GPU_H_ +#define _KBASE_HWCNT_BACKEND_GPU_H_ + +#include "mali_kbase_hwcnt_backend.h" + +struct kbase_device; + +/** + * kbase_hwcnt_backend_gpu_create() - Create a GPU hardware counter backend + * interface. + * @kbdev: Non-NULL pointer to kbase device. + * @iface: Non-NULL pointer to backend interface structure that is filled in + * on creation success. + * + * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_backend_gpu_create( + struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_gpu_destroy() - Destroy a GPU hardware counter backend + * interface. + * @iface: Pointer to interface to destroy. + * + * Can be safely called on an all-zeroed interface, or on an already destroyed + * interface. + */ +void kbase_hwcnt_backend_gpu_destroy( + struct kbase_hwcnt_backend_interface *iface); + +#endif /* _KBASE_HWCNT_BACKEND_GPU_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_context.h b/mali_kbase/mali_kbase_hwcnt_context.h new file mode 100644 index 0000000..bc50ad1 --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_context.h @@ -0,0 +1,119 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter context API. + */ + +#ifndef _KBASE_HWCNT_CONTEXT_H_ +#define _KBASE_HWCNT_CONTEXT_H_ + +#include <linux/types.h> + +struct kbase_hwcnt_backend_interface; +struct kbase_hwcnt_context; + +/** + * kbase_hwcnt_context_init() - Initialise a hardware counter context. + * @iface: Non-NULL pointer to a hardware counter backend interface. + * @out_hctx: Non-NULL pointer to where the pointer to the created context will + * be stored on success. + * + * On creation, the disable count of the context will be 0. + * A hardware counter accumulator can be acquired using a created context. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_context_init( + const struct kbase_hwcnt_backend_interface *iface, + struct kbase_hwcnt_context **out_hctx); + +/** + * kbase_hwcnt_context_term() - Terminate a hardware counter context. + * @hctx: Pointer to context to be terminated. + */ +void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by + * the context, so related counter data + * structures can be created. + * @hctx: Non-NULL pointer to the hardware counter context. + * + * Return: Non-NULL pointer to metadata, or NULL on error. + */ +const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( + struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_disable() - Increment the disable count of the context. 
+ * @hctx: Pointer to the hardware counter context. + * + * If a call to this function increments the disable count from 0 to 1, and + * an accumulator has been acquired, then a counter dump will be performed + * before counters are disabled via the backend interface. + * + * Subsequent dumps via the accumulator while counters are disabled will first + * return the accumulated dump, then will return dumps with zeroed counters. + * + * After this function call returns, it is guaranteed that counters will not be + * enabled via the backend interface. + */ +void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the + * context if possible in an atomic + * context. + * @hctx: Pointer to the hardware counter context. + * + * This function will only succeed if hardware counters are effectively already + * disabled, i.e. there is no accumulator, the disable count is already + * non-zero, or the accumulator has no counters set. + * + * After this function call returns true, it is guaranteed that counters will + * not be enabled via the backend interface. + * + * Return: True if the disable count was incremented, else False. + */ +bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_enable() - Decrement the disable count of the context. + * @hctx: Pointer to the hardware counter context. + * + * If a call to this function decrements the disable count from 1 to 0, and + * an accumulator has been acquired, then counters will be re-enabled via the + * backend interface. + * + * If an accumulator has been acquired and enabling counters fails for some + * reason, the accumulator will be placed into an error state. + * + * It is only valid to call this function one time for each prior returned call + * to kbase_hwcnt_context_disable. + * + * The spinlock documented in the backend interface that was passed in to + * kbase_hwcnt_context_init() must be held before calling this function. + */ +void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx); + +#endif /* _KBASE_HWCNT_CONTEXT_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c new file mode 100644 index 0000000..647d3ec --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_gpu.c @@ -0,0 +1,716 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
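 *
 * Putting the context, accumulator and GPU backend pieces above together, a
 * plausible end-to-end setup is sketched below (illustrative only; kbdev is
 * an assumed device pointer, error handling is omitted, and counters would
 * be configured via kbase_hwcnt_accumulator_set_counters() after acquire):
 *
 *   struct kbase_hwcnt_backend_interface iface;
 *   struct kbase_hwcnt_context *hctx;
 *   struct kbase_hwcnt_accumulator *accum;
 *
 *   kbase_hwcnt_backend_gpu_create(kbdev, &iface);
 *   kbase_hwcnt_context_init(&iface, &hctx);
 *   kbase_hwcnt_accumulator_acquire(hctx, &accum);
 *   (dumps are taken here; kbase_hwcnt_context_disable()/enable() can be
 *    nested around periods when counters must be off, with enable called
 *    under the backend's documented spinlock)
 *   kbase_hwcnt_accumulator_release(accum);
 *   kbase_hwcnt_context_term(hctx);
 *   kbase_hwcnt_backend_gpu_destroy(&iface);
 *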
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" +#ifdef CONFIG_MALI_NO_MALI +#include "backend/gpu/mali_kbase_model_dummy.h" +#endif + +#define KBASE_HWCNT_V4_BLOCKS_PER_GROUP 8 +#define KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP 4 +#define KBASE_HWCNT_V4_MAX_GROUPS \ + (KBASE_HWCNT_AVAIL_MASK_BITS / KBASE_HWCNT_V4_BLOCKS_PER_GROUP) +#define KBASE_HWCNT_V4_HEADERS_PER_BLOCK 4 +#define KBASE_HWCNT_V4_COUNTERS_PER_BLOCK 60 +#define KBASE_HWCNT_V4_VALUES_PER_BLOCK \ + (KBASE_HWCNT_V4_HEADERS_PER_BLOCK + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK) +/* Index of the PRFCNT_EN header into a V4 counter block */ +#define KBASE_HWCNT_V4_PRFCNT_EN_HEADER 2 + +#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 +#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 +#define KBASE_HWCNT_V5_COUNTERS_PER_BLOCK 60 +#define KBASE_HWCNT_V5_VALUES_PER_BLOCK \ + (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_COUNTERS_PER_BLOCK) +/* Index of the PRFCNT_EN header into a V5 counter block */ +#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2 + +/** + * kbasep_hwcnt_backend_gpu_metadata_v4_create() - Create hardware counter + * metadata for a v4 GPU. + * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. + * @metadata: Non-NULL pointer to where created metadata is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_gpu_metadata_v4_create( + const struct kbase_hwcnt_gpu_v4_info *v4_info, + const struct kbase_hwcnt_metadata **metadata) +{ + size_t grp; + int errcode = -ENOMEM; + struct kbase_hwcnt_description desc; + struct kbase_hwcnt_group_description *grps; + size_t avail_mask_bit; + + WARN_ON(!v4_info); + WARN_ON(!metadata); + + /* Check if there are enough bits in the availability mask to represent + * all the hardware counter blocks in the system. + */ + if (v4_info->cg_count > KBASE_HWCNT_V4_MAX_GROUPS) + return -EINVAL; + + grps = kcalloc(v4_info->cg_count, sizeof(*grps), GFP_KERNEL); + if (!grps) + goto clean_up; + + desc.grp_cnt = v4_info->cg_count; + desc.grps = grps; + + for (grp = 0; grp < v4_info->cg_count; grp++) { + size_t blk; + size_t sc; + const u64 core_mask = v4_info->cgs[grp].core_mask; + struct kbase_hwcnt_block_description *blks = kcalloc( + KBASE_HWCNT_V4_BLOCKS_PER_GROUP, + sizeof(*blks), + GFP_KERNEL); + + if (!blks) + goto clean_up; + + grps[grp].type = KBASE_HWCNT_GPU_GROUP_TYPE_V4; + grps[grp].blk_cnt = KBASE_HWCNT_V4_BLOCKS_PER_GROUP; + grps[grp].blks = blks; + + for (blk = 0; blk < KBASE_HWCNT_V4_BLOCKS_PER_GROUP; blk++) { + blks[blk].inst_cnt = 1; + blks[blk].hdr_cnt = + KBASE_HWCNT_V4_HEADERS_PER_BLOCK; + blks[blk].ctr_cnt = + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK; + } + + for (sc = 0; sc < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; sc++) { + blks[sc].type = core_mask & (1ull << sc) ? + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER : + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + } + + blks[4].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER; + blks[5].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2; + blks[6].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + blks[7].type = (grp == 0) ? 
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM : + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + + WARN_ON(KBASE_HWCNT_V4_BLOCKS_PER_GROUP != 8); + } + + /* Initialise the availability mask */ + desc.avail_mask = 0; + avail_mask_bit = 0; + + for (grp = 0; grp < desc.grp_cnt; grp++) { + size_t blk; + const struct kbase_hwcnt_block_description *blks = + desc.grps[grp].blks; + for (blk = 0; blk < desc.grps[grp].blk_cnt; blk++) { + WARN_ON(blks[blk].inst_cnt != 1); + if (blks[blk].type != + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED) + desc.avail_mask |= (1ull << avail_mask_bit); + + avail_mask_bit++; + } + } + + errcode = kbase_hwcnt_metadata_create(&desc, metadata); + + /* Always clean up, as metadata will make a copy of the input args */ +clean_up: + if (grps) { + for (grp = 0; grp < v4_info->cg_count; grp++) + kfree(grps[grp].blks); + kfree(grps); + } + return errcode; +} + +/** + * kbasep_hwcnt_backend_gpu_v4_dump_bytes() - Get the raw dump buffer size for a + * V4 GPU. + * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. + * + * Return: Size of buffer the V4 GPU needs to perform a counter dump. + */ +static size_t kbasep_hwcnt_backend_gpu_v4_dump_bytes( + const struct kbase_hwcnt_gpu_v4_info *v4_info) +{ + return v4_info->cg_count * + KBASE_HWCNT_V4_BLOCKS_PER_GROUP * + KBASE_HWCNT_V4_VALUES_PER_BLOCK * + KBASE_HWCNT_VALUE_BYTES; +} + +/** + * kbasep_hwcnt_backend_gpu_metadata_v5_create() - Create hardware counter + * metadata for a v5 GPU. + * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. + * @use_secondary: True if secondary performance counters should be used, else + * false. Ignored if secondary counters are not supported. + * @metadata: Non-NULL pointer to where created metadata is stored + * on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_gpu_metadata_v5_create( + const struct kbase_hwcnt_gpu_v5_info *v5_info, + bool use_secondary, + const struct kbase_hwcnt_metadata **metadata) +{ + struct kbase_hwcnt_description desc; + struct kbase_hwcnt_group_description group; + struct kbase_hwcnt_block_description + blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; + size_t non_sc_block_count; + size_t sc_block_count; + + WARN_ON(!v5_info); + WARN_ON(!metadata); + + /* Calculate number of block instances that aren't shader cores */ + non_sc_block_count = 2 + v5_info->l2_count; + /* Calculate number of block instances that are shader cores */ + sc_block_count = fls64(v5_info->core_mask); + + /* + * A system can have up to 64 shader cores, but the 64-bit + * availability mask can't physically represent that many cores as well + * as the other hardware blocks. + * Error out if there are more blocks than our implementation can + * support. + */ + if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) + return -EINVAL; + + /* One Job Manager block */ + blks[0].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM; + blks[0].inst_cnt = 1; + blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[0].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + /* One Tiler block */ + blks[1].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; + blks[1].inst_cnt = 1; + blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[1].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + /* l2_count memsys blks */ + blks[2].type = use_secondary ? 
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 : + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; + blks[2].inst_cnt = v5_info->l2_count; + blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[2].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + /* + * There are as many shader cores in the system as there are bits set in + * the core mask. However, the dump buffer memory requirements need to + * take into account the fact that the core mask may be non-contiguous. + * + * For example, a system with a core mask of 0b1011 has the same dump + * buffer memory requirements as a system with 0b1111, but requires more + * memory than a system with 0b0111. However, core 2 of the system with + * 0b1011 doesn't physically exist, and the dump buffer memory that + * accounts for that core will never be written to when we do a counter + * dump. + * + * We find the core mask's last set bit to determine the memory + * requirements, and embed the core mask into the availability mask so + * we can determine later which shader cores physically exist. + */ + blks[3].type = use_secondary ? + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 : + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; + blks[3].inst_cnt = sc_block_count; + blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[3].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); + + group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; + group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; + group.blks = blks; + + desc.grp_cnt = 1; + desc.grps = &group; + + /* The JM, Tiler, and L2s are always available, and are before cores */ + desc.avail_mask = (1ull << non_sc_block_count) - 1; + /* Embed the core mask directly in the availability mask */ + desc.avail_mask |= (v5_info->core_mask << non_sc_block_count); + + return kbase_hwcnt_metadata_create(&desc, metadata); +} + +/** + * kbasep_hwcnt_backend_gpu_v5_dump_bytes() - Get the raw dump buffer size for a + * V5 GPU. + * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. + * + * Return: Size of buffer the V5 GPU needs to perform a counter dump. + */ +static size_t kbasep_hwcnt_backend_gpu_v5_dump_bytes( + const struct kbase_hwcnt_gpu_v5_info *v5_info) +{ + WARN_ON(!v5_info); + return (2 + v5_info->l2_count + fls64(v5_info->core_mask)) * + KBASE_HWCNT_V5_VALUES_PER_BLOCK * + KBASE_HWCNT_VALUE_BYTES; +} + +int kbase_hwcnt_gpu_info_init( + struct kbase_device *kbdev, + struct kbase_hwcnt_gpu_info *info) +{ + if (!kbdev || !info) + return -EINVAL; + +#ifdef CONFIG_MALI_NO_MALI + /* NO_MALI uses V5 layout, regardless of the underlying platform. 
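	 *
	 * Worked sizing example for the V5 layout described above (figures
	 * assumed for illustration, not taken from this patch): with
	 * l2_count = 2 and a sparse core_mask of 0b1011, sc_block_count =
	 * fls64(0b1011) = 4 and non_sc_block_count = 2 + 2 = 4, so the V5
	 * metadata describes 1 JM + 1 Tiler + 2 Memsys + 4 SC = 8 block
	 * instances. Assuming KBASE_HWCNT_VALUE_BYTES is 4 (one u32 per
	 * value, matching the u32 dump buffers used above), the raw dump
	 * buffer is 8 * 64 * 4 = 2048 bytes, and the availability mask is
	 * ((1 << 4) - 1) | (0b1011 << 4) = 0b10111111, with bit 6 clear for
	 * the non-existent shader core 2.
	 *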
*/ + info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; + info->v5.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; + info->v5.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; +#else + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { + info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V4; + info->v4.cg_count = kbdev->gpu_props.num_core_groups; + info->v4.cgs = kbdev->gpu_props.props.coherency_info.group; + } else { + const struct base_gpu_props *props = &kbdev->gpu_props.props; + const size_t l2_count = props->l2_props.num_l2_slices; + const size_t core_mask = + props->coherency_info.group[0].core_mask; + + info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; + info->v5.l2_count = l2_count; + info->v5.core_mask = core_mask; + } +#endif + return 0; +} + +int kbase_hwcnt_gpu_metadata_create( + const struct kbase_hwcnt_gpu_info *info, + bool use_secondary, + const struct kbase_hwcnt_metadata **out_metadata, + size_t *out_dump_bytes) +{ + int errcode; + const struct kbase_hwcnt_metadata *metadata; + size_t dump_bytes; + + if (!info || !out_metadata || !out_dump_bytes) + return -EINVAL; + + switch (info->type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + dump_bytes = kbasep_hwcnt_backend_gpu_v4_dump_bytes(&info->v4); + errcode = kbasep_hwcnt_backend_gpu_metadata_v4_create( + &info->v4, &metadata); + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + dump_bytes = kbasep_hwcnt_backend_gpu_v5_dump_bytes(&info->v5); + errcode = kbasep_hwcnt_backend_gpu_metadata_v5_create( + &info->v5, use_secondary, &metadata); + break; + default: + return -EINVAL; + } + if (errcode) + return errcode; + + /* + * Dump abstraction size should be exactly the same size and layout as + * the physical dump size, for backwards compatibility. + */ + WARN_ON(dump_bytes != metadata->dump_buf_bytes); + + *out_metadata = metadata; + *out_dump_bytes = dump_bytes; + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_create); + +void kbase_hwcnt_gpu_metadata_destroy( + const struct kbase_hwcnt_metadata *metadata) +{ + if (!metadata) + return; + + kbase_hwcnt_metadata_destroy(metadata); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_destroy); + +int kbase_hwcnt_gpu_dump_get( + struct kbase_hwcnt_dump_buffer *dst, + void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) +{ + const struct kbase_hwcnt_metadata *metadata; + const u32 *dump_src; + size_t src_offset, grp, blk, blk_inst; + + if (!dst || !src || !dst_enable_map || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + metadata = dst->metadata; + dump_src = (const u32 *)src; + src_offset = 0; + + kbase_hwcnt_metadata_for_each_block( + metadata, grp, blk, blk_inst) { + const size_t hdr_cnt = + kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + const size_t ctr_cnt = + kbase_hwcnt_metadata_block_counters_count( + metadata, grp, blk); + + /* Early out if no values in the dest block are enabled */ + if (kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = dump_src + src_offset; + + if (accumulate) { + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, ctr_cnt); + } else { + kbase_hwcnt_dump_buffer_block_copy( + dst_blk, src_blk, (hdr_cnt + ctr_cnt)); + } + } + + src_offset += (hdr_cnt + ctr_cnt); + } + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_dump_get); + +/** + * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block + * enable map 
abstraction to + * a physical block enable + * map. + * @lo: Low 64 bits of block enable map abstraction. + * @hi: High 64 bits of block enable map abstraction. + * + * The abstraction uses 128 bits to enable 128 block values, whereas the + * physical uses just 32 bits, as bit n enables values [n*4, n*4+3]. + * Therefore, this conversion is lossy. + * + * Return: 32-bit physical block enable map. + */ +static inline u32 kbasep_hwcnt_backend_gpu_block_map_to_physical( + u64 lo, + u64 hi) +{ + u32 phys = 0; + u64 dwords[2] = {lo, hi}; + size_t dword_idx; + + for (dword_idx = 0; dword_idx < 2; dword_idx++) { + const u64 dword = dwords[dword_idx]; + u16 packed = 0; + + size_t hword_bit; + + for (hword_bit = 0; hword_bit < 16; hword_bit++) { + const size_t dword_bit = hword_bit * 4; + const u16 mask = + ((dword >> (dword_bit + 0)) & 0x1) | + ((dword >> (dword_bit + 1)) & 0x1) | + ((dword >> (dword_bit + 2)) & 0x1) | + ((dword >> (dword_bit + 3)) & 0x1); + packed |= (mask << hword_bit); + } + phys |= ((u32)packed) << (16 * dword_idx); + } + return phys; +} + +/** + * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical + * block enable map to a + * block enable map + * abstraction. + * @phys: Physical 32-bit block enable map + * @lo: Non-NULL pointer to where low 64 bits of block enable map abstraction + * will be stored. + * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction + * will be stored. + */ +static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical( + u32 phys, + u64 *lo, + u64 *hi) +{ + u64 dwords[2] = {0, 0}; + + size_t dword_idx; + + for (dword_idx = 0; dword_idx < 2; dword_idx++) { + const u16 packed = phys >> (16 * dword_idx); + u64 dword = 0; + + size_t hword_bit; + + for (hword_bit = 0; hword_bit < 16; hword_bit++) { + const size_t dword_bit = hword_bit * 4; + const u64 mask = (packed >> (hword_bit)) & 0x1; + + dword |= mask << (dword_bit + 0); + dword |= mask << (dword_bit + 1); + dword |= mask << (dword_bit + 2); + dword |= mask << (dword_bit + 3); + } + dwords[dword_idx] = dword; + } + *lo = dwords[0]; + *hi = dwords[1]; +} + +void kbase_hwcnt_gpu_enable_map_to_physical( + struct kbase_hwcnt_physical_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + const struct kbase_hwcnt_metadata *metadata; + + u64 jm_bm = 0; + u64 shader_bm = 0; + u64 tiler_bm = 0; + u64 mmu_l2_bm = 0; + + size_t grp, blk, blk_inst; + + if (WARN_ON(!src) || WARN_ON(!dst)) + return; + + metadata = src->metadata; + + kbase_hwcnt_metadata_for_each_block( + metadata, grp, blk, blk_inst) { + const u64 grp_type = kbase_hwcnt_metadata_group_type( + metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); + const size_t blk_val_cnt = + kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + src, grp, blk, blk_inst); + + switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: + shader_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: + tiler_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: + mmu_l2_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: + jm_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: + break; + default: + WARN_ON(true); + } + break; + case 
KBASE_HWCNT_GPU_GROUP_TYPE_V5: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: + jm_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + tiler_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + shader_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + mmu_l2_bm |= *blk_map; + break; + default: + WARN_ON(true); + } + break; + default: + WARN_ON(true); + } + } + + dst->jm_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(jm_bm, 0); + dst->shader_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0); + dst->tiler_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(tiler_bm, 0); + dst->mmu_l2_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_to_physical); + +void kbase_hwcnt_gpu_enable_map_from_physical( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_physical_enable_map *src) +{ + const struct kbase_hwcnt_metadata *metadata; + + u64 ignored_hi; + u64 jm_bm; + u64 shader_bm; + u64 tiler_bm; + u64 mmu_l2_bm; + size_t grp, blk, blk_inst; + + if (WARN_ON(!src) || WARN_ON(!dst)) + return; + + metadata = dst->metadata; + + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->jm_bm, &jm_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->shader_bm, &shader_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->tiler_bm, &tiler_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->mmu_l2_bm, &mmu_l2_bm, &ignored_hi); + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + const u64 grp_type = kbase_hwcnt_metadata_group_type( + metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); + const size_t blk_val_cnt = + kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + dst, grp, blk, blk_inst); + + switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: + *blk_map = shader_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: + *blk_map = tiler_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: + *blk_map = mmu_l2_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: + *blk_map = jm_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: + break; + default: + WARN_ON(true); + } + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: + *blk_map = jm_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + *blk_map = tiler_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + *blk_map = shader_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + *blk_map = mmu_l2_bm; + break; + default: + WARN_ON(true); + } + break; + default: + WARN_ON(true); + } + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_from_physical); + +void 
kbase_hwcnt_gpu_patch_dump_headers( + struct kbase_hwcnt_dump_buffer *buf, + const struct kbase_hwcnt_enable_map *enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!buf) || WARN_ON(!enable_map) || + WARN_ON(buf->metadata != enable_map->metadata)) + return; + + metadata = buf->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + const u64 grp_type = + kbase_hwcnt_metadata_group_type(metadata, grp); + u32 *buf_blk = kbase_hwcnt_dump_buffer_block_instance( + buf, grp, blk, blk_inst); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + enable_map, grp, blk, blk_inst); + const u32 prfcnt_en = + kbasep_hwcnt_backend_gpu_block_map_to_physical( + blk_map[0], 0); + + switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + buf_blk[KBASE_HWCNT_V4_PRFCNT_EN_HEADER] = prfcnt_en; + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; + break; + default: + WARN_ON(true); + } + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_patch_dump_headers); diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h new file mode 100644 index 0000000..509608a --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_gpu.h @@ -0,0 +1,249 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_HWCNT_GPU_H_ +#define _KBASE_HWCNT_GPU_H_ + +#include <linux/types.h> + +struct kbase_device; +struct kbase_hwcnt_metadata; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to + * identify metadata groups. + * @KBASE_HWCNT_GPU_GROUP_TYPE_V4: GPU V4 group type. + * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type. + */ +enum kbase_hwcnt_gpu_group_type { + KBASE_HWCNT_GPU_GROUP_TYPE_V4 = 0x10, + KBASE_HWCNT_GPU_GROUP_TYPE_V5, +}; + +/** + * enum kbase_hwcnt_gpu_v4_block_type - GPU V4 hardware counter block types, + * used to identify metadata blocks. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: Shader block. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: Tiler block. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: MMU/L2 block. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: Job Manager block. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: Reserved block. + */ +enum kbase_hwcnt_gpu_v4_block_type { + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER = 0x20, + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER, + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2, + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM, + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED, +}; + +/** + * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types, + * used to identify metadata blocks. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: Job Manager block. 
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. + */ +enum kbase_hwcnt_gpu_v5_block_type { + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM = 0x40, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, +}; + +/** + * struct kbase_hwcnt_physical_enable_map - Representation of enable map + * directly used by GPU. + * @jm_bm: Job Manager counters selection bitmask. + * @shader_bm: Shader counters selection bitmask. + * @tiler_bm: Tiler counters selection bitmask. + * @mmu_l2_bm: MMU_L2 counters selection bitmask. + */ +struct kbase_hwcnt_physical_enable_map { + u32 jm_bm; + u32 shader_bm; + u32 tiler_bm; + u32 mmu_l2_bm; +}; + +/** + * struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs. + * @cg_count: Core group count. + * @cgs: Non-NULL pointer to array of cg_count coherent group structures. + * + * V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups, + * where each core group may have a physically different layout. + */ +struct kbase_hwcnt_gpu_v4_info { + size_t cg_count; + const struct mali_base_gpu_coherent_group *cgs; +}; + +/** + * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs. + * @l2_count: L2 cache count. + * @core_mask: Shader core mask. May be sparse. + */ +struct kbase_hwcnt_gpu_v5_info { + size_t l2_count; + u64 core_mask; +}; + +/** + * struct kbase_hwcnt_gpu_info - Tagged union with information about the current + * GPU's hwcnt blocks. + * @type: GPU type. + * @v4: Info filled in if a v4 GPU. + * @v5: Info filled in if a v5 GPU. + */ +struct kbase_hwcnt_gpu_info { + enum kbase_hwcnt_gpu_group_type type; + union { + struct kbase_hwcnt_gpu_v4_info v4; + struct kbase_hwcnt_gpu_v5_info v5; + }; +}; + +/** + * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the + * hwcnt metadata. + * @kbdev: Non-NULL pointer to kbase device. + * @info: Non-NULL pointer to data structure to be filled in. + * + * The initialised info struct will only be valid for use while kbdev is valid. + */ +int kbase_hwcnt_gpu_info_init( + struct kbase_device *kbdev, + struct kbase_hwcnt_gpu_info *info); + +/** + * kbase_hwcnt_gpu_metadata_create() - Create hardware counter metadata for the + * current GPU. + * @info: Non-NULL pointer to info struct initialised by + * kbase_hwcnt_gpu_info_init. + * @use_secondary: True if secondary performance counters should be used, else + * false. Ignored if secondary counters are not supported. + * @out_metadata: Non-NULL pointer to where created metadata is stored on + * success. + * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump + * buffer is stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_gpu_metadata_create( + const struct kbase_hwcnt_gpu_info *info, + bool use_secondary, + const struct kbase_hwcnt_metadata **out_metadata, + size_t *out_dump_bytes); + +/** + * kbase_hwcnt_gpu_metadata_destroy() - Destroy GPU hardware counter metadata. + * @metadata: Pointer to metadata to destroy. 
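 *
 * A small usage sketch for the create/destroy pair (illustrative only; kbdev
 * is an assumed device pointer and error handling is omitted):
 *
 *   struct kbase_hwcnt_gpu_info info;
 *   const struct kbase_hwcnt_metadata *md;
 *   size_t dump_bytes;
 *
 *   kbase_hwcnt_gpu_info_init(kbdev, &info);
 *   kbase_hwcnt_gpu_metadata_create(&info, false, &md, &dump_bytes);
 *   (allocate a dump_bytes sized raw buffer, build enable maps from md)
 *   kbase_hwcnt_gpu_metadata_destroy(md);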
+ */ +void kbase_hwcnt_gpu_metadata_destroy( + const struct kbase_hwcnt_metadata *metadata); + +/** + * kbase_hwcnt_gpu_dump_get() - Copy or accumulate enabled counters from the raw + * dump buffer in src into the dump buffer + * abstraction in dst. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src raw dump buffer, of same length + * as returned in out_dump_bytes parameter of + * kbase_hwcnt_gpu_metadata_create. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @accumulate: True if counters in src should be accumulated into dst, + * rather than copied. + * + * The dst and dst_enable_map MUST have been created from the same metadata as + * returned from the call to kbase_hwcnt_gpu_metadata_create as was used to get + * the length of src. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_gpu_dump_get( + struct kbase_hwcnt_dump_buffer *dst, + void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate); + +/** + * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction + * into a physical enable map. + * @dst: Non-NULL pointer to dst physical enable map. + * @src: Non-NULL pointer to src enable map abstraction. + * + * The src must have been created from a metadata returned from a call to + * kbase_hwcnt_gpu_metadata_create. + * + * This is a lossy conversion, as the enable map abstraction has one bit per + * individual counter block value, but the physical enable map uses 1 bit for + * every 4 counters, shared over all instances of a block. + */ +void kbase_hwcnt_gpu_enable_map_to_physical( + struct kbase_hwcnt_physical_enable_map *dst, + const struct kbase_hwcnt_enable_map *src); + +/** + * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to + * an enable map abstraction. + * @dst: Non-NULL pointer to dst enable map abstraction. + * @src: Non-NULL pointer to src physical enable map. + * + * The dst must have been created from a metadata returned from a call to + * kbase_hwcnt_gpu_metadata_create. + * + * This is a lossy conversion, as the physical enable map can technically + * support counter blocks with 128 counters each, but no hardware actually uses + * more than 64, so the enable map abstraction has nowhere to store the enable + * information for the 64 non-existent counters. + */ +void kbase_hwcnt_gpu_enable_map_from_physical( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_physical_enable_map *src); + +/** + * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter + * enable headers in a dump buffer to + * reflect the specified enable map. + * @buf: Non-NULL pointer to dump buffer to patch. + * @enable_map: Non-NULL pointer to enable map. + * + * The buf and enable_map must have been created from a metadata returned from + * a call to kbase_hwcnt_gpu_metadata_create. + * + * This function should be used before handing off a dump buffer over the + * kernel-user boundary, to ensure the header is accurate for the enable map + * used by the user. + */ +void kbase_hwcnt_gpu_patch_dump_headers( + struct kbase_hwcnt_dump_buffer *buf, + const struct kbase_hwcnt_enable_map *enable_map); + +#endif /* _KBASE_HWCNT_GPU_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.c b/mali_kbase/mali_kbase_hwcnt_legacy.c new file mode 100644 index 0000000..b0e6aee --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_legacy.c @@ -0,0 +1,152 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_legacy.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_ioctl.h" + +#include <linux/slab.h> +#include <linux/uaccess.h> + +/** + * struct kbase_hwcnt_legacy_client - Legacy hardware counter client. + * @user_dump_buf: Pointer to a non-NULL user buffer, where dumps are returned. + * @enable_map: Counter enable map. + * @dump_buf: Dump buffer used to manipulate dumps before copied to user. + * @hvcli: Hardware counter virtualizer client. + */ +struct kbase_hwcnt_legacy_client { + void __user *user_dump_buf; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer dump_buf; + struct kbase_hwcnt_virtualizer_client *hvcli; +}; + +int kbase_hwcnt_legacy_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_ioctl_hwcnt_enable *enable, + struct kbase_hwcnt_legacy_client **out_hlcli) +{ + int errcode; + struct kbase_hwcnt_legacy_client *hlcli; + const struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_physical_enable_map phys_em; + + if (!hvirt || !enable || !enable->dump_buffer || !out_hlcli) + return -EINVAL; + + metadata = kbase_hwcnt_virtualizer_metadata(hvirt); + + hlcli = kzalloc(sizeof(*hlcli), GFP_KERNEL); + if (!hlcli) + return -ENOMEM; + + hlcli->user_dump_buf = (void __user *)(uintptr_t)enable->dump_buffer; + + errcode = kbase_hwcnt_enable_map_alloc(metadata, &hlcli->enable_map); + if (errcode) + goto error; + + /* Translate from the ioctl enable map to the internal one */ + phys_em.jm_bm = enable->jm_bm; + phys_em.shader_bm = enable->shader_bm; + phys_em.tiler_bm = enable->tiler_bm; + phys_em.mmu_l2_bm = enable->mmu_l2_bm; + kbase_hwcnt_gpu_enable_map_from_physical(&hlcli->enable_map, &phys_em); + + errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hlcli->dump_buf); + if (errcode) + goto error; + + errcode = kbase_hwcnt_virtualizer_client_create( + hvirt, &hlcli->enable_map, &hlcli->hvcli); + if (errcode) + goto error; + + *out_hlcli = hlcli; + return 0; + +error: + kbase_hwcnt_legacy_client_destroy(hlcli); + return errcode; +} + +void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli) +{ + if (!hlcli) + return; + + kbase_hwcnt_virtualizer_client_destroy(hlcli->hvcli); + kbase_hwcnt_dump_buffer_free(&hlcli->dump_buf); + kbase_hwcnt_enable_map_free(&hlcli->enable_map); + kfree(hlcli); +} + +int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli) +{ + int errcode; + u64 ts_start_ns; + u64 ts_end_ns; + + if (!hlcli) + return -EINVAL; + + /* Dump into the kernel buffer */ + errcode = kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli, + &ts_start_ns, &ts_end_ns, &hlcli->dump_buf); + if (errcode) + return errcode; + + /* Patch the dump buf headers, to 
hide the counters that other hwcnt
+ * clients are using.
+ */
+ kbase_hwcnt_gpu_patch_dump_headers(
+ &hlcli->dump_buf, &hlcli->enable_map);
+
+ /* Zero all non-enabled counters (current values are undefined) */
+ kbase_hwcnt_dump_buffer_zero_non_enabled(
+ &hlcli->dump_buf, &hlcli->enable_map);
+
+ /* Copy into the user's buffer */
+ errcode = copy_to_user(hlcli->user_dump_buf, hlcli->dump_buf.dump_buf,
+ hlcli->dump_buf.metadata->dump_buf_bytes);
+ /* Non-zero errcode implies user buf was invalid or too small */
+ if (errcode)
+ return -EFAULT;
+
+ return 0;
+}
+
+int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli)
+{
+ u64 ts_start_ns;
+ u64 ts_end_ns;
+
+ if (!hlcli)
+ return -EINVAL;
+
+ /* Dump with a NULL buffer to clear this client's counters */
+ return kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli,
+ &ts_start_ns, &ts_end_ns, NULL);
+}
diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.h b/mali_kbase/mali_kbase_hwcnt_legacy.h
new file mode 100644
index 0000000..7a610ae
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_legacy.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Legacy hardware counter interface, giving userspace clients simple,
+ * synchronous access to hardware counters.
+ *
+ * Any functions operating on a single legacy hardware counter client instance
+ * must be externally synchronised.
+ * Different clients may safely be used concurrently.
+ */
+
+#ifndef _KBASE_HWCNT_LEGACY_H_
+#define _KBASE_HWCNT_LEGACY_H_
+
+struct kbase_hwcnt_legacy_client;
+struct kbase_ioctl_hwcnt_enable;
+struct kbase_hwcnt_virtualizer;
+
+/**
+ * kbase_hwcnt_legacy_client_create() - Create a legacy hardware counter client.
+ * @hvirt: Non-NULL pointer to hardware counter virtualizer the client
+ * should be attached to.
+ * @enable: Non-NULL pointer to hwcnt_enable structure, containing a valid
+ * pointer to a user dump buffer large enough to hold a dump, and
+ * the counters that should be enabled.
+ * @out_hlcli: Non-NULL pointer to where the pointer to the created client will
+ * be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_legacy_client_create(
+ struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_ioctl_hwcnt_enable *enable,
+ struct kbase_hwcnt_legacy_client **out_hlcli);
+
+/**
+ * kbase_hwcnt_legacy_client_destroy() - Destroy a legacy hardware counter
+ * client.
+ * @hlcli: Pointer to the legacy hardware counter client.
+ *
+ * Will safely destroy a client in any partial state of construction.
+ */
+void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli);
+
+/**
+ * kbase_hwcnt_legacy_client_dump() - Perform a hardware counter dump into the
+ * client's user buffer.
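Taken together, the legacy interface reduces to a create/dump/destroy cycle around the virtualizer. A minimal sketch follows; example_legacy_dump() is a hypothetical caller, and the virtualizer handle plus the ioctl enable structure are assumed to be supplied by the surrounding driver code.

#include "mali_kbase_hwcnt_legacy.h"

/* Hypothetical helper: one synchronous counter dump via the legacy API. */
static int example_legacy_dump(struct kbase_hwcnt_virtualizer *hvirt,
	struct kbase_ioctl_hwcnt_enable *enable)
{
	struct kbase_hwcnt_legacy_client *hlcli;
	int errcode;

	errcode = kbase_hwcnt_legacy_client_create(hvirt, enable, &hlcli);
	if (errcode)
		return errcode;

	/* Counters accumulate from creation (or the last clear) until here,
	 * then land in the user buffer named in the enable structure.
	 */
	errcode = kbase_hwcnt_legacy_client_dump(hlcli);

	kbase_hwcnt_legacy_client_destroy(hlcli);
	return errcode;
}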
+ * @hlcli: Non-NULL pointer to the legacy hardware counter client. + * + * This function will synchronously dump hardware counters into the user buffer + * specified on client creation, with the counters specified on client creation. + * + * The counters are automatically cleared after each dump, such that the next + * dump performed will return the counter values accumulated between the time of + * this function call and the next dump. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli); + +/** + * kbase_hwcnt_legacy_client_clear() - Perform and discard a hardware counter + * dump. + * @hlcli: Non-NULL pointer to the legacy hardware counter client. + * + * This function will synchronously clear the hardware counters, such that the + * next dump performed will return the counter values accumulated between the + * time of this function call and the next dump. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli); + +#endif /* _KBASE_HWCNT_LEGACY_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_types.c b/mali_kbase/mali_kbase_hwcnt_types.c new file mode 100644 index 0000000..1e9efde --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_types.c @@ -0,0 +1,538 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" + +/* Minimum alignment of each block of hardware counters */ +#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \ + (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) + +/** + * KBASE_HWCNT_ALIGN_UPWARDS() - Align a value to an alignment. + * @value: The value to align upwards. + * @alignment: The alignment. + * + * Return: A number greater than or equal to value that is aligned to alignment. 
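For concreteness, a worked example of the macro using the constants defined by this patch, where blocks are aligned to 64 u32 values:

/*
 * KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES
 *     = (64 * 4) / 4 = 64 values per block
 *
 * KBASE_HWCNT_ALIGN_UPWARDS(60, 64) = 60 + ((64 - (60 % 64)) % 64) = 64
 * KBASE_HWCNT_ALIGN_UPWARDS(64, 64) = 64 + ((64 - (64 % 64)) % 64) = 64
 */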
+ */ +#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ + (value + ((alignment - (value % alignment)) % alignment)) + +int kbase_hwcnt_metadata_create( + const struct kbase_hwcnt_description *desc, + const struct kbase_hwcnt_metadata **out_metadata) +{ + char *buf; + struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_group_metadata *grp_mds; + size_t grp; + size_t enable_map_count; /* Number of u64 bitfields (inc padding) */ + size_t dump_buf_count; /* Number of u32 values (inc padding) */ + size_t avail_mask_bits; /* Number of availability mask bits */ + + size_t size; + size_t offset; + + if (!desc || !out_metadata) + return -EINVAL; + + /* Calculate the bytes needed to tightly pack the metadata */ + + /* Top level metadata */ + size = 0; + size += sizeof(struct kbase_hwcnt_metadata); + + /* Group metadata */ + size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + + /* Block metadata */ + for (grp = 0; grp < desc->grp_cnt; grp++) { + size += sizeof(struct kbase_hwcnt_block_metadata) * + desc->grps[grp].blk_cnt; + } + + /* Single allocation for the entire metadata */ + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Use the allocated memory for the metadata and its members */ + + /* Bump allocate the top level metadata */ + offset = 0; + metadata = (struct kbase_hwcnt_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_metadata); + + /* Bump allocate the group metadata */ + grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + + enable_map_count = 0; + dump_buf_count = 0; + avail_mask_bits = 0; + + for (grp = 0; grp < desc->grp_cnt; grp++) { + size_t blk; + + const struct kbase_hwcnt_group_description *grp_desc = + desc->grps + grp; + struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp; + + size_t group_enable_map_count = 0; + size_t group_dump_buffer_count = 0; + size_t group_avail_mask_bits = 0; + + /* Bump allocate this group's block metadata */ + struct kbase_hwcnt_block_metadata *blk_mds = + (struct kbase_hwcnt_block_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_block_metadata) * + grp_desc->blk_cnt; + + /* Fill in each block in the group's information */ + for (blk = 0; blk < grp_desc->blk_cnt; blk++) { + const struct kbase_hwcnt_block_description *blk_desc = + grp_desc->blks + blk; + struct kbase_hwcnt_block_metadata *blk_md = + blk_mds + blk; + const size_t n_values = + blk_desc->hdr_cnt + blk_desc->ctr_cnt; + + blk_md->type = blk_desc->type; + blk_md->inst_cnt = blk_desc->inst_cnt; + blk_md->hdr_cnt = blk_desc->hdr_cnt; + blk_md->ctr_cnt = blk_desc->ctr_cnt; + blk_md->enable_map_index = group_enable_map_count; + blk_md->enable_map_stride = + kbase_hwcnt_bitfield_count(n_values); + blk_md->dump_buf_index = group_dump_buffer_count; + blk_md->dump_buf_stride = + KBASE_HWCNT_ALIGN_UPWARDS( + n_values, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + blk_md->avail_mask_index = group_avail_mask_bits; + + group_enable_map_count += + blk_md->enable_map_stride * blk_md->inst_cnt; + group_dump_buffer_count += + blk_md->dump_buf_stride * blk_md->inst_cnt; + group_avail_mask_bits += blk_md->inst_cnt; + } + + /* Fill in the group's information */ + grp_md->type = grp_desc->type; + grp_md->blk_cnt = grp_desc->blk_cnt; + grp_md->blk_metadata = blk_mds; + grp_md->enable_map_index = enable_map_count; + grp_md->dump_buf_index = dump_buf_count; + grp_md->avail_mask_index = avail_mask_bits; + + enable_map_count 
+= group_enable_map_count; + dump_buf_count += group_dump_buffer_count; + avail_mask_bits += group_avail_mask_bits; + } + + /* Fill in the top level metadata's information */ + metadata->grp_cnt = desc->grp_cnt; + metadata->grp_metadata = grp_mds; + metadata->enable_map_bytes = + enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; + metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES; + metadata->avail_mask = desc->avail_mask; + + WARN_ON(size != offset); + /* Due to the block alignment, there should be exactly one enable map + * bit per 4 bytes in the dump buffer. + */ + WARN_ON(metadata->dump_buf_bytes != + (metadata->enable_map_bytes * + BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES)); + + *out_metadata = metadata; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_create); + +void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) +{ + kfree(metadata); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_destroy); + +int kbase_hwcnt_enable_map_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_enable_map *enable_map) +{ + u64 *enable_map_buf; + + if (!metadata || !enable_map) + return -EINVAL; + + enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL); + if (!enable_map_buf) + return -ENOMEM; + + enable_map->metadata = metadata; + enable_map->enable_map = enable_map_buf; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc); + +void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map) +{ + if (!enable_map) + return; + + kfree(enable_map->enable_map); + enable_map->enable_map = NULL; + enable_map->metadata = NULL; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free); + +int kbase_hwcnt_dump_buffer_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + u32 *buf; + + if (!metadata || !dump_buf) + return -EINVAL; + + buf = kmalloc(metadata->dump_buf_bytes, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + dump_buf->metadata = metadata; + dump_buf->dump_buf = buf; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc); + +void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) +{ + if (!dump_buf) + return; + + kfree(dump_buf->dump_buf); + memset(dump_buf, 0, sizeof(*dump_buf)); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_free); + +int kbase_hwcnt_dump_buffer_array_alloc( + const struct kbase_hwcnt_metadata *metadata, + size_t n, + struct kbase_hwcnt_dump_buffer_array *dump_bufs) +{ + struct kbase_hwcnt_dump_buffer *buffers; + size_t buf_idx; + unsigned int order; + unsigned long addr; + + if (!metadata || !dump_bufs) + return -EINVAL; + + /* Allocate memory for the dump buffer struct array */ + buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); + if (!buffers) + return -ENOMEM; + + /* Allocate pages for the actual dump buffers, as they tend to be fairly + * large. 
+ */ + order = get_order(metadata->dump_buf_bytes * n); + addr = __get_free_pages(GFP_KERNEL, order); + + if (!addr) { + kfree(buffers); + return -ENOMEM; + } + + dump_bufs->page_addr = addr; + dump_bufs->page_order = order; + dump_bufs->buf_cnt = n; + dump_bufs->bufs = buffers; + + /* Set the buffer of each dump buf */ + for (buf_idx = 0; buf_idx < n; buf_idx++) { + const size_t offset = metadata->dump_buf_bytes * buf_idx; + + buffers[buf_idx].metadata = metadata; + buffers[buf_idx].dump_buf = (u32 *)(addr + offset); + } + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_alloc); + +void kbase_hwcnt_dump_buffer_array_free( + struct kbase_hwcnt_dump_buffer_array *dump_bufs) +{ + if (!dump_bufs) + return; + + kfree(dump_bufs->bufs); + free_pages(dump_bufs->page_addr, dump_bufs->page_order); + memset(dump_bufs, 0, sizeof(*dump_bufs)); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_free); + +void kbase_hwcnt_dump_buffer_zero( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk; + size_t val_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero); + +void kbase_hwcnt_dump_buffer_zero_strict( + struct kbase_hwcnt_dump_buffer *dst) +{ + if (WARN_ON(!dst)) + return; + + memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict); + +void kbase_hwcnt_dump_buffer_zero_non_enabled( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + + /* Align upwards to include padding bytes */ + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + + if (kbase_hwcnt_metadata_block_instance_avail( + metadata, grp, blk, blk_inst)) { + /* Block available, so only zero non-enabled values */ + kbase_hwcnt_dump_buffer_block_zero_non_enabled( + dst_blk, blk_em, val_cnt); + } else { + /* Block not available, so zero the entire thing */ + kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + } + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_non_enabled); + +void kbase_hwcnt_dump_buffer_copy( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, 
blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk; + const u32 *src_blk; + size_t val_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy); + +void kbase_hwcnt_dump_buffer_copy_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + /* Align upwards to include padding bytes */ + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + + kbase_hwcnt_dump_buffer_block_copy_strict( + dst_blk, src_blk, blk_em, val_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict); + +void kbase_hwcnt_dump_buffer_accumulate( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk; + const u32 *src_blk; + size_t hdr_cnt; + size_t ctr_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + hdr_cnt = kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + ctr_cnt = kbase_hwcnt_metadata_block_counters_count( + metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, ctr_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate); + +void kbase_hwcnt_dump_buffer_accumulate_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, 
blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count( + metadata, grp, blk); + /* Align upwards to include padding bytes */ + ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(hdr_cnt + ctr_cnt, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES) - hdr_cnt); + + kbase_hwcnt_dump_buffer_block_accumulate_strict( + dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict); diff --git a/mali_kbase/mali_kbase_hwcnt_types.h b/mali_kbase/mali_kbase_hwcnt_types.h new file mode 100644 index 0000000..4d78c84 --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_types.h @@ -0,0 +1,1087 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter types. + * Contains structures for describing the physical layout of hardware counter + * dump buffers and enable maps within a system. + * + * Also contains helper functions for manipulation of these dump buffers and + * enable maps. + * + * Through use of these structures and functions, hardware counters can be + * enabled, copied, accumulated, and generally manipulated in a generic way, + * regardless of the physical counter dump layout. + * + * Terminology: + * + * Hardware Counter System: + * A collection of hardware counter groups, making a full hardware counter + * system. + * Hardware Counter Group: + * A group of Hardware Counter Blocks (e.g. a t62x might have more than one + * core group, so has one counter group per core group, where each group + * may have a different number and layout of counter blocks). + * Hardware Counter Block: + * A block of hardware counters (e.g. shader block, tiler block). + * Hardware Counter Block Instance: + * An instance of a Hardware Counter Block (e.g. an MP4 GPU might have + * 4 shader block instances). + * + * Block Header: + * A header value inside a counter block. Headers don't count anything, + * so it is only valid to copy or zero them. Headers are always the first + * values in the block. + * Block Counter: + * A counter value inside a counter block. Counters can be zeroed, copied, + * or accumulated. 
Counters are always immediately after the headers in the + * block. + * Block Value: + * A catch-all term for block headers and block counters. + * + * Enable Map: + * An array of u64 bitfields, where each bit either enables exactly one + * block value, or is unused (padding). + * Dump Buffer: + * An array of u32 values, where each u32 corresponds either to one block + * value, or is unused (padding). + * Availability Mask: + * A bitfield, where each bit corresponds to whether a block instance is + * physically available (e.g. an MP3 GPU may have a sparse core mask of + * 0b1011, meaning it only has 3 cores but for hardware counter dumps has the + * same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this + * case, the availability mask might be 0b1011111 (the exact layout will + * depend on the specific hardware architecture), with the 3 extra early bits + * corresponding to other block instances in the hardware counter system). + * Metadata: + * Structure describing the physical layout of the enable map and dump buffers + * for a specific hardware counter system. + * + */ + +#ifndef _KBASE_HWCNT_TYPES_H_ +#define _KBASE_HWCNT_TYPES_H_ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/types.h> +#include "mali_malisw.h" + +/* Number of bytes in each bitfield */ +#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64)) + +/* Number of bits in each bitfield */ +#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE) + +/* Number of bytes for each counter value */ +#define KBASE_HWCNT_VALUE_BYTES (sizeof(u32)) + +/* Number of bits in an availability mask (i.e. max total number of block + * instances supported in a Hardware Counter System) + */ +#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) + +/** + * struct kbase_hwcnt_block_description - Description of one or more identical, + * contiguous, Hardware Counter Blocks. + * @type: The arbitrary identifier used to identify the type of the block. + * @inst_cnt: The number of Instances of the block. + * @hdr_cnt: The number of 32-bit Block Headers in the block. + * @ctr_cnt: The number of 32-bit Block Counters in the block. + */ +struct kbase_hwcnt_block_description { + u64 type; + size_t inst_cnt; + size_t hdr_cnt; + size_t ctr_cnt; +}; + +/** + * struct kbase_hwcnt_group_description - Description of one or more identical, + * contiguous Hardware Counter Groups. + * @type: The arbitrary identifier used to identify the type of the group. + * @blk_cnt: The number of types of Hardware Counter Block in the group. + * @blks: Non-NULL pointer to an array of blk_cnt block descriptions, + * describing each type of Hardware Counter Block in the group. + */ +struct kbase_hwcnt_group_description { + u64 type; + size_t blk_cnt; + const struct kbase_hwcnt_block_description *blks; +}; + +/** + * struct kbase_hwcnt_description - Description of a Hardware Counter System. + * @grp_cnt: The number of Hardware Counter Groups. + * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, + * describing each Hardware Counter Group in the system. + * @avail_mask: Flat Availability Mask for all block instances in the system. + */ +struct kbase_hwcnt_description { + size_t grp_cnt; + const struct kbase_hwcnt_group_description *grps; + u64 avail_mask; +}; + +/** + * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout + * of a block in a Hardware Counter System's + * Dump Buffers and Enable Maps. 
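To make the description structures concrete, a hypothetical single-group system with one front-end block and four shader-style blocks could be described as below. The type IDs, counts and availability mask are illustrative only, not values taken from this patch; kbase_hwcnt_metadata_create() would then derive the packed layout from such a description.

#include "mali_kbase_hwcnt_types.h"

/* Illustrative only: one group holding two block types, each block having
 * 4 headers and 60 counters (64 values, matching the block alignment).
 */
static const struct kbase_hwcnt_block_description example_blks[] = {
	{ .type = 0x1, .inst_cnt = 1, .hdr_cnt = 4, .ctr_cnt = 60 },
	{ .type = 0x2, .inst_cnt = 4, .hdr_cnt = 4, .ctr_cnt = 60 },
};

static const struct kbase_hwcnt_group_description example_grp = {
	.type = 0x10,
	.blk_cnt = ARRAY_SIZE(example_blks),
	.blks = example_blks,
};

static const struct kbase_hwcnt_description example_desc = {
	.grp_cnt = 1,
	.grps = &example_grp,
	/* Five block instances in total, all physically present. */
	.avail_mask = 0x1f,
};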
+ * @type: The arbitrary identifier used to identify the type of the + * block. + * @inst_cnt: The number of Instances of the block. + * @hdr_cnt: The number of 32-bit Block Headers in the block. + * @ctr_cnt: The number of 32-bit Block Counters in the block. + * @enable_map_index: Index in u64s into the parent's Enable Map where the + * Enable Map bitfields of the Block Instances described by + * this metadata start. + * @enable_map_stride: Stride in u64s between the Enable Maps of each of the + * Block Instances described by this metadata. + * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the + * Dump Buffers of the Block Instances described by this + * metadata start. + * @dump_buf_stride: Stride in u32s between the Dump Buffers of each of the + * Block Instances described by this metadata. + * @avail_mask_index: Index in bits into the parent's Availability Mask where + * the Availability Masks of the Block Instances described + * by this metadata start. + */ +struct kbase_hwcnt_block_metadata { + u64 type; + size_t inst_cnt; + size_t hdr_cnt; + size_t ctr_cnt; + size_t enable_map_index; + size_t enable_map_stride; + size_t dump_buf_index; + size_t dump_buf_stride; + size_t avail_mask_index; +}; + +/** + * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout + * of a group of blocks in a Hardware + * Counter System's Dump Buffers and Enable + * Maps. + * @type: The arbitrary identifier used to identify the type of the + * group. + * @blk_cnt: The number of types of Hardware Counter Block in the + * group. + * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata, + * describing the physical layout of each type of Hardware + * Counter Block in the group. + * @enable_map_index: Index in u64s into the parent's Enable Map where the + * Enable Maps of the blocks within the group described by + * this metadata start. + * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the + * Dump Buffers of the blocks within the group described by + * metadata start. + * @avail_mask_index: Index in bits into the parent's Availability Mask where + * the Availability Masks of the blocks within the group + * described by this metadata start. + */ +struct kbase_hwcnt_group_metadata { + u64 type; + size_t blk_cnt; + const struct kbase_hwcnt_block_metadata *blk_metadata; + size_t enable_map_index; + size_t dump_buf_index; + size_t avail_mask_index; +}; + +/** + * struct kbase_hwcnt_metadata - Metadata describing the physical layout + * of Dump Buffers and Enable Maps within a + * Hardware Counter System. + * @grp_cnt: The number of Hardware Counter Groups. + * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata, + * describing the physical layout of each Hardware Counter + * Group in the system. + * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. + * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system. + * @avail_mask: The Availability Mask for the system. + */ +struct kbase_hwcnt_metadata { + size_t grp_cnt; + const struct kbase_hwcnt_group_metadata *grp_metadata; + size_t enable_map_bytes; + size_t dump_buf_bytes; + u64 avail_mask; +}; + +/** + * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64 + * bitfields. + * @metadata: Non-NULL pointer to metadata used to identify, and to describe + * the layout of the enable map. 
+ * @enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an array + * of u64 bitfields, each bit of which enables one hardware + * counter. + */ +struct kbase_hwcnt_enable_map { + const struct kbase_hwcnt_metadata *metadata; + u64 *enable_map; +}; + +/** + * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer. Array of u32 + * values. + * @metadata: Non-NULL pointer to metadata used to identify, and to describe + * the layout of the Dump Buffer. + * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array + * of u32 values. + */ +struct kbase_hwcnt_dump_buffer { + const struct kbase_hwcnt_metadata *metadata; + u32 *dump_buf; +}; + +/** + * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array. + * @page_addr: Address of allocated pages. A single allocation is used for all + * Dump Buffers in the array. + * @page_order: The allocation order of the pages. + * @buf_cnt: The number of allocated Dump Buffers. + * @bufs: Non-NULL pointer to the array of Dump Buffers. + */ +struct kbase_hwcnt_dump_buffer_array { + unsigned long page_addr; + unsigned int page_order; + size_t buf_cnt; + struct kbase_hwcnt_dump_buffer *bufs; +}; + +/** + * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object + * from a description. + * @desc: Non-NULL pointer to a hardware counter description. + * @metadata: Non-NULL pointer to where created metadata will be stored on + * success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_metadata_create( + const struct kbase_hwcnt_description *desc, + const struct kbase_hwcnt_metadata **metadata); + +/** + * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object. + * @metadata: Pointer to hardware counter metadata + */ +void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); + +/** + * kbase_hwcnt_metadata_group_count() - Get the number of groups. + * @metadata: Non-NULL pointer to metadata. + * + * Return: Number of hardware counter groups described by metadata. + */ +#define kbase_hwcnt_metadata_group_count(metadata) \ + ((metadata)->grp_cnt) + +/** + * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * + * Return: Type of the group grp. + */ +#define kbase_hwcnt_metadata_group_type(metadata, grp) \ + ((metadata)->grp_metadata[(grp)].type) + +/** + * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * + * Return: Number of blocks in group grp. + */ +#define kbase_hwcnt_metadata_block_count(metadata, grp) \ + ((metadata)->grp_metadata[(grp)].blk_cnt) + +/** + * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Type of the block blk in group grp. + */ +#define kbase_hwcnt_metadata_block_type(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].type) + +/** + * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of + * a block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of instances of block blk in group grp. 
+ */ +#define kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].inst_cnt) + +/** + * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter + * headers. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of u32 counter headers in each instance of block blk in + * group grp. + */ +#define kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].hdr_cnt) + +/** + * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of u32 counters in each instance of block blk in group + * grp. + */ +#define kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].ctr_cnt) + +/** + * kbase_hwcnt_metadata_block_values_count() - Get the number of values. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of u32 headers plus counters in each instance of block blk + * in group grp. + */ +#define kbase_hwcnt_metadata_block_values_count(metadata, grp, blk) \ + (kbase_hwcnt_metadata_block_counters_count((metadata), (grp), (blk)) \ + + kbase_hwcnt_metadata_block_headers_count((metadata), (grp), (blk))) + +/** + * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in + * the metadata. + * @md: Non-NULL pointer to metadata. + * @grp: size_t variable used as group iterator. + * @blk: size_t variable used as block iterator. + * @blk_inst: size_t variable used as block instance iterator. + * + * Iteration order is group, then block, then block instance (i.e. linearly + * through memory). + */ +#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \ + for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \ + for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \ + for ((blk_inst) = 0; (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); (blk_inst)++) + +/** + * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail + * mask corresponding to the block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: The bit index into the avail mask for the block. + */ +static inline size_t kbase_hwcnt_metadata_block_avail_bit( + const struct kbase_hwcnt_metadata *metadata, + size_t grp, + size_t blk) +{ + const size_t bit = + metadata->grp_metadata[grp].avail_mask_index + + metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index; + + return bit; +} + +/** + * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is + * available. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: true if the block instance is available, else false. 
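A small sketch of how the iterator macro pairs with the availability helper documented here; example_count_avail() is hypothetical, everything else comes from this header.

#include "mali_kbase_hwcnt_types.h"

/* Hypothetical helper: count the block instances that are physically
 * present according to the availability mask.
 */
static size_t example_count_avail(const struct kbase_hwcnt_metadata *md)
{
	size_t grp, blk, blk_inst;
	size_t avail = 0;

	kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) {
		if (kbase_hwcnt_metadata_block_instance_avail(
				md, grp, blk, blk_inst))
			avail++;
	}

	return avail;
}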
+ */ +static inline bool kbase_hwcnt_metadata_block_instance_avail( + const struct kbase_hwcnt_metadata *metadata, + size_t grp, + size_t blk, + size_t blk_inst) +{ + const size_t bit = kbase_hwcnt_metadata_block_avail_bit( + metadata, grp, blk) + blk_inst; + const u64 mask = 1ull << bit; + + return (metadata->avail_mask & mask) != 0; +} + +/** + * kbase_hwcnt_enable_map_alloc() - Allocate an enable map. + * @metadata: Non-NULL pointer to metadata describing the system. + * @enable_map: Non-NULL pointer to enable map to be initialised. Will be + * initialised to all zeroes (i.e. all counters disabled). + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_enable_map_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_enable_map *enable_map); + +/** + * kbase_hwcnt_enable_map_free() - Free an enable map. + * @enable_map: Enable map to be freed. + * + * Can be safely called on an all-zeroed enable map structure, or on an already + * freed enable map. + */ +void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); + +/** + * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block + * instance's enable map. + * @map: Non-NULL pointer to (const) enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: (const) u64* to the bitfield(s) used as the enable map for the + * block instance. + */ +#define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \ + ((map)->enable_map + \ + (map)->metadata->grp_metadata[(grp)].enable_map_index + \ + (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \ + (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst)) + +/** + * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required + * to have at minimum one bit per value. + * @val_cnt: Number of values. + * + * Return: Number of required bitfields. + */ +static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt) +{ + return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / + KBASE_HWCNT_BITFIELD_BITS; +} + +/** + * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block. + * @dst: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + */ +static inline void kbase_hwcnt_enable_map_block_disable_all( + struct kbase_hwcnt_enable_map *dst, + size_t grp, + size_t blk, + size_t blk_inst) +{ + const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + dst->metadata, grp, blk); + const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( + dst, grp, blk, blk_inst); + + memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES); +} + +/** + * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map. + * @dst: Non-NULL pointer to enable map to zero. + */ +static inline void kbase_hwcnt_enable_map_disable_all( + struct kbase_hwcnt_enable_map *dst) +{ + memset(dst->enable_map, 0, dst->metadata->enable_map_bytes); +} + +/** + * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block. + * @dst: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. 
+ */
+static inline void kbase_hwcnt_enable_map_block_enable_all(
+ struct kbase_hwcnt_enable_map *dst,
+ size_t grp,
+ size_t blk,
+ size_t blk_inst)
+{
+ const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+ dst->metadata, grp, blk);
+ const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+ u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
+ dst, grp, blk, blk_inst);
+
+ size_t bitfld_idx;
+
+ for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
+ const u64 remaining_values = val_cnt -
+ (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+ u64 block_enable_map_mask = U64_MAX;
+
+ if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
+ block_enable_map_mask = (1ull << remaining_values) - 1;
+
+ block_enable_map[bitfld_idx] = block_enable_map_mask;
+ }
+}
+
+/**
+ * kbase_hwcnt_enable_map_enable_all() - Enable all values in an enable
+ * map.
+ * @dst: Non-NULL pointer to enable map.
+ */
+static inline void kbase_hwcnt_enable_map_enable_all(
+ struct kbase_hwcnt_enable_map *dst)
+{
+ size_t grp, blk, blk_inst;
+
+ kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst)
+ kbase_hwcnt_enable_map_block_enable_all(
+ dst, grp, blk, blk_inst);
+}
+
+/**
+ * kbase_hwcnt_enable_map_copy() - Copy an enable map to another.
+ * @dst: Non-NULL pointer to destination enable map.
+ * @src: Non-NULL pointer to source enable map.
+ *
+ * The dst and src MUST have been created from the same metadata.
+ */
+static inline void kbase_hwcnt_enable_map_copy(
+ struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
+{
+ memcpy(dst->enable_map,
+ src->enable_map,
+ dst->metadata->enable_map_bytes);
+}
+
+/**
+ * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst.
+ * @dst: Non-NULL pointer to destination enable map.
+ * @src: Non-NULL pointer to source enable map.
+ *
+ * The dst and src MUST have been created from the same metadata.
+ */
+static inline void kbase_hwcnt_enable_map_union(
+ struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
+{
+ const size_t bitfld_count =
+ dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES;
+ size_t i;
+
+ for (i = 0; i < bitfld_count; i++)
+ dst->enable_map[i] |= src->enable_map[i];
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block
+ * instance are enabled.
+ * @enable_map: Non-NULL pointer to enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: true if any values in the block are enabled, else false.
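As a brief sketch of the copy and union helpers working together, a hypothetical example_combine_requests() merges two clients' requests into one map that could then drive the hardware; all three maps are assumed to share the same metadata.

#include "mali_kbase_hwcnt_types.h"

/* Hypothetical helper: derive the combined enable map for two clients. */
static void example_combine_requests(struct kbase_hwcnt_enable_map *dst,
	const struct kbase_hwcnt_enable_map *client_a,
	const struct kbase_hwcnt_enable_map *client_b)
{
	kbase_hwcnt_enable_map_copy(dst, client_a);
	kbase_hwcnt_enable_map_union(dst, client_b);
}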
+ */ +static inline bool kbase_hwcnt_enable_map_block_enabled( + const struct kbase_hwcnt_enable_map *enable_map, + size_t grp, + size_t blk, + size_t blk_inst) +{ + bool any_enabled = false; + const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + enable_map->metadata, grp, blk); + const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + const u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( + enable_map, grp, blk, blk_inst); + + size_t bitfld_idx; + + for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { + const u64 remaining_values = val_cnt - + (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); + u64 block_enable_map_mask = U64_MAX; + + if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) + block_enable_map_mask = (1ull << remaining_values) - 1; + + any_enabled = any_enabled || + (block_enable_map[bitfld_idx] & block_enable_map_mask); + } + + return any_enabled; +} + +/** + * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled. + * @enable_map: Non-NULL pointer to enable map. + * + * Return: true if any values are enabled, else false. + */ +static inline bool kbase_hwcnt_enable_map_any_enabled( + const struct kbase_hwcnt_enable_map *enable_map) +{ + size_t grp, blk, blk_inst; + + kbase_hwcnt_metadata_for_each_block( + enable_map->metadata, grp, blk, blk_inst) { + if (kbase_hwcnt_enable_map_block_enabled( + enable_map, grp, blk, blk_inst)) + return true; + } + + return false; +} + +/** + * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block + * instance is enabled. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to check in the block instance. + * + * Return: true if the value was enabled, else false. + */ +static inline bool kbase_hwcnt_enable_map_block_value_enabled( + const u64 *bitfld, + size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + return (bitfld[idx] & mask) != 0; +} + +/** + * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block + * instance. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to enable in the block instance. + */ +static inline void kbase_hwcnt_enable_map_block_enable_value( + u64 *bitfld, + size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + bitfld[idx] |= mask; +} + +/** + * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block + * instance. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to disable in the block instance. + */ +static inline void kbase_hwcnt_enable_map_block_disable_value( + u64 *bitfld, + size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + bitfld[idx] &= ~mask; +} + +/** + * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer. + * @metadata: Non-NULL pointer to metadata describing the system. + * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be + * initialised to undefined values, so must be used as a copy dest, + * or cleared before use. 
+ * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_dump_buffer_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_dump_buffer_free() - Free a dump buffer. + * @dump_buf: Dump buffer to be freed. + * + * Can be safely called on an all-zeroed dump buffer structure, or on an already + * freed dump buffer. + */ +void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers. + * @metadata: Non-NULL pointer to metadata describing the system. + * @n: Number of dump buffers to allocate + * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. Each + * dump buffer in the array will be initialised to undefined values, + * so must be used as a copy dest, or cleared before use. + * + * A single contiguous page allocation will be used for all of the buffers + * inside the array, where: + * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_dump_buffer_array_alloc( + const struct kbase_hwcnt_metadata *metadata, + size_t n, + struct kbase_hwcnt_dump_buffer_array *dump_bufs); + +/** + * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array. + * @dump_bufs: Dump buffer array to be freed. + * + * Can be safely called on an all-zeroed dump buffer array structure, or on an + * already freed dump buffer array. + */ +void kbase_hwcnt_dump_buffer_array_free( + struct kbase_hwcnt_dump_buffer_array *dump_bufs); + +/** + * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block + * instance's dump buffer. + * @buf: Non-NULL pointer to (const) dump buffer. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: (const) u32* to the dump buffer for the block instance. + */ +#define kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst) \ + ((buf)->dump_buf + \ + (buf)->metadata->grp_metadata[(grp)].dump_buf_index + \ + (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_index + \ + (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_stride * (blk_inst)) + +/** + * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst. + * After the operation, all non-enabled values + * will be undefined. + * @dst: Non-NULL pointer to dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst and dst_enable_map MUST have been created from the same metadata. + */ +void kbase_hwcnt_dump_buffer_zero( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_zero( + u32 *dst_blk, + size_t val_cnt) +{ + memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); +} + +/** + * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst. + * After the operation, all values + * (including padding bytes) will be + * zero. + * Slower than the non-strict variant. + * @dst: Non-NULL pointer to dump buffer. 
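A usage sketch of the two allocators; example_alloc_pair() is hypothetical and assumes the metadata came from kbase_hwcnt_metadata_create() or the GPU-specific helper.

#include "mali_kbase_hwcnt_types.h"

/* Hypothetical helper: allocate a matching enable map and dump buffer. */
static int example_alloc_pair(const struct kbase_hwcnt_metadata *md,
	struct kbase_hwcnt_enable_map *em,
	struct kbase_hwcnt_dump_buffer *db)
{
	int errcode = kbase_hwcnt_enable_map_alloc(md, em);

	if (errcode)
		return errcode;

	/* Dump buffer contents are undefined until zeroed or written. */
	errcode = kbase_hwcnt_dump_buffer_alloc(md, db);
	if (errcode)
		kbase_hwcnt_enable_map_free(em);

	return errcode;
}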
+ */ +void kbase_hwcnt_dump_buffer_zero_strict( + struct kbase_hwcnt_dump_buffer *dst); + +/** + * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in + * dst (including padding bytes and + * unavailable blocks). + * After the operation, all enabled + * values will be unchanged. + * @dst: Non-NULL pointer to dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst and dst_enable_map MUST have been created from the same metadata. + */ +void kbase_hwcnt_dump_buffer_zero_non_enabled( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled + * values in a block. + * After the operation, all + * enabled values will be + * unchanged. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled( + u32 *dst_blk, + const u64 *blk_em, + size_t val_cnt) +{ + size_t val; + + for (val = 0; val < val_cnt; val++) { + if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val)) + dst_blk[val] = 0; + } +} + +/** + * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst. + * After the operation, all non-enabled values + * will be undefined. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_copy( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_copy( + u32 *dst_blk, + const u32 *src_blk, + size_t val_cnt) +{ + /* Copy all the counters in the block instance. + * Values of non-enabled counters are undefined. + */ + memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); +} + +/** + * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to + * dst. + * After the operation, all non-enabled + * values (including padding bytes) will + * be zero. + * Slower than the non-strict variant. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_copy_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values + * from src to dst. + * After the operation, all + * non-enabled values will be + * zero. 
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_cnt: Number of values in the block. + * + * After the copy, any disabled values in dst will be zero. + */ +static inline void kbase_hwcnt_dump_buffer_block_copy_strict( + u32 *dst_blk, + const u32 *src_blk, + const u64 *blk_em, + size_t val_cnt) +{ + size_t val; + + for (val = 0; val < val_cnt; val++) { + bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled( + blk_em, val); + + dst_blk[val] = val_enabled ? src_blk[val] : 0; + } +} + +/** + * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and + * accumulate all enabled counters from + * src to dst. + * After the operation, all non-enabled + * values will be undefined. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_accumulate( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and + * accumulate all block counters + * from src to dst. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @hdr_cnt: Number of headers in the block. + * @ctr_cnt: Number of counters in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_accumulate( + u32 *dst_blk, + const u32 *src_blk, + size_t hdr_cnt, + size_t ctr_cnt) +{ + size_t ctr; + /* Copy all the headers in the block instance. + * Values of non-enabled headers are undefined. + */ + memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES); + + /* Accumulate all the counters in the block instance. + * Values of non-enabled counters are undefined. + */ + for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { + u32 *dst_ctr = dst_blk + ctr; + const u32 *src_ctr = src_blk + ctr; + + const u32 src_counter = *src_ctr; + const u32 dst_counter = *dst_ctr; + + /* Saturating add */ + u32 accumulated = src_counter + dst_counter; + + if (accumulated < src_counter) + accumulated = U32_MAX; + + *dst_ctr = accumulated; + } +} + +/** + * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and + * accumulate all enabled counters + * from src to dst. + * After the operation, all + * non-enabled values (including + * padding bytes) will be zero. + * Slower than the non-strict + * variant. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. 
+ */ +void kbase_hwcnt_dump_buffer_accumulate_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block + * headers and accumulate + * all block counters from + * src to dst. + * After the operation, all + * non-enabled values will + * be zero. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @hdr_cnt: Number of headers in the block. + * @ctr_cnt: Number of counters in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict( + u32 *dst_blk, + const u32 *src_blk, + const u64 *blk_em, + size_t hdr_cnt, + size_t ctr_cnt) +{ + size_t ctr; + + kbase_hwcnt_dump_buffer_block_copy_strict( + dst_blk, src_blk, blk_em, hdr_cnt); + + for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { + bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled( + blk_em, ctr); + + u32 *dst_ctr = dst_blk + ctr; + const u32 *src_ctr = src_blk + ctr; + + const u32 src_counter = *src_ctr; + const u32 dst_counter = *dst_ctr; + + /* Saturating add */ + u32 accumulated = src_counter + dst_counter; + + if (accumulated < src_counter) + accumulated = U32_MAX; + + *dst_ctr = ctr_enabled ? accumulated : 0; + } +} + +#endif /* _KBASE_HWCNT_TYPES_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_virtualizer.c b/mali_kbase/mali_kbase_hwcnt_virtualizer.c new file mode 100644 index 0000000..26e9852 --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_virtualizer.c @@ -0,0 +1,688 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_accumulator.h" +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_malisw.h" +#include "mali_kbase_debug.h" +#include "mali_kbase_linux.h" + +#include <linux/mutex.h> +#include <linux/slab.h> + +/** + * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure. + * @hctx: Hardware counter context being virtualized. + * @metadata: Hardware counter metadata. + * @lock: Lock acquired at all entrypoints, to protect mutable state. + * @client_count: Current number of virtualizer clients. + * @clients: List of virtualizer clients. + * @accum: Hardware counter accumulator. NULL if no clients. + * @scratch_map: Enable map used as scratch space during counter changes. + * @scratch_buf: Dump buffer used as scratch space during dumps. 
+ */ +struct kbase_hwcnt_virtualizer { + struct kbase_hwcnt_context *hctx; + const struct kbase_hwcnt_metadata *metadata; + struct mutex lock; + size_t client_count; + struct list_head clients; + struct kbase_hwcnt_accumulator *accum; + struct kbase_hwcnt_enable_map scratch_map; + struct kbase_hwcnt_dump_buffer scratch_buf; +}; + +/** + * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure. + * @node: List node used for virtualizer client list. + * @hvirt: Hardware counter virtualizer. + * @enable_map: Enable map with client's current enabled counters. + * @accum_buf: Dump buffer with client's current accumulated counters. + * @has_accum: True if accum_buf contains any accumulated counters. + * @ts_start_ns: Counter collection start time of current dump. + */ +struct kbase_hwcnt_virtualizer_client { + struct list_head node; + struct kbase_hwcnt_virtualizer *hvirt; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer accum_buf; + bool has_accum; + u64 ts_start_ns; +}; + +const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( + struct kbase_hwcnt_virtualizer *hvirt) +{ + if (!hvirt) + return NULL; + + return hvirt->metadata; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_metadata); + +/** + * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory. + * @hvcli: Pointer to virtualizer client. + * + * Will safely free a client in any partial state of construction. + */ +static void kbasep_hwcnt_virtualizer_client_free( + struct kbase_hwcnt_virtualizer_client *hvcli) +{ + if (!hvcli) + return; + + kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf); + kbase_hwcnt_enable_map_free(&hvcli->enable_map); + kfree(hvcli); +} + +/** + * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer + * client. + * @metadata: Non-NULL pointer to counter metadata. + * @out_hvcli: Non-NULL pointer to where created client will be stored on + * success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_virtualizer_client_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_virtualizer_client **out_hvcli) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *hvcli = NULL; + + WARN_ON(!metadata); + WARN_ON(!out_hvcli); + + hvcli = kzalloc(sizeof(*hvcli), GFP_KERNEL); + if (!hvcli) + return -ENOMEM; + + errcode = kbase_hwcnt_enable_map_alloc(metadata, &hvcli->enable_map); + if (errcode) + goto error; + + errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hvcli->accum_buf); + if (errcode) + goto error; + + *out_hvcli = hvcli; + return 0; +error: + kbasep_hwcnt_virtualizer_client_free(hvcli); + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_accumulate - Accumulate a dump buffer into a + * client's accumulation buffer. + * @hvcli: Non-NULL pointer to virtualizer client. + * @dump_buf: Non-NULL pointer to dump buffer to accumulate from. 
+ */ +static void kbasep_hwcnt_virtualizer_client_accumulate( + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_dump_buffer *dump_buf) +{ + WARN_ON(!hvcli); + WARN_ON(!dump_buf); + lockdep_assert_held(&hvcli->hvirt->lock); + + if (hvcli->has_accum) { + /* If already some accumulation, accumulate */ + kbase_hwcnt_dump_buffer_accumulate( + &hvcli->accum_buf, dump_buf, &hvcli->enable_map); + } else { + /* If no accumulation, copy */ + kbase_hwcnt_dump_buffer_copy( + &hvcli->accum_buf, dump_buf, &hvcli->enable_map); + } + hvcli->has_accum = true; +} + +/** + * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter + * accumulator after final client + * removal. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Will safely terminate the accumulator in any partial state of initialisation. + */ +static void kbasep_hwcnt_virtualizer_accumulator_term( + struct kbase_hwcnt_virtualizer *hvirt) +{ + WARN_ON(!hvirt); + lockdep_assert_held(&hvirt->lock); + WARN_ON(hvirt->client_count); + + kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf); + kbase_hwcnt_enable_map_free(&hvirt->scratch_map); + kbase_hwcnt_accumulator_release(hvirt->accum); + hvirt->accum = NULL; +} + +/** + * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter + * accumulator before first client + * addition. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_virtualizer_accumulator_init( + struct kbase_hwcnt_virtualizer *hvirt) +{ + int errcode; + + WARN_ON(!hvirt); + lockdep_assert_held(&hvirt->lock); + WARN_ON(hvirt->client_count); + WARN_ON(hvirt->accum); + + errcode = kbase_hwcnt_accumulator_acquire( + hvirt->hctx, &hvirt->accum); + if (errcode) + goto error; + + errcode = kbase_hwcnt_enable_map_alloc( + hvirt->metadata, &hvirt->scratch_map); + if (errcode) + goto error; + + errcode = kbase_hwcnt_dump_buffer_alloc( + hvirt->metadata, &hvirt->scratch_buf); + if (errcode) + goto error; + + return 0; +error: + kbasep_hwcnt_virtualizer_accumulator_term(hvirt); + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the + * virtualizer. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client to add. + * @enable_map: Non-NULL pointer to client's initial enable map. + * + * Return: 0 on success, else error code. 
+ */ +static int kbasep_hwcnt_virtualizer_client_add( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map) +{ + int errcode = 0; + u64 ts_start_ns; + u64 ts_end_ns; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!enable_map); + lockdep_assert_held(&hvirt->lock); + + if (hvirt->client_count == 0) + /* First client added, so initialise the accumulator */ + errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt); + if (errcode) + return errcode; + + hvirt->client_count += 1; + + if (hvirt->client_count == 1) { + /* First client, so just pass the enable map onwards as is */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, + enable_map, &ts_start_ns, &ts_end_ns, NULL); + } else { + struct kbase_hwcnt_virtualizer_client *pos; + + /* Make the scratch enable map the union of all enable maps */ + kbase_hwcnt_enable_map_copy( + &hvirt->scratch_map, enable_map); + list_for_each_entry(pos, &hvirt->clients, node) + kbase_hwcnt_enable_map_union( + &hvirt->scratch_map, &pos->enable_map); + + /* Set the counters with the new union enable map */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, + &hvirt->scratch_map, + &ts_start_ns, &ts_end_ns, + &hvirt->scratch_buf); + /* Accumulate into only existing clients' accumulation bufs */ + if (!errcode) + list_for_each_entry(pos, &hvirt->clients, node) + kbasep_hwcnt_virtualizer_client_accumulate( + pos, &hvirt->scratch_buf); + } + if (errcode) + goto error; + + list_add(&hvcli->node, &hvirt->clients); + hvcli->hvirt = hvirt; + kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); + hvcli->has_accum = false; + hvcli->ts_start_ns = ts_end_ns; + + return 0; +error: + hvirt->client_count -= 1; + if (hvirt->client_count == 0) + kbasep_hwcnt_virtualizer_accumulator_term(hvirt); + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_remove - Remove a client from the + * virtualizer. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client to remove. + */ +static void kbasep_hwcnt_virtualizer_client_remove( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli) +{ + int errcode = 0; + u64 ts_start_ns; + u64 ts_end_ns; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + lockdep_assert_held(&hvirt->lock); + + list_del(&hvcli->node); + hvirt->client_count -= 1; + + if (hvirt->client_count == 0) { + /* Last client removed, so terminate the accumulator */ + kbasep_hwcnt_virtualizer_accumulator_term(hvirt); + } else { + struct kbase_hwcnt_virtualizer_client *pos; + /* Make the scratch enable map the union of all enable maps */ + kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map); + list_for_each_entry(pos, &hvirt->clients, node) + kbase_hwcnt_enable_map_union( + &hvirt->scratch_map, &pos->enable_map); + /* Set the counters with the new union enable map */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, + &hvirt->scratch_map, + &ts_start_ns, &ts_end_ns, + &hvirt->scratch_buf); + /* Accumulate into remaining clients' accumulation bufs */ + if (!errcode) + list_for_each_entry(pos, &hvirt->clients, node) + kbasep_hwcnt_virtualizer_client_accumulate( + pos, &hvirt->scratch_buf); + } + WARN_ON(errcode); +} + +/** + * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's + * currently enabled counters, + * and enable a new set of + * counters that will be used for + * subsequent dumps. 
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @enable_map: Non-NULL pointer to the new counter enable map for the client. + * Must have the same metadata as the virtualizer. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +static int kbasep_hwcnt_virtualizer_client_set_counters( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *pos; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!enable_map); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(enable_map->metadata != hvirt->metadata); + WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); + lockdep_assert_held(&hvirt->lock); + + /* Make the scratch enable map the union of all enable maps */ + kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); + list_for_each_entry(pos, &hvirt->clients, node) + /* Ignore the enable map of the selected client */ + if (pos != hvcli) + kbase_hwcnt_enable_map_union( + &hvirt->scratch_map, &pos->enable_map); + + /* Set the counters with the new union enable map */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, + &hvirt->scratch_map, ts_start_ns, ts_end_ns, + &hvirt->scratch_buf); + if (errcode) + return errcode; + + /* Accumulate into all accumulation bufs except the selected client's */ + list_for_each_entry(pos, &hvirt->clients, node) + if (pos != hvcli) + kbasep_hwcnt_virtualizer_client_accumulate( + pos, &hvirt->scratch_buf); + + /* Finally, write into the dump buf */ + if (dump_buf) { + const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; + + if (hvcli->has_accum) { + kbase_hwcnt_dump_buffer_accumulate( + &hvcli->accum_buf, src, &hvcli->enable_map); + src = &hvcli->accum_buf; + } + kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); + } + hvcli->has_accum = false; + + /* Update the selected client's enable map */ + kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); + + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + + return errcode; +} + +int kbase_hwcnt_virtualizer_client_set_counters( + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer *hvirt; + + if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hvirt = hvcli->hvirt; + + if ((enable_map->metadata != hvirt->metadata) || + (dump_buf && (dump_buf->metadata != hvirt->metadata))) + return -EINVAL; + + mutex_lock(&hvirt->lock); + + if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { + /* + * If there's only one client with no prior accumulation, we can + * completely skip the virtualize and just pass through the call + * to the accumulator, saving a fair few copies and + * accumulations. 
+ */ + errcode = kbase_hwcnt_accumulator_set_counters( + hvirt->accum, enable_map, + ts_start_ns, ts_end_ns, dump_buf); + + if (!errcode) { + /* Update the selected client's enable map */ + kbase_hwcnt_enable_map_copy( + &hvcli->enable_map, enable_map); + + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + } + } else { + /* Otherwise, do the full virtualize */ + errcode = kbasep_hwcnt_virtualizer_client_set_counters( + hvirt, hvcli, enable_map, + ts_start_ns, ts_end_ns, dump_buf); + } + + mutex_unlock(&hvirt->lock); + + return errcode; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_set_counters); + +/** + * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's + * currently enabled counters. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +static int kbasep_hwcnt_virtualizer_client_dump( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *pos; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); + lockdep_assert_held(&hvirt->lock); + + /* Perform the dump */ + errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, + ts_start_ns, ts_end_ns, &hvirt->scratch_buf); + if (errcode) + return errcode; + + /* Accumulate into all accumulation bufs except the selected client's */ + list_for_each_entry(pos, &hvirt->clients, node) + if (pos != hvcli) + kbasep_hwcnt_virtualizer_client_accumulate( + pos, &hvirt->scratch_buf); + + /* Finally, write into the dump buf */ + if (dump_buf) { + const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; + + if (hvcli->has_accum) { + kbase_hwcnt_dump_buffer_accumulate( + &hvcli->accum_buf, src, &hvcli->enable_map); + src = &hvcli->accum_buf; + } + kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); + } + hvcli->has_accum = false; + + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + + return errcode; +} + +int kbase_hwcnt_virtualizer_client_dump( + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer *hvirt; + + if (!hvcli || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hvirt = hvcli->hvirt; + + if (dump_buf && (dump_buf->metadata != hvirt->metadata)) + return -EINVAL; + + mutex_lock(&hvirt->lock); + + if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { + /* + * If there's only one client with no prior accumulation, we can + * completely skip the virtualize and just pass through the call + * to the accumulator, saving a fair few copies and + * accumulations. 
+ */ + errcode = kbase_hwcnt_accumulator_dump( + hvirt->accum, ts_start_ns, ts_end_ns, dump_buf); + + if (!errcode) { + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + } + } else { + /* Otherwise, do the full virtualize */ + errcode = kbasep_hwcnt_virtualizer_client_dump( + hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf); + } + + mutex_unlock(&hvirt->lock); + + return errcode; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_dump); + +int kbase_hwcnt_virtualizer_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_virtualizer_client **out_hvcli) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *hvcli; + + if (!hvirt || !enable_map || !out_hvcli || + (enable_map->metadata != hvirt->metadata)) + return -EINVAL; + + errcode = kbasep_hwcnt_virtualizer_client_alloc( + hvirt->metadata, &hvcli); + if (errcode) + return errcode; + + mutex_lock(&hvirt->lock); + + errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map); + + mutex_unlock(&hvirt->lock); + + if (errcode) { + kbasep_hwcnt_virtualizer_client_free(hvcli); + return errcode; + } + + *out_hvcli = hvcli; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_create); + +void kbase_hwcnt_virtualizer_client_destroy( + struct kbase_hwcnt_virtualizer_client *hvcli) +{ + if (!hvcli) + return; + + mutex_lock(&hvcli->hvirt->lock); + + kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli); + + mutex_unlock(&hvcli->hvirt->lock); + + kbasep_hwcnt_virtualizer_client_free(hvcli); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_destroy); + +int kbase_hwcnt_virtualizer_init( + struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_virtualizer **out_hvirt) +{ + struct kbase_hwcnt_virtualizer *virt; + const struct kbase_hwcnt_metadata *metadata; + + if (!hctx || !out_hvirt) + return -EINVAL; + + metadata = kbase_hwcnt_context_metadata(hctx); + if (!metadata) + return -EINVAL; + + virt = kzalloc(sizeof(*virt), GFP_KERNEL); + if (!virt) + return -ENOMEM; + + virt->hctx = hctx; + virt->metadata = metadata; + + mutex_init(&virt->lock); + INIT_LIST_HEAD(&virt->clients); + + *out_hvirt = virt; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_init); + +void kbase_hwcnt_virtualizer_term( + struct kbase_hwcnt_virtualizer *hvirt) +{ + if (!hvirt) + return; + + /* Non-zero client count implies client leak */ + if (WARN_ON(hvirt->client_count != 0)) { + struct kbase_hwcnt_virtualizer_client *pos, *n; + + list_for_each_entry_safe(pos, n, &hvirt->clients, node) + kbase_hwcnt_virtualizer_client_destroy(pos); + } + + WARN_ON(hvirt->client_count != 0); + WARN_ON(hvirt->accum); + + kfree(hvirt); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_term); diff --git a/mali_kbase/mali_kbase_hwcnt_virtualizer.h b/mali_kbase/mali_kbase_hwcnt_virtualizer.h new file mode 100644 index 0000000..1efa81d --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_virtualizer.h @@ -0,0 +1,139 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter virtualizer API. + * + * Virtualizes a hardware counter context, so multiple clients can access + * a single hardware counter resource as though each was the exclusive user. + */ + +#ifndef _KBASE_HWCNT_VIRTUALIZER_H_ +#define _KBASE_HWCNT_VIRTUALIZER_H_ + +#include <linux/types.h> + +struct kbase_hwcnt_context; +struct kbase_hwcnt_virtualizer; +struct kbase_hwcnt_virtualizer_client; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer. + * @hctx: Non-NULL pointer to the hardware counter context to virtualize. + * @out_hvirt: Non-NULL pointer to where the pointer to the created virtualizer + * will be stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_virtualizer_init( + struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_virtualizer **out_hvirt); + +/** + * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer. + * @hvirt: Pointer to virtualizer to be terminated. + */ +void kbase_hwcnt_virtualizer_term( + struct kbase_hwcnt_virtualizer *hvirt); + +/** + * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by + * the virtualizer, so related counter data + * structures can be created. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Return: Non-NULL pointer to metadata, or NULL on error. + */ +const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( + struct kbase_hwcnt_virtualizer *hvirt); + +/** + * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @enable_map: Non-NULL pointer to the enable map for the client. Must have the + * same metadata as the virtualizer. + * @out_hvcli: Non-NULL pointer to where the pointer to the created client will + * be stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_virtualizer_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_virtualizer_client **out_hvcli); + +/** + * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client. + * @hvcli: Pointer to the hardware counter client. + */ +void kbase_hwcnt_virtualizer_client_destroy( + struct kbase_hwcnt_virtualizer_client *hvcli); + +/** + * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's + * currently enabled counters, and + * enable a new set of counters + * that will be used for + * subsequent dumps. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @enable_map: Non-NULL pointer to the new counter enable map for the client. + * Must have the same metadata as the virtualizer. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. 
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_virtualizer_client_set_counters( + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's + * currently enabled counters. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_virtualizer_client_dump( + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */ diff --git a/mali_kbase/mali_kbase_ioctl.h b/mali_kbase/mali_kbase_ioctl.h index ffc30d8..ccf67df 100644 --- a/mali_kbase/mali_kbase_ioctl.h +++ b/mali_kbase/mali_kbase_ioctl.h @@ -64,9 +64,11 @@ extern "C" { * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags * 11.12: * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS + * 11.13: + * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 12 +#define BASE_UK_VERSION_MINOR 13 /** * struct kbase_ioctl_version_check - Check version compatibility with kernel @@ -673,6 +675,19 @@ union kbase_ioctl_cinstr_gwt_dump { _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump) +/** + * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone + * + * @va_pages: Number of VA pages to reserve for EXEC_VA + */ +struct kbase_ioctl_mem_exec_init { + __u64 va_pages; +}; + +#define KBASE_IOCTL_MEM_EXEC_INIT \ + _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init) + + /*************** * test ioctls * ***************/ @@ -747,6 +762,21 @@ union kbase_ioctl_cs_event_memory_read { #endif +/* Customer extension range */ +#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2) + +/* If the integration needs extra ioctl add them there + * like this: + * + * struct my_ioctl_args { + * .... 
+ * } + * + * #define KBASE_IOCTL_MY_IOCTL \ + * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args) + */ + + /********************************** * Definitions for GPU properties * **********************************/ diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c index 29cf193..97d7b43 100644 --- a/mali_kbase/mali_kbase_jd.c +++ b/mali_kbase/mali_kbase_jd.c @@ -804,7 +804,6 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom->extres = NULL; katom->device_nr = user_atom->device_nr; katom->jc = user_atom->jc; - katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; katom->core_req = user_atom->core_req; katom->atom_flags = 0; katom->retry_count = 0; @@ -1219,7 +1218,6 @@ void kbase_jd_done_worker(struct work_struct *data) struct kbasep_js_atom_retained_state katom_retained_state; bool context_idle; base_jd_core_req core_req = katom->core_req; - enum kbase_atom_coreref_state coreref_state = katom->coreref_state; /* Soft jobs should never reach this function */ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); @@ -1365,7 +1363,7 @@ void kbase_jd_done_worker(struct work_struct *data) mutex_unlock(&jctx->lock); } - kbase_backend_complete_wq_post_sched(kbdev, core_req, coreref_state); + kbase_backend_complete_wq_post_sched(kbdev, core_req); if (context_idle) kbase_pm_context_idle(kbdev); diff --git a/mali_kbase/mali_kbase_jd_debugfs.c b/mali_kbase/mali_kbase_jd_debugfs.c index 271daef..7b15d8a 100644 --- a/mali_kbase/mali_kbase_jd_debugfs.c +++ b/mali_kbase/mali_kbase_jd_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -190,9 +190,8 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) kbasep_jd_debugfs_atom_deps(deps, atom); seq_printf(sfile, - "%3u, %8x, %2u, %2u, %c%3u %c%3u, %20lld, ", + "%3u, %8x, %2u, %c%3u %c%3u, %20lld, ", i, atom->core_req, atom->status, - atom->coreref_state, deps[0].type, deps[0].id, deps[1].type, deps[1].id, start_timestamp); diff --git a/mali_kbase/mali_kbase_jd_debugfs.h b/mali_kbase/mali_kbase_jd_debugfs.h index ce0cb61..697bdef 100644 --- a/mali_kbase/mali_kbase_jd_debugfs.h +++ b/mali_kbase/mali_kbase_jd_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,7 +30,7 @@ #include <linux/debugfs.h> -#define MALI_JD_DEBUGFS_VERSION 2 +#define MALI_JD_DEBUGFS_VERSION 3 /* Forward declarations */ struct kbase_context; diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c index 58a1b4b..80b6d77 100644 --- a/mali_kbase/mali_kbase_js.c +++ b/mali_kbase/mali_kbase_js.c @@ -2259,7 +2259,6 @@ static void js_return_worker(struct work_struct *data) bool context_idle = false; unsigned long flags; base_jd_core_req core_req = katom->core_req; - enum kbase_atom_coreref_state coreref_state = katom->coreref_state; KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom); @@ -2349,7 +2348,7 @@ static void js_return_worker(struct work_struct *data) kbase_js_sched_all(kbdev); - kbase_backend_complete_wq_post_sched(kbdev, core_req, coreref_state); + kbase_backend_complete_wq_post_sched(kbdev, core_req); } void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) diff --git a/mali_kbase/mali_kbase_js_ctx_attr.c b/mali_kbase/mali_kbase_js_ctx_attr.c index 6fd908a..1ff230c 100644 --- a/mali_kbase/mali_kbase_js_ctx_attr.c +++ b/mali_kbase/mali_kbase_js_ctx_attr.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -198,29 +198,6 @@ static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, stru * More commonly used public functions */ -void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx) -{ - bool runpool_state_changed = false; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { - /* This context never submits, so don't track any scheduling attributes */ - return; - } - - /* Transfer attributes held in the context flags for contexts that have submit enabled */ - - /* ... More attributes can be added here ... */ - - /* The context should not have been scheduled yet, so ASSERT if this caused - * runpool state changes (note that other threads *can't* affect the value - * of runpool_state_changed, due to how it's calculated) */ - KBASE_DEBUG_ASSERT(runpool_state_changed == false); - CSTD_UNUSED(runpool_state_changed); -} - void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { bool runpool_state_changed; diff --git a/mali_kbase/mali_kbase_js_ctx_attr.h b/mali_kbase/mali_kbase_js_ctx_attr.h index be781e6..25fd397 100644 --- a/mali_kbase/mali_kbase_js_ctx_attr.h +++ b/mali_kbase/mali_kbase_js_ctx_attr.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015, 2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,14 +46,6 @@ */ /** - * Set the initial attributes of a context (when context create flags are set) - * - * Requires: - * - Hold the jsctx_mutex - */ -void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** * Retain all attributes of a context * * This occurs on scheduling in the context on the runpool (but after diff --git a/mali_kbase/mali_kbase_js_defs.h b/mali_kbase/mali_kbase_js_defs.h index 7385daa..052a0b3 100644 --- a/mali_kbase/mali_kbase_js_defs.h +++ b/mali_kbase/mali_kbase_js_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -151,18 +151,19 @@ typedef u32 kbasep_js_atom_done_code; */ enum { /* - * In this mode, the context containing higher priority atoms will be - * scheduled first and also the new runnable higher priority atoms can - * preempt lower priority atoms currently running on the GPU, even if - * they belong to a different context. + * In this mode, higher priority atoms will be scheduled first, + * regardless of the context they belong to. Newly-runnable higher + * priority atoms can preempt lower priority atoms currently running on + * the GPU, even if they belong to a different context. */ KBASE_JS_SYSTEM_PRIORITY_MODE = 0, /* - * In this mode, the contexts are scheduled in round-robin fashion and - * the new runnable higher priority atoms can preempt the lower priority - * atoms currently running on the GPU, only if they belong to the same - * context. + * In this mode, the highest-priority atom will be chosen from each + * context in turn using a round-robin algorithm, so priority only has + * an effect within the context an atom belongs to. Newly-runnable + * higher priority atoms can preempt the lower priority atoms currently + * running on the GPU, but only if they belong to the same context. */ KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE, diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index 3940024..3d0de90 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -79,21 +79,28 @@ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, { struct rb_root *rbtree = NULL; + /* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA + * zone if this has been initialized. 
+ */ + if (gpu_pfn >= kctx->exec_va_start) + rbtree = &kctx->reg_rbtree_exec; + else { + u64 same_va_end; + #ifdef CONFIG_64BIT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) #endif /* CONFIG_64BIT */ - if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE) - rbtree = &kctx->reg_rbtree_custom; - else - rbtree = &kctx->reg_rbtree_same; + same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; #ifdef CONFIG_64BIT - } else { - if (gpu_pfn >= kctx->same_va_end) + else + same_va_end = kctx->same_va_end; +#endif /* CONFIG_64BIT */ + + if (gpu_pfn >= same_va_end) rbtree = &kctx->reg_rbtree_custom; else rbtree = &kctx->reg_rbtree_same; } -#endif /* CONFIG_64BIT */ return rbtree; } @@ -224,7 +231,6 @@ struct kbase_va_region *kbase_find_region_base_address( rbnode = rbnode->rb_right; else return reg; - } return NULL; @@ -615,11 +621,15 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx, kctx->reg_rbtree_same = RB_ROOT; kbase_region_tracker_insert(same_va_reg); - /* Although custom_va_reg doesn't always exist, + /* Although custom_va_reg and exec_va_reg don't always exist, * initialize unconditionally because of the mem_view debugfs - * implementation which relies on this being empty. + * implementation which relies on them being empty. + * + * The difference between the two is that the EXEC_VA region + * is never initialized at this stage. */ kctx->reg_rbtree_custom = RB_ROOT; + kctx->reg_rbtree_exec = RB_ROOT; if (custom_va_reg) kbase_region_tracker_insert(custom_va_reg); @@ -644,6 +654,7 @@ void kbase_region_tracker_term(struct kbase_context *kctx) { kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); + kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); } void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) @@ -657,9 +668,6 @@ static size_t kbase_get_same_va_bits(struct kbase_context *kctx) (size_t) kctx->kbdev->gpu_props.mmu.va_bits); } -/** - * Initialize the region tracker data structure. - */ int kbase_region_tracker_init(struct kbase_context *kctx) { struct kbase_va_region *same_va_reg; @@ -709,12 +717,17 @@ int kbase_region_tracker_init(struct kbase_context *kctx) goto fail_free_same_va; } #ifdef CONFIG_64BIT + } else { + custom_va_size = 0; } #endif kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); kctx->same_va_end = same_va_pages + 1; + kctx->gpu_va_end = kctx->same_va_end + custom_va_size; + kctx->exec_va_start = U64_MAX; + kctx->jit_va = false; kbase_gpu_vm_unlock(kctx); @@ -735,11 +748,12 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, struct kbase_va_region *custom_va_reg; u64 same_va_bits = kbase_get_same_va_bits(kctx); u64 total_va_size; - int err; total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; - kbase_gpu_vm_lock(kctx); + /* First verify that a JIT_VA zone has not been created already. */ + if (kctx->jit_va) + return -EINVAL; /* * Modify the same VA free region after creation. Be careful to ensure @@ -748,23 +762,11 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, */ same_va = kbase_region_tracker_find_region_base_address(kctx, PAGE_SIZE); - if (!same_va) { - err = -ENOMEM; - goto fail_unlock; - } - - /* The region flag or region size has changed since creation so bail. 
*/ - if ((!(same_va->flags & KBASE_REG_FREE)) || - (same_va->nr_pages != total_va_size)) { - err = -ENOMEM; - goto fail_unlock; - } + if (!same_va) + return -ENOMEM; - if (same_va->nr_pages < jit_va_pages || - kctx->same_va_end < jit_va_pages) { - err = -ENOMEM; - goto fail_unlock; - } + if (same_va->nr_pages < jit_va_pages || kctx->same_va_end < jit_va_pages) + return -ENOMEM; /* It's safe to adjust the same VA zone now */ same_va->nr_pages -= jit_va_pages; @@ -779,44 +781,121 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); - if (!custom_va_reg) { - /* - * The context will be destroyed if we fail here so no point - * reverting the change we made to same_va. - */ - err = -ENOMEM; - goto fail_unlock; - } + /* + * The context will be destroyed if we fail here so no point + * reverting the change we made to same_va. + */ + if (!custom_va_reg) + return -ENOMEM; kbase_region_tracker_insert(custom_va_reg); - - kbase_gpu_vm_unlock(kctx); return 0; - -fail_unlock: - kbase_gpu_vm_unlock(kctx); - return err; } #endif int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, u8 max_allocations, u8 trim_level) { + int err = 0; + if (trim_level > 100) return -EINVAL; - kctx->jit_max_allocations = max_allocations; - kctx->trim_level = trim_level; + kbase_gpu_vm_lock(kctx); #ifdef CONFIG_64BIT if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) - return kbase_region_tracker_init_jit_64(kctx, jit_va_pages); + err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); #endif /* * Nothing to do for 32-bit clients, JIT uses the existing * custom VA zone. */ - return 0; + + if (!err) { + kctx->jit_max_allocations = max_allocations; + kctx->trim_level = trim_level; + kctx->jit_va = true; + } + + kbase_gpu_vm_unlock(kctx); + + return err; +} + +int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) +{ + struct kbase_va_region *shrinking_va_reg; + struct kbase_va_region *exec_va_reg; + u64 exec_va_start, exec_va_base_addr; + int err; + + /* The EXEC_VA zone shall be created by making space at the end of the + * address space. Firstly, verify that the number of EXEC_VA pages + * requested by the client is reasonable and then make sure that it is + * not greater than the address space itself before calculating the base + * address of the new zone. + */ + if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) + return -EINVAL; + + kbase_gpu_vm_lock(kctx); + + /* First verify that a JIT_VA zone has not been created already. 
*/ + if (kctx->jit_va) { + err = -EPERM; + goto exit_unlock; + } + + if (exec_va_pages > kctx->gpu_va_end) { + err = -ENOMEM; + goto exit_unlock; + } + + exec_va_start = kctx->gpu_va_end - exec_va_pages; + exec_va_base_addr = exec_va_start << PAGE_SHIFT; + + shrinking_va_reg = kbase_region_tracker_find_region_enclosing_address(kctx, + exec_va_base_addr); + if (!shrinking_va_reg) { + err = -ENOMEM; + goto exit_unlock; + } + + /* Make sure that the EXEC_VA region is still uninitialized */ + if ((shrinking_va_reg->flags & KBASE_REG_ZONE_MASK) == + KBASE_REG_ZONE_EXEC_VA) { + err = -EPERM; + goto exit_unlock; + } + + if (shrinking_va_reg->nr_pages <= exec_va_pages) { + err = -ENOMEM; + goto exit_unlock; + } + + exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, + exec_va_start, + exec_va_pages, + KBASE_REG_ZONE_EXEC_VA); + if (!exec_va_reg) { + err = -ENOMEM; + goto exit_unlock; + } + + shrinking_va_reg->nr_pages -= exec_va_pages; +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) + kctx->same_va_end -= exec_va_pages; +#endif + kctx->exec_va_start = exec_va_start; + + kbase_region_tracker_insert(exec_va_reg); + err = 0; + +exit_unlock: + kbase_gpu_vm_unlock(kctx); + return err; } @@ -938,6 +1017,10 @@ static struct kbase_context *kbase_reg_flags_to_kctx( kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); break; + case KBASE_REG_ZONE_EXEC_VA: + kctx = container_of(rbtree, struct kbase_context, + reg_rbtree_exec); + break; default: WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); break; @@ -2917,6 +3000,30 @@ update_failed_unlocked: return ret; } +static void trace_jit_stats(struct kbase_context *kctx, + u32 bin_id, u32 max_allocations) +{ + const u32 alloc_count = + kctx->jit_current_allocations_per_bin[bin_id]; + + struct kbase_va_region *walker; + u32 va_pages = 0; + u32 ph_pages = 0; + + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { + if (walker->jit_bin_id != bin_id) + continue; + + va_pages += walker->nr_pages; + ph_pages += walker->gpu_alloc->nents; + } + mutex_unlock(&kctx->jit_evict_lock); + + KBASE_TLSTREAM_AUX_JIT_STATS(kctx->id, bin_id, max_allocations, + alloc_count, va_pages, ph_pages); +} + struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, struct base_jit_alloc_info *info) { @@ -3069,6 +3176,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, kctx->jit_current_allocations++; kctx->jit_current_allocations_per_bin[info->bin_id]++; + trace_jit_stats(kctx, info->bin_id, info->max_allocations); + reg->jit_usage_id = info->usage_id; reg->jit_bin_id = info->bin_id; @@ -3112,6 +3221,8 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) kctx->jit_current_allocations--; kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; + trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX); + kbase_mem_evictable_mark_reclaim(reg->gpu_alloc); kbase_gpu_vm_lock(kctx); @@ -3225,6 +3336,17 @@ void kbase_jit_term(struct kbase_context *kctx) cancel_work_sync(&kctx->jit_work); } +bool kbase_has_exec_va_zone(struct kbase_context *kctx) +{ + bool has_exec_va_zone; + + kbase_gpu_vm_lock(kctx); + has_exec_va_zone = (kctx->exec_va_start != U64_MAX); + kbase_gpu_vm_unlock(kctx); + + return has_exec_va_zone; +} + static int kbase_jd_user_buf_map(struct kbase_context *kctx, struct kbase_va_region *reg) { diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index 5958cf4..a873bb1 100644 --- a/mali_kbase/mali_kbase_mem.h +++ 
b/mali_kbase/mali_kbase_mem.h @@ -328,6 +328,13 @@ struct kbase_va_region { #define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) /* end 32-bit clients only */ +/* The starting address and size of the GPU-executable zone are dynamic + * and depend on the platform and the number of pages requested by the + * user process, with an upper limit of 4 GB. + */ +#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2) +#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ + unsigned long flags; @@ -792,9 +799,40 @@ void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool); */ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); +/** + * kbase_region_tracker_init - Initialize the region tracker data structure + * @kctx: kbase context + * + * Return: 0 if success, negative error code otherwise. + */ int kbase_region_tracker_init(struct kbase_context *kctx); + +/** + * kbase_region_tracker_init_jit - Initialize the JIT region + * @kctx: kbase context + * @jit_va_pages: Size of the JIT region in pages + * @max_allocations: Maximum number of allocations allowed for the JIT region + * @trim_level: Trim level for the JIT region + * + * Return: 0 if success, negative error code otherwise. + */ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, u8 max_allocations, u8 trim_level); + +/** + * kbase_region_tracker_init_exec - Initialize the EXEC_VA region + * @kctx: kbase context + * @exec_va_pages: Size of the JIT region in pages. + * It must not be greater than 4 GB. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages); + +/** + * kbase_region_tracker_term - Terminate the JIT region + * @kctx: kbase context + */ void kbase_region_tracker_term(struct kbase_context *kctx); /** @@ -1349,6 +1387,18 @@ bool kbase_jit_evict(struct kbase_context *kctx); void kbase_jit_term(struct kbase_context *kctx); /** + * kbase_has_exec_va_zone - EXEC_VA zone predicate + * + * Determine whether an EXEC_VA zone has been created for the GPU address space + * of the given kbase context. + * + * @kctx: kbase context + * + * Return: True if the kbase context has an EXEC_VA zone. + */ +bool kbase_has_exec_va_zone(struct kbase_context *kctx); + +/** * kbase_map_external_resource - Map an external resource to the GPU. * @kctx: kbase context. * @reg: The region to map. diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index 1299353..c70112d 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -250,6 +250,16 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, goto bad_flags; } +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE))) { + /* Mask coherency flags if infinite cache is enabled to prevent + * the skipping of syncs from BASE side. 
+ */ + *flags &= ~(BASE_MEM_COHERENT_SYSTEM_REQUIRED | + BASE_MEM_COHERENT_SYSTEM); + } +#endif + if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ @@ -273,6 +283,9 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, if (*flags & BASE_MEM_SAME_VA) { rbtree = &kctx->reg_rbtree_same; zone = KBASE_REG_ZONE_SAME_VA; + } else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) { + rbtree = &kctx->reg_rbtree_exec; + zone = KBASE_REG_ZONE_EXEC_VA; } else { rbtree = &kctx->reg_rbtree_custom; zone = KBASE_REG_ZONE_CUSTOM_VA; @@ -914,6 +927,9 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, if (!reg) goto no_region; + if (kbase_update_region_flags(kctx, reg, *flags) != 0) + goto invalid_flags; + reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, KBASE_MEM_TYPE_IMPORTED_UMM); if (IS_ERR_OR_NULL(reg->gpu_alloc)) @@ -924,9 +940,6 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, /* No pages to map yet */ reg->gpu_alloc->nents = 0; - if (kbase_update_region_flags(kctx, reg, *flags) != 0) - goto invalid_flags; - reg->flags &= ~KBASE_REG_FREE; reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ @@ -946,10 +959,8 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, return reg; -invalid_flags: - kbase_mem_phy_alloc_put(reg->gpu_alloc); - kbase_mem_phy_alloc_put(reg->cpu_alloc); no_alloc_obj: +invalid_flags: kfree(reg); no_region: bad_size: @@ -1186,7 +1197,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* mask to only allowed flags */ *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL | - BASE_MEM_COHERENT_SYSTEM_REQUIRED); + BASE_MEM_PROT_CPU_RD | BASE_MEM_COHERENT_SYSTEM_REQUIRED); if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) { dev_warn(kctx->kbdev->dev, @@ -1787,6 +1798,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, struct tagged_addr *page_array; int err = 0; int i; + u64 start_off; map = kzalloc(sizeof(*map), GFP_KERNEL); @@ -1819,6 +1831,38 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, vma->vm_private_data = map; page_array = kbase_get_cpu_phy_pages(reg); + start_off = vma->vm_pgoff - reg->start_pfn + + (aligned_offset >> PAGE_SHIFT); + if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS && nr_pages) { + struct kbase_aliased *aliased = + reg->cpu_alloc->imported.alias.aliased; + + if (!reg->cpu_alloc->imported.alias.stride || + reg->nr_pages < (start_off + nr_pages)) { + err = -EINVAL; + goto out; + } + + while (start_off >= reg->cpu_alloc->imported.alias.stride) { + aliased++; + start_off -= reg->cpu_alloc->imported.alias.stride; + } + + if (!aliased->alloc) { + /* sink page not available for dumping map */ + err = -EINVAL; + goto out; + } + + if ((start_off + nr_pages) > aliased->length) { + /* not fully backed by physical pages */ + err = -EINVAL; + goto out; + } + + /* ready the pages for dumping map */ + page_array = aliased->alloc->pages + aliased->offset; + } if (!(reg->flags & KBASE_REG_CPU_CACHED) && (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) { @@ -1833,8 +1877,6 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, if (!kaddr) { unsigned long addr = vma->vm_start + aligned_offset; - u64 start_off = vma->vm_pgoff - reg->start_pfn + - 
(aligned_offset>>PAGE_SHIFT); vma->vm_flags |= VM_PFNMAP; for (i = 0; i < nr_pages; i++) { @@ -2127,8 +2169,19 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma) } #endif /* CONFIG_DMA_SHARED_BUFFER */ - /* limit what we map to the amount currently backed */ - if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) { + if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { + /* initial params check for aliased dumping map */ + if (nr_pages > reg->gpu_alloc->imported.alias.stride || + !reg->gpu_alloc->imported.alias.stride || + !nr_pages) { + err = -EINVAL; + dev_warn(dev, "mmap aliased: invalid params!\n"); + goto out_unlock; + } + } + else if (reg->cpu_alloc->nents < + (vma->vm_pgoff - reg->start_pfn + nr_pages)) { + /* limit what we map to the amount currently backed */ if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) nr_pages = 0; else @@ -2431,134 +2484,4 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_ return 0; } -void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle) -{ - int res; - void *va; - dma_addr_t dma_pa; - struct kbase_va_region *reg; - struct tagged_addr *page_array; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) - unsigned long attrs = DMA_ATTR_WRITE_COMBINE; -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) - DEFINE_DMA_ATTRS(attrs); -#endif - - u32 pages = ((size - 1) >> PAGE_SHIFT) + 1; - u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | - BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR; - u32 i; - - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(0 != size); - KBASE_DEBUG_ASSERT(0 != pages); - - if (size == 0) - goto err; - - /* All the alloc calls return zeroed memory */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) - va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, - attrs); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, - &attrs); -#else - va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL); -#endif - if (!va) - goto err; - - /* Store the state so we can free it later. 
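The aliased dumping-map path added to kbase_cpu_mmap() and kbase_mmap() above walks the aliased array stride by stride to find which chunk backs a given page offset. The standalone sketch below mirrors that loop with made-up stride and offset values; the driver's struct and field names are not reproduced.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Hypothetical aliased allocation: chunks laid out every 'stride' pages. */
    uint64_t stride = 256;
    uint64_t start_off = 700;   /* page offset into the aliased region */
    unsigned int chunk = 0;

    while (start_off >= stride) {
        chunk++;
        start_off -= stride;
    }
    /* 700 = 2 * 256 + 188 -> chunk 2, offset 188 */
    printf("chunk %u, offset %llu\n", chunk, (unsigned long long)start_off);
    return 0;
}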
*/ - handle->cpu_va = va; - handle->dma_pa = dma_pa; - handle->size = size; - - - reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, pages, - KBASE_REG_ZONE_SAME_VA); - if (!reg) - goto no_reg; - - reg->flags &= ~KBASE_REG_FREE; - if (kbase_update_region_flags(kctx, reg, flags) != 0) - goto invalid_flags; - - reg->cpu_alloc = kbase_alloc_create(kctx, pages, KBASE_MEM_TYPE_RAW); - if (IS_ERR_OR_NULL(reg->cpu_alloc)) - goto no_alloc; - - reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); - - page_array = kbase_get_cpu_phy_pages(reg); - - for (i = 0; i < pages; i++) - page_array[i] = as_tagged(dma_pa + ((dma_addr_t)i << PAGE_SHIFT)); - - reg->cpu_alloc->nents = pages; - - kbase_gpu_vm_lock(kctx); - res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1); - kbase_gpu_vm_unlock(kctx); - if (res) - goto no_mmap; - - return va; - -no_mmap: - kbase_mem_phy_alloc_put(reg->cpu_alloc); - kbase_mem_phy_alloc_put(reg->gpu_alloc); -no_alloc: -invalid_flags: - kfree(reg); -no_reg: -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) - dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) - dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs); -#else - dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa); -#endif -err: - return NULL; -} -KBASE_EXPORT_SYMBOL(kbase_va_alloc); - -void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle) -{ - struct kbase_va_region *reg; - int err; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ - (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) - DEFINE_DMA_ATTRS(attrs); -#endif - - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(handle->cpu_va != NULL); - - kbase_gpu_vm_lock(kctx); - reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va); - KBASE_DEBUG_ASSERT(reg); - err = kbase_gpu_munmap(kctx, reg); - kbase_gpu_vm_unlock(kctx); - KBASE_DEBUG_ASSERT(!err); - - kbase_mem_phy_alloc_put(reg->cpu_alloc); - kbase_mem_phy_alloc_put(reg->gpu_alloc); - kfree(reg); - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) - dma_free_attrs(kctx->kbdev->dev, handle->size, - handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - dma_free_attrs(kctx->kbdev->dev, handle->size, - handle->cpu_va, handle->dma_pa, &attrs); -#else - dma_free_writecombine(kctx->kbdev->dev, handle->size, - handle->cpu_va, handle->dma_pa); -#endif -} -KBASE_EXPORT_SYMBOL(kbase_va_free); diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h index 0a03bee..5cb88d1 100644 --- a/mali_kbase/mali_kbase_mem_linux.h +++ b/mali_kbase/mali_kbase_mem_linux.h @@ -303,22 +303,6 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, */ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); -/** @brief Allocate memory from kernel space and map it onto the GPU - * - * @param kctx The context used for the allocation/mapping - * @param size The size of the allocation in bytes - * @param handle An opaque structure used to contain the state needed to free the memory - * @return the VA for kernel space and GPU MMU - */ -void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle); - -/** @brief Free/unmap memory allocated by kbase_va_alloc - * - * @param kctx The context used for the allocation/mapping - * @param handle An opaque structure returned by the 
kbase_va_alloc function. - */ -void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle); - extern const struct vm_operations_struct kbase_vm_ops; /** diff --git a/mali_kbase/mali_kbase_mmu.c b/mali_kbase/mali_kbase_mmu.c index 5e6732a..84341ca 100644 --- a/mali_kbase/mali_kbase_mmu.c +++ b/mali_kbase/mali_kbase_mmu.c @@ -45,7 +45,7 @@ #include <mali_kbase_hw.h> #include <mali_kbase_mmu_hw.h> #include <mali_kbase_hwaccess_jm.h> -#include <mali_kbase_time.h> +#include <mali_kbase_hwaccess_time.h> #include <mali_kbase_mem.h> #define KBASE_MMU_PAGE_ENTRIES 512 @@ -1404,7 +1404,6 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, err = kbase_mmu_hw_do_operation(kbdev, &kbdev->as[kctx->as_nr], vpfn, nr, op, 0); -#if KBASE_GPU_RESET_EN if (err) { /* Flush failed to complete, assume the * GPU has hung and perform a reset to @@ -1414,7 +1413,6 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, if (kbase_prepare_to_reset_gpu_locked(kbdev)) kbase_reset_gpu_locked(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ #ifndef CONFIG_MALI_NO_MALI /* @@ -1454,7 +1452,6 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, err = kbase_mmu_hw_do_operation(kbdev, as, vpfn, nr, op, 0); -#if KBASE_GPU_RESET_EN if (err) { /* Flush failed to complete, assume the GPU has hung and * perform a reset to recover @@ -1464,7 +1461,6 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, if (kbase_prepare_to_reset_gpu(kbdev)) kbase_reset_gpu(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ @@ -2054,9 +2050,7 @@ void bus_fault_worker(struct work_struct *data) struct kbase_context *kctx; struct kbase_device *kbdev; struct kbase_fault *fault; -#if KBASE_GPU_RESET_EN bool reset_status = false; -#endif /* KBASE_GPU_RESET_EN */ faulting_as = container_of(data, struct kbase_as, work_busfault); fault = &faulting_as->bf_data; @@ -2088,7 +2082,6 @@ void bus_fault_worker(struct work_struct *data) } -#if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. * We start the reset before switching to UNMAPPED to ensure that unrelated jobs @@ -2097,7 +2090,6 @@ void bus_fault_worker(struct work_struct *data) dev_err(kbdev->dev, "GPU bus error occurred. 
For this GPU version we now soft-reset as part of bus error recovery\n"); reset_status = kbase_prepare_to_reset_gpu(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { unsigned long flags; @@ -2122,10 +2114,8 @@ void bus_fault_worker(struct work_struct *data) kbase_pm_context_idle(kbdev); } -#if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) kbase_reset_gpu(kbdev); -#endif /* KBASE_GPU_RESET_EN */ kbasep_js_runpool_release_ctx(kbdev, kctx); @@ -2336,9 +2326,7 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_device *kbdev; struct kbasep_js_device_data *js_devdata; -#if KBASE_GPU_RESET_EN bool reset_status = false; -#endif as_no = as->number; kbdev = kctx->kbdev; @@ -2375,11 +2363,9 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING)) { - unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; - if ((fault->addr >= kbdev->hwcnt.addr) && (fault->addr < (kbdev->hwcnt.addr + - (num_core_groups * 2048)))) + kbdev->hwcnt.addr_bytes))) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; } @@ -2394,7 +2380,6 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, kbase_backend_jm_kill_jobs_from_kctx(kctx); /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); -#if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. * We start the reset before switching to UNMAPPED to ensure that unrelated jobs @@ -2403,7 +2388,6 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, dev_err(kbdev->dev, "Unhandled page fault. 
For this GPU version we now soft-reset the GPU as part of page fault recovery."); reset_status = kbase_prepare_to_reset_gpu(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_mmu_disable(kctx); @@ -2417,10 +2401,8 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, kbase_mmu_hw_enable_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); -#if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) kbase_reset_gpu(kbdev); -#endif /* KBASE_GPU_RESET_EN */ } void kbasep_as_do_poke(struct work_struct *work) @@ -2608,7 +2590,6 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); } -#if KBASE_GPU_RESET_EN if (kbase_as_has_bus_fault(as) && kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { bool reset_status; @@ -2622,7 +2603,6 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, if (reset_status) kbase_reset_gpu_locked(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ return; } diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c index d5b8c77..5699eb8 100644 --- a/mali_kbase/mali_kbase_pm.c +++ b/mali_kbase/mali_kbase_pm.c @@ -30,6 +30,7 @@ #include <mali_kbase.h> #include <mali_midg_regmap.h> #include <mali_kbase_vinstr.h> +#include <mali_kbase_hwcnt_context.h> #include <mali_kbase_pm.h> @@ -83,10 +84,6 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbas * the policy */ kbase_hwaccess_pm_gpu_active(kbdev); } -#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ) - if (kbdev->ipa.gpu_active_callback) - kbdev->ipa.gpu_active_callback(kbdev->ipa.model_data); -#endif mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); @@ -118,25 +115,11 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) /* Wake up anyone waiting for this to become 0 (e.g. suspend). The * waiters must synchronize with us by locking the pm.lock after - * waiting */ + * waiting. + */ wake_up(&kbdev->pm.zero_active_count_wait); } -#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ) - /* IPA may be using vinstr, in which case there may be one PM reference - * still held when all other contexts have left the GPU. Inform IPA that - * the GPU is now idle so that vinstr can drop it's reference. - * - * If the GPU was only briefly active then it might have gone idle - * before vinstr has taken a PM reference, meaning that active_count is - * zero. We still need to inform IPA in this case, so that vinstr can - * drop the PM reference and avoid keeping the GPU powered - * unnecessarily. - */ - if (c <= 1 && kbdev->ipa.gpu_idle_callback) - kbdev->ipa.gpu_idle_callback(kbdev->ipa.model_data); -#endif - mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); } @@ -147,10 +130,16 @@ void kbase_pm_suspend(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); - /* Suspend vinstr. - * This call will block until vinstr is suspended. */ + /* Suspend vinstr. This blocks until the vinstr worker and timer are + * no longer running. + */ kbase_vinstr_suspend(kbdev->vinstr_ctx); + /* Disable GPU hardware counters. + * This call will block until counters are disabled. 
+ */ + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + mutex_lock(&kbdev->pm.lock); KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); kbdev->pm.suspending = true; @@ -177,6 +166,8 @@ void kbase_pm_suspend(struct kbase_device *kbdev) void kbase_pm_resume(struct kbase_device *kbdev) { + unsigned long flags; + /* MUST happen before any pm_context_active calls occur */ kbase_hwaccess_pm_resume(kbdev); @@ -195,7 +186,11 @@ void kbase_pm_resume(struct kbase_device *kbdev) * need it and the policy doesn't want it on */ kbase_pm_context_idle(kbdev); - /* Resume vinstr operation */ + /* Re-enable GPU hardware counters */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Resume vinstr */ kbase_vinstr_resume(kbdev->vinstr_ctx); } - diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c index a3090c1..e762af4 100644 --- a/mali_kbase/mali_kbase_softjobs.c +++ b/mali_kbase/mali_kbase_softjobs.c @@ -1129,8 +1129,9 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) reg = kctx->jit_alloc[info->id]; new_addr = reg->start_pfn << PAGE_SHIFT; *ptr = new_addr; - KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( - katom, info->gpu_alloc_addr, new_addr); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(katom, + info->gpu_alloc_addr, + new_addr, info->va_pages); kbase_vunmap(kctx, &mapping); } diff --git a/mali_kbase/mali_kbase_tlstream.c b/mali_kbase/mali_kbase_tlstream.c index aaf5782..10e3889 100644 --- a/mali_kbase/mali_kbase_tlstream.c +++ b/mali_kbase/mali_kbase_tlstream.c @@ -170,7 +170,8 @@ enum tl_msg_id_aux { KBASE_AUX_PROTECTED_ENTER_START, KBASE_AUX_PROTECTED_ENTER_END, KBASE_AUX_PROTECTED_LEAVE_START, - KBASE_AUX_PROTECTED_LEAVE_END + KBASE_AUX_PROTECTED_LEAVE_END, + KBASE_AUX_JIT_STATS, }; /*****************************************************************************/ @@ -448,8 +449,8 @@ static const struct tp_desc tp_desc_obj[] = { KBASE_TL_ATTRIB_ATOM_JIT, __stringify(KBASE_TL_ATTRIB_ATOM_JIT), "jit done for atom", - "@pLL", - "atom,edit_addr,new_addr" + "@pLLL", + "atom,edit_addr,new_addr,va_pages" }, { KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, @@ -573,6 +574,13 @@ static const struct tp_desc tp_desc_aux[] = { "leave protected mode end", "@p", "gpu" + }, + { + KBASE_AUX_JIT_STATS, + __stringify(KBASE_AUX_JIT_STATS), + "per-bin JIT statistics", + "@IIIIII", + "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages" } }; @@ -2165,12 +2173,12 @@ void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom) } void __kbase_tlstream_tl_attrib_atom_jit( - void *atom, u64 edit_addr, u64 new_addr) + void *atom, u64 edit_addr, u64 new_addr, u64 va_pages) { const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) - + sizeof(edit_addr) + sizeof(new_addr); + + sizeof(edit_addr) + sizeof(new_addr) + sizeof(va_pages); unsigned long flags; char *buffer; size_t pos = 0; @@ -2188,6 +2196,9 @@ void __kbase_tlstream_tl_attrib_atom_jit( buffer, pos, &edit_addr, sizeof(edit_addr)); pos = kbasep_tlstream_write_bytes( buffer, pos, &new_addr, sizeof(new_addr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &va_pages, sizeof(va_pages)); + KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); @@ -2624,3 +2635,40 @@ void __kbase_tlstream_aux_protected_leave_end(void *gpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } + +void __kbase_tlstream_aux_jit_stats(u32 ctx_nr, u32 bid, + u32 
max_allocs, u32 allocs, + u32 va_pages, u32 ph_pages) +{ + const u32 msg_id = KBASE_AUX_JIT_STATS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx_nr) + sizeof(bid) + + sizeof(max_allocs) + sizeof(allocs) + + sizeof(va_pages) + sizeof(ph_pages); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_AUX, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &bid, sizeof(bid)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &max_allocs, sizeof(max_allocs)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &allocs, sizeof(allocs)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &va_pages, sizeof(va_pages)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &ph_pages, sizeof(ph_pages)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} diff --git a/mali_kbase/mali_kbase_tlstream.h b/mali_kbase/mali_kbase_tlstream.h index 6f9656f..e2a3ea4 100644 --- a/mali_kbase/mali_kbase_tlstream.h +++ b/mali_kbase/mali_kbase_tlstream.h @@ -141,7 +141,7 @@ void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio); void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state); void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom); void __kbase_tlstream_tl_attrib_atom_jit( - void *atom, u64 edit_addr, u64 new_addr); + void *atom, u64 edit_addr, u64 new_addr, u64 va_pages); void __kbase_tlstream_tl_attrib_atom_jitallocinfo( void *atom, u64 va_pages, u64 commit_pages, u64 extent, u32 jit_id, u32 bin_id, u32 max_allocations, u32 flags, @@ -163,6 +163,9 @@ void __kbase_tlstream_aux_protected_enter_start(void *gpu); void __kbase_tlstream_aux_protected_enter_end(void *gpu); void __kbase_tlstream_aux_protected_leave_start(void *gpu); void __kbase_tlstream_aux_protected_leave_end(void *gpu); +void __kbase_tlstream_aux_jit_stats(u32 ctx_nr, u32 bin_id, + u32 max_allocations, u32 allocations, + u32 va_pages_nr, u32 ph_pages_nr); #define TLSTREAM_ENABLED (1 << 31) @@ -472,9 +475,11 @@ extern atomic_t kbase_tlstream_enabled; * @atom: atom identifier * @edit_addr: address edited by jit * @new_addr: address placed into the edited location + * @va_pages: maximum number of pages this jit can allocate */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr) \ - __TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, new_addr) +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr, va_pages) \ + __TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, \ + new_addr, va_pages) /** * Information about the JIT allocation atom. @@ -652,5 +657,24 @@ extern atomic_t kbase_tlstream_enabled; #define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \ __TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu) +/** + * KBASE_TLSTREAM_AUX_JIT_STATS - JIT allocations per bin statistics + * + * @ctx_nr: kernel context number + * @bid: JIT bin id + * @max_allocs: maximum allocations allowed in this bin. + * UINT_MAX is a special value. It denotes that + * the parameter was not changed since the last time. 
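The __kbase_tlstream_aux_jit_stats() writer above packs a message id, a timestamp and six u32 fields into the stream buffer, asserting that the running position matches the precomputed size. Below is a minimal userspace sketch of that bookkeeping; the msg_id value and helper name are invented (the real id is the KBASE_AUX_JIT_STATS enum value).

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch of the size/position accounting: header (id + u64 timestamp)
 * followed by six u32 payload fields. */
static size_t write_bytes(char *buf, size_t pos, const void *data, size_t len)
{
    memcpy(buf + pos, data, len);
    return pos + len;
}

int main(void)
{
    uint32_t msg_id = 42, fields[6] = { 1, 2, 3, 4, 5, 6 };
    uint64_t timestamp = 123456789ULL;
    const size_t msg_size = sizeof(msg_id) + sizeof(timestamp) + sizeof(fields);
    char buf[64];
    size_t pos = 0;
    int i;

    pos = write_bytes(buf, pos, &msg_id, sizeof(msg_id));
    pos = write_bytes(buf, pos, &timestamp, sizeof(timestamp));
    for (i = 0; i < 6; i++)
        pos = write_bytes(buf, pos, &fields[i], sizeof(fields[i]));

    assert(pos == msg_size);            /* mirrors the KBASE_DEBUG_ASSERT above */
    printf("packed %zu bytes\n", pos);  /* 4 + 8 + 6*4 = 36 */
    return 0;
}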
+ * @allocs: number of active allocations in this bin + * @va_pages: number of virtual pages allocated in this bin + * @ph_pages: number of physical pages allocated in this bin + * + * Function emits a timeline message indicating the JIT statistics + * for a given bin have chaned. + */ +#define KBASE_TLSTREAM_AUX_JIT_STATS(ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages) \ + __TRACE_IF_ENABLED(aux_jit_stats, ctx_nr, bid, \ + max_allocs, allocs, \ + va_pages, ph_pages) #endif /* _KBASE_TLSTREAM_H */ diff --git a/mali_kbase/mali_kbase_trace_defs.h b/mali_kbase/mali_kbase_trace_defs.h index d7364d5..77fb818 100644 --- a/mali_kbase/mali_kbase_trace_defs.h +++ b/mali_kbase/mali_kbase_trace_defs.h @@ -172,8 +172,6 @@ int dummy_array[] = { KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB), /* gpu_addr==value to write into JS_HEAD */ KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED), - /* kctx is the one being evicted, info_val == kctx to put in */ - KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX), KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED), /* info_val == lower 32 bits of affinity */ KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT), diff --git a/mali_kbase/mali_kbase_utility.c b/mali_kbase/mali_kbase_utility.c deleted file mode 100644 index 3ea234a..0000000 --- a/mali_kbase/mali_kbase_utility.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#include <mali_kbase.h> - -bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry) -{ - struct list_head *pos = base->next; - - while (pos != base) { - if (pos == entry) - return true; - - pos = pos->next; - } - return false; -} diff --git a/mali_kbase/mali_kbase_utility.h b/mali_kbase/mali_kbase_utility.h index f2e5a33..8d4f044 100644 --- a/mali_kbase/mali_kbase_utility.h +++ b/mali_kbase/mali_kbase_utility.h @@ -29,17 +29,6 @@ #error "Don't include this file directly, use mali_kbase.h instead" #endif -/** Test whether the given list entry is a member of the given list. 
- * - * @param base The head of the list to be tested - * @param entry The list entry to be tested - * - * @return true if entry is a member of base - * false otherwise - */ -bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry); - - static inline void kbase_timer_setup(struct timer_list *timer, void (*callback)(struct timer_list *timer)) { diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c index df936cf..51cb365 100644 --- a/mali_kbase/mali_kbase_vinstr.c +++ b/mali_kbase/mali_kbase_vinstr.c @@ -20,221 +20,109 @@ * */ +#include "mali_kbase_vinstr.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase_hwcnt_reader.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_ioctl.h" +#include "mali_malisw.h" +#include "mali_kbase_debug.h" + #include <linux/anon_inodes.h> -#include <linux/atomic.h> +#include <linux/fcntl.h> +#include <linux/fs.h> #include <linux/hrtimer.h> -#include <linux/jiffies.h> -#include <linux/kthread.h> -#include <linux/list.h> #include <linux/mm.h> +#include <linux/mutex.h> #include <linux/poll.h> -#include <linux/preempt.h> #include <linux/slab.h> -#include <linux/wait.h> - -#include <mali_kbase.h> -#include <mali_kbase_hwaccess_instr.h> -#include <mali_kbase_hwaccess_jm.h> -#include <mali_kbase_hwcnt_reader.h> -#include <mali_kbase_mem_linux.h> -#include <mali_kbase_tlstream.h> -#ifdef CONFIG_MALI_NO_MALI -#include <backend/gpu/mali_kbase_model_dummy.h> -#endif - -/*****************************************************************************/ +#include <linux/workqueue.h> /* Hwcnt reader API version */ -#define HWCNT_READER_API 1 - -/* The number of nanoseconds in a second. */ -#define NSECS_IN_SEC 1000000000ull /* ns */ - -/* The time resolution of dumping service. */ -#define DUMPING_RESOLUTION 500000ull /* ns */ +#define HWCNT_READER_API 1 -/* The maximal supported number of dumping buffers. */ -#define MAX_BUFFER_COUNT 32 +/* The minimum allowed interval between dumps (equivalent to 10KHz) */ +#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC) -/* Size and number of hw counters blocks. */ -#define NR_CNT_BLOCKS_PER_GROUP 8 -#define NR_CNT_PER_BLOCK 64 -#define NR_BYTES_PER_CNT 4 -#define NR_BYTES_PER_HDR 16 -#define PRFCNT_EN_MASK_OFFSET 0x8 - -/*****************************************************************************/ - -enum { - SHADER_HWCNT_BM, - TILER_HWCNT_BM, - MMU_L2_HWCNT_BM, - JM_HWCNT_BM -}; - -enum vinstr_state { - VINSTR_IDLE, - VINSTR_DUMPING, - VINSTR_SUSPENDING, - VINSTR_SUSPENDED, - VINSTR_RESUMING -}; +/* The maximum allowed buffers per client */ +#define MAX_BUFFER_COUNT 32 /** - * struct kbase_vinstr_context - vinstr context per device - * @lock: protects the entire vinstr context, but the list of - * vinstr clients can be updated outside the lock using - * @state_lock. - * @kbdev: pointer to kbase device - * @kctx: pointer to kbase context - * @vmap: vinstr vmap for mapping hwcnt dump buffer - * @gpu_va: GPU hwcnt dump buffer address - * @cpu_va: the CPU side mapping of the hwcnt dump buffer - * @dump_size: size of the dump buffer in bytes - * @bitmap: current set of counters monitored, not always in sync - * with hardware - * @reprogram: when true, reprogram hwcnt block with the new set of - * counters - * @state: vinstr state - * @state_lock: protects information about vinstr state and list of - * clients. 
- * @suspend_waitq: notification queue to trigger state re-validation - * @suspend_cnt: reference counter of vinstr's suspend state - * @suspend_work: worker to execute on entering suspended state - * @resume_work: worker to execute on leaving suspended state - * @nclients: number of attached clients, pending or idle - * @nclients_suspended: number of attached but suspended clients - * @waiting_clients: head of list of clients being periodically sampled - * @idle_clients: head of list of clients being idle - * @suspended_clients: head of list of clients being suspended - * @thread: periodic sampling thread - * @waitq: notification queue of sampling thread - * @request_pending: request for action for sampling thread - * @clients_present: when true, we have at least one client - * Note: this variable is in sync. with nclients and is - * present to preserve simplicity. Protected by state_lock. - * @need_suspend: when true, a suspend has been requested while a resume is - * in progress. Resume worker should queue a suspend. - * @need_resume: when true, a resume has been requested while a suspend is - * in progress. Suspend worker should queue a resume. - * @forced_suspend: when true, the suspend of vinstr needs to take place - * regardless of the kernel/user space clients attached - * to it. In particular, this flag is set when the suspend - * of vinstr is requested on entering protected mode or at - * the time of device suspend. + * struct kbase_vinstr_context - IOCTL interface for userspace hardware + * counters. + * @hvirt: Hardware counter virtualizer used by vinstr. + * @metadata: Hardware counter metadata provided by virtualizer. + * @lock: Lock protecting all vinstr state. + * @suspend_count: Suspend reference count. If non-zero, timer and worker are + * prevented from being re-scheduled. + * @client_count: Number of vinstr clients. + * @clients: List of vinstr clients. + * @dump_timer: Timer that enqueues dump_work to a workqueue. + * @dump_work: Worker for performing periodic counter dumps. 
*/ struct kbase_vinstr_context { - struct mutex lock; - struct kbase_device *kbdev; - struct kbase_context *kctx; - - struct kbase_vmap_struct *vmap; - u64 gpu_va; - void *cpu_va; - size_t dump_size; - u32 bitmap[4]; - bool reprogram; - - enum vinstr_state state; - struct spinlock state_lock; - wait_queue_head_t suspend_waitq; - unsigned int suspend_cnt; - struct work_struct suspend_work; - struct work_struct resume_work; - - u32 nclients; - u32 nclients_suspended; - struct list_head waiting_clients; - struct list_head idle_clients; - struct list_head suspended_clients; - - struct task_struct *thread; - wait_queue_head_t waitq; - atomic_t request_pending; - - bool clients_present; - - bool need_suspend; - bool need_resume; - bool forced_suspend; + struct kbase_hwcnt_virtualizer *hvirt; + const struct kbase_hwcnt_metadata *metadata; + struct mutex lock; + size_t suspend_count; + size_t client_count; + struct list_head clients; + struct hrtimer dump_timer; + struct work_struct dump_work; }; /** - * struct kbase_vinstr_client - a vinstr client attached to a vinstr context - * @vinstr_ctx: vinstr context client is attached to - * @list: node used to attach this client to list in vinstr context - * @buffer_count: number of buffers this client is using - * @event_mask: events this client reacts to - * @dump_size: size of one dump buffer in bytes - * @bitmap: bitmap request for JM, TILER, SHADER and MMU counters - * @legacy_buffer: userspace hwcnt dump buffer (legacy interface) - * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface) - * @accum_buffer: temporary accumulation buffer for preserving counters - * @dump_time: next time this clients shall request hwcnt dump - * @dump_interval: interval between periodic hwcnt dumps - * @dump_buffers: kernel hwcnt dump buffers allocated by this client - * @dump_buffers_meta: metadata of dump buffers - * @meta_idx: index of metadata being accessed by userspace - * @read_idx: index of buffer read by userspace - * @write_idx: index of buffer being written by dumping service - * @waitq: client's notification queue - * @pending: when true, client has attached but hwcnt not yet updated - * @suspended: when true, client is suspended + * struct kbase_vinstr_client - A vinstr client attached to a vinstr context. + * @vctx: Vinstr context client is attached to. + * @hvcli: Hardware counter virtualizer client. + * @node: Node used to attach this client to list in vinstr + * context. + * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic + * client. + * @next_dump_time_ns: Time in ns when this client's next periodic dump must + * occur. If 0, not a periodic client. + * @enable_map: Counters enable map. + * @dump_bufs: Array of dump buffers allocated by this client. + * @dump_bufs_meta: Metadata of dump buffers. + * @meta_idx: Index of metadata being accessed by userspace. + * @read_idx: Index of buffer read by userspace. + * @write_idx: Index of buffer being written by dump worker. + * @waitq: Client's notification queue. 
*/ struct kbase_vinstr_client { - struct kbase_vinstr_context *vinstr_ctx; - struct list_head list; - unsigned int buffer_count; - u32 event_mask; - size_t dump_size; - u32 bitmap[4]; - void __user *legacy_buffer; - void *kernel_buffer; - void *accum_buffer; - u64 dump_time; - u32 dump_interval; - char *dump_buffers; - struct kbase_hwcnt_reader_metadata *dump_buffers_meta; - atomic_t meta_idx; - atomic_t read_idx; - atomic_t write_idx; - wait_queue_head_t waitq; - bool pending; - bool suspended; -}; - -/** - * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer - * @hrtimer: high resolution timer - * @vinstr_ctx: vinstr context - */ -struct kbasep_vinstr_wake_up_timer { - struct hrtimer hrtimer; - struct kbase_vinstr_context *vinstr_ctx; + struct kbase_vinstr_context *vctx; + struct kbase_hwcnt_virtualizer_client *hvcli; + struct list_head node; + u64 next_dump_time_ns; + u32 dump_interval_ns; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer_array dump_bufs; + struct kbase_hwcnt_reader_metadata *dump_bufs_meta; + atomic_t meta_idx; + atomic_t read_idx; + atomic_t write_idx; + wait_queue_head_t waitq; }; -/*****************************************************************************/ - -static void kbase_vinstr_update_suspend( - struct kbase_vinstr_context *vinstr_ctx); - -static int kbasep_vinstr_service_task(void *data); - static unsigned int kbasep_vinstr_hwcnt_reader_poll( - struct file *filp, - poll_table *wait); + struct file *filp, + poll_table *wait); + static long kbasep_vinstr_hwcnt_reader_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long arg); + struct file *filp, + unsigned int cmd, + unsigned long arg); + static int kbasep_vinstr_hwcnt_reader_mmap( - struct file *filp, - struct vm_area_struct *vma); + struct file *filp, + struct vm_area_struct *vma); + static int kbasep_vinstr_hwcnt_reader_release( - struct inode *inode, - struct file *filp); + struct inode *inode, + struct file *filp); -/* The timeline stream file operations structure. */ +/* Vinstr client file operations */ static const struct file_operations vinstr_client_fops = { .poll = kbasep_vinstr_hwcnt_reader_poll, .unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, @@ -243,1211 +131,546 @@ static const struct file_operations vinstr_client_fops = { .release = kbasep_vinstr_hwcnt_reader_release, }; -/*****************************************************************************/ - -static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) +/** + * kbasep_vinstr_timestamp_ns() - Get the current time in nanoseconds. + * + * Return: Current time in nanoseconds. + */ +static u64 kbasep_vinstr_timestamp_ns(void) { - struct kbase_context *kctx = vinstr_ctx->kctx; - struct kbase_device *kbdev = kctx->kbdev; - struct kbase_ioctl_hwcnt_enable enable; - int err; - - enable.dump_buffer = vinstr_ctx->gpu_va; - enable.jm_bm = vinstr_ctx->bitmap[JM_HWCNT_BM]; - enable.tiler_bm = vinstr_ctx->bitmap[TILER_HWCNT_BM]; - enable.shader_bm = vinstr_ctx->bitmap[SHADER_HWCNT_BM]; - enable.mmu_l2_bm = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM]; - - /* Mark the context as active so the GPU is kept turned on */ - /* A suspend won't happen here, because we're in a syscall from a - * userspace thread. */ - kbase_pm_context_active(kbdev); - - /* Schedule the context in */ - kbasep_js_schedule_privileged_ctx(kbdev, kctx); - err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); - if (err) { - /* Release the context. 
This had its own Power Manager Active - * reference */ - kbasep_js_release_privileged_ctx(kbdev, kctx); - - /* Also release our Power Manager Active reference */ - kbase_pm_context_idle(kbdev); - } + struct timespec ts; - return err; + getrawmonotonic(&ts); + return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; } -static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) +/** + * kbasep_vinstr_next_dump_time_ns() - Calculate the next periodic dump time. + * @cur_ts_ns: Current time in nanoseconds. + * @interval: Interval between dumps in nanoseconds. + * + * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump + * time that occurs after cur_ts_ns. + */ +static u64 kbasep_vinstr_next_dump_time_ns(u64 cur_ts_ns, u32 interval) { - struct kbase_context *kctx = vinstr_ctx->kctx; - struct kbase_device *kbdev = kctx->kbdev; - int err; - - err = kbase_instr_hwcnt_disable_internal(kctx); - if (err) { - dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)", - kctx); - return; - } - - /* Release the context. This had its own Power Manager Active reference. */ - kbasep_js_release_privileged_ctx(kbdev, kctx); + /* Non-periodic client */ + if (interval == 0) + return 0; - /* Also release our Power Manager Active reference. */ - kbase_pm_context_idle(kbdev); - - dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx); + /* + * Return the next interval after the current time relative to t=0. + * This means multiple clients with the same period will synchronise, + * regardless of when they were started, allowing the worker to be + * scheduled less frequently. + */ + do_div(cur_ts_ns, interval); + return (cur_ts_ns + 1) * interval; } -static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx) -{ - disable_hwcnt(vinstr_ctx); - return enable_hwcnt(vinstr_ctx); -} +/** + * kbasep_vinstr_client_dump() - Perform a dump for a client. + * @vcli: Non-NULL pointer to a vinstr client. + * @event_id: Event type that triggered the dump. + * + * Return: 0 on success, else error code. 
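The interval alignment done by kbasep_vinstr_next_dump_time_ns() above means clients sharing a period wake the dump worker together, regardless of when each client started. A standalone sketch of the same arithmetic, with plain division standing in for do_div and made-up timestamps:

#include <stdio.h>
#include <stdint.h>

static uint64_t next_dump_time_ns(uint64_t cur_ts_ns, uint32_t interval)
{
    if (interval == 0)      /* non-periodic client */
        return 0;
    return (cur_ts_ns / interval + 1) * interval;
}

int main(void)
{
    /* Two clients with a 1 ms period, polled at different times, both get
     * the same next dump time. */
    printf("%llu\n", (unsigned long long)next_dump_time_ns(2300000, 1000000)); /* 3000000 */
    printf("%llu\n", (unsigned long long)next_dump_time_ns(2999999, 1000000)); /* 3000000 */
    return 0;
}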
+ */ +static int kbasep_vinstr_client_dump( + struct kbase_vinstr_client *vcli, + enum base_hwcnt_reader_event event_id) +{ + int errcode; + u64 ts_start_ns; + u64 ts_end_ns; + unsigned int write_idx; + unsigned int read_idx; + struct kbase_hwcnt_dump_buffer *dump_buf; + struct kbase_hwcnt_reader_metadata *meta; -static void hwcnt_bitmap_set(u32 dst[4], u32 src[4]) -{ - dst[JM_HWCNT_BM] = src[JM_HWCNT_BM]; - dst[TILER_HWCNT_BM] = src[TILER_HWCNT_BM]; - dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM]; - dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM]; -} + WARN_ON(!vcli); + lockdep_assert_held(&vcli->vctx->lock); -static void hwcnt_bitmap_union(u32 dst[4], u32 src[4]) -{ - dst[JM_HWCNT_BM] |= src[JM_HWCNT_BM]; - dst[TILER_HWCNT_BM] |= src[TILER_HWCNT_BM]; - dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM]; - dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM]; -} + write_idx = atomic_read(&vcli->write_idx); + read_idx = atomic_read(&vcli->read_idx); -size_t kbase_vinstr_dump_size(struct kbase_device *kbdev) -{ - size_t dump_size; - -#ifndef CONFIG_MALI_NO_MALI - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { - u32 nr_cg; - - nr_cg = kbdev->gpu_props.num_core_groups; - dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP * - NR_CNT_PER_BLOCK * - NR_BYTES_PER_CNT; - } else -#endif /* CONFIG_MALI_NO_MALI */ - { - /* assume v5 for now */ -#ifdef CONFIG_MALI_NO_MALI - u32 nr_l2 = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; - u64 core_mask = - (1ULL << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; -#else - base_gpu_props *props = &kbdev->gpu_props.props; - u32 nr_l2 = props->l2_props.num_l2_slices; - u64 core_mask = props->coherency_info.group[0].core_mask; -#endif - u32 nr_blocks = fls64(core_mask); + /* Check if there is a place to copy HWC block into. */ + if (write_idx - read_idx == vcli->dump_bufs.buf_cnt) + return -EBUSY; + write_idx %= vcli->dump_bufs.buf_cnt; - /* JM and tiler counter blocks are always present */ - dump_size = (2 + nr_l2 + nr_blocks) * - NR_CNT_PER_BLOCK * - NR_BYTES_PER_CNT; - } - return dump_size; -} -KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size); + dump_buf = &vcli->dump_bufs.bufs[write_idx]; + meta = &vcli->dump_bufs_meta[write_idx]; -static size_t kbasep_vinstr_dump_size_ctx( - struct kbase_vinstr_context *vinstr_ctx) -{ - return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev); -} + errcode = kbase_hwcnt_virtualizer_client_dump( + vcli->hvcli, &ts_start_ns, &ts_end_ns, dump_buf); + if (errcode) + return errcode; -static int kbasep_vinstr_map_kernel_dump_buffer( - struct kbase_vinstr_context *vinstr_ctx) -{ - struct kbase_va_region *reg; - struct kbase_context *kctx = vinstr_ctx->kctx; - u64 flags, nr_pages; - - flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR | - BASE_MEM_PERMANENT_KERNEL_MAPPING | BASE_MEM_CACHED_CPU; - if (kctx->kbdev->mmu_mode->flags & - KBASE_MMU_MODE_HAS_NON_CACHEABLE) - flags |= BASE_MEM_UNCACHED_GPU; - vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx); - nr_pages = PFN_UP(vinstr_ctx->dump_size); - - reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, - &vinstr_ctx->gpu_va); - if (!reg) - return -ENOMEM; + /* Patch the dump buf headers, to hide the counters that other hwcnt + * clients are using. 
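kbasep_vinstr_client_dump() above treats write_idx and read_idx as free-running counters: their difference is the number of dumps in flight, and the slot actually written is the index modulo the buffer count. A small sketch of that occupancy check with made-up index values:

#include <stdio.h>

int main(void)
{
    unsigned int buf_cnt = 4;          /* buffer_count from the reader setup */
    unsigned int read_idx = 3;
    unsigned int write_idx;

    for (write_idx = 3; write_idx <= 7; write_idx++) {
        if (write_idx - read_idx == buf_cnt)
            printf("write_idx=%u: ring full, dump returns -EBUSY\n", write_idx);
        else
            printf("write_idx=%u: write into slot %u\n",
                   write_idx, write_idx % buf_cnt);
    }
    return 0;
}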
+ */ + kbase_hwcnt_gpu_patch_dump_headers(dump_buf, &vcli->enable_map); - vinstr_ctx->cpu_va = kbase_phy_alloc_mapping_get(kctx, - vinstr_ctx->gpu_va, &vinstr_ctx->vmap); + /* Zero all non-enabled counters (current values are undefined) */ + kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map); - if (!vinstr_ctx->cpu_va) { - kbase_mem_free(kctx, vinstr_ctx->gpu_va); - return -ENOMEM; - } + meta->timestamp = ts_end_ns; + meta->event_id = event_id; + meta->buffer_idx = write_idx; + /* Notify client. Make sure all changes to memory are visible. */ + wmb(); + atomic_inc(&vcli->write_idx); + wake_up_interruptible(&vcli->waitq); return 0; } -static void kbasep_vinstr_unmap_kernel_dump_buffer( - struct kbase_vinstr_context *vinstr_ctx) +/** + * kbasep_vinstr_client_clear() - Reset all the client's counters to zero. + * @vcli: Non-NULL pointer to a vinstr client. + * + * Return: 0 on success, else error code. + */ +static int kbasep_vinstr_client_clear(struct kbase_vinstr_client *vcli) { - struct kbase_context *kctx = vinstr_ctx->kctx; + u64 ts_start_ns; + u64 ts_end_ns; - kbase_phy_alloc_mapping_put(kctx, vinstr_ctx->vmap); - kbase_mem_free(kctx, vinstr_ctx->gpu_va); + WARN_ON(!vcli); + lockdep_assert_held(&vcli->vctx->lock); + + /* A virtualizer dump with a NULL buffer will just clear the virtualizer + * client's buffer. + */ + return kbase_hwcnt_virtualizer_client_dump( + vcli->hvcli, &ts_start_ns, &ts_end_ns, NULL); } /** - * kbasep_vinstr_create_kctx - create kernel context for vinstr - * @vinstr_ctx: vinstr context - * Return: zero on success + * kbasep_vinstr_reschedule_worker() - Update next dump times for all periodic + * vinstr clients, then reschedule the dump + * worker appropriately. + * @vctx: Non-NULL pointer to the vinstr context. + * + * If there are no periodic clients, then the dump worker will not be + * rescheduled. Else, the dump worker will be rescheduled for the next periodic + * client dump. */ -static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) +static void kbasep_vinstr_reschedule_worker(struct kbase_vinstr_context *vctx) { - struct kbase_device *kbdev = vinstr_ctx->kbdev; - struct kbasep_kctx_list_element *element = NULL; - unsigned long flags; - bool enable_backend = false; - int err; - - vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true); - if (!vinstr_ctx->kctx) - return -ENOMEM; + u64 cur_ts_ns; + u64 earliest_next_ns = U64_MAX; + struct kbase_vinstr_client *pos; - /* Map the master kernel dump buffer. The HW dumps the counters - * into this memory region. */ - err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx); - if (err) - goto failed_map; - - /* Add kernel context to list of contexts associated with device. */ - element = kzalloc(sizeof(*element), GFP_KERNEL); - if (element) { - element->kctx = vinstr_ctx->kctx; - mutex_lock(&kbdev->kctx_list_lock); - list_add(&element->link, &kbdev->kctx_list); - - /* Inform timeline client about new context. - * Do this while holding the lock to avoid tracepoint - * being created in both body and summary stream. */ - KBASE_TLSTREAM_TL_NEW_CTX( - vinstr_ctx->kctx, - vinstr_ctx->kctx->id, - (u32)(vinstr_ctx->kctx->tgid)); - - mutex_unlock(&kbdev->kctx_list_lock); - } else { - /* Don't treat this as a fail - just warn about it. */ - dev_warn(kbdev->dev, - "couldn't add kctx to kctx_list\n"); - } + WARN_ON(!vctx); + lockdep_assert_held(&vctx->lock); - /* Don't enable hardware counters if vinstr is suspended. 
- * Note that vinstr resume code is run under vinstr context lock, - * lower layer will be enabled as needed on resume. */ - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - if (VINSTR_IDLE == vinstr_ctx->state) - enable_backend = true; - vinstr_ctx->clients_present = true; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - if (enable_backend) - err = enable_hwcnt(vinstr_ctx); - if (err) - goto failed_enable; - - vinstr_ctx->thread = kthread_run( - kbasep_vinstr_service_task, - vinstr_ctx, - "mali_vinstr_service"); - if (IS_ERR(vinstr_ctx->thread)) { - err = PTR_ERR(vinstr_ctx->thread); - goto failed_kthread; - } + cur_ts_ns = kbasep_vinstr_timestamp_ns(); - return 0; + /* + * Update each client's next dump time, and find the earliest next + * dump time if any of the clients have a non-zero interval. + */ + list_for_each_entry(pos, &vctx->clients, node) { + const u64 cli_next_ns = + kbasep_vinstr_next_dump_time_ns( + cur_ts_ns, pos->dump_interval_ns); -failed_kthread: - disable_hwcnt(vinstr_ctx); -failed_enable: - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->clients_present = false; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); - if (element) { - mutex_lock(&kbdev->kctx_list_lock); - list_del(&element->link); - kfree(element); - mutex_unlock(&kbdev->kctx_list_lock); - KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx); + /* Non-zero next dump time implies a periodic client */ + if ((cli_next_ns != 0) && (cli_next_ns < earliest_next_ns)) + earliest_next_ns = cli_next_ns; + + pos->next_dump_time_ns = cli_next_ns; } -failed_map: - kbase_destroy_context(vinstr_ctx->kctx); - vinstr_ctx->kctx = NULL; - return err; + + /* Cancel the timer if it is already pending */ + hrtimer_cancel(&vctx->dump_timer); + + /* Start the timer if there are periodic clients and vinstr is not + * suspended. + */ + if ((earliest_next_ns != U64_MAX) && + (vctx->suspend_count == 0) && + !WARN_ON(earliest_next_ns < cur_ts_ns)) + hrtimer_start( + &vctx->dump_timer, + ns_to_ktime(earliest_next_ns - cur_ts_ns), + HRTIMER_MODE_REL); } /** - * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context - * @vinstr_ctx: vinstr context + * kbasep_vinstr_dump_worker()- Dump worker, that dumps all periodic clients + * that need to be dumped, then reschedules itself. + * @work: Work structure. */ -static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx) +static void kbasep_vinstr_dump_worker(struct work_struct *work) { - struct kbase_device *kbdev = vinstr_ctx->kbdev; - struct kbasep_kctx_list_element *element; - struct kbasep_kctx_list_element *tmp; - bool found = false; - bool hwcnt_disabled = false; - unsigned long flags; - - /* Release hw counters dumping resources. */ - vinstr_ctx->thread = NULL; - - /* Simplify state transitions by specifying that we have no clients. */ - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->clients_present = false; - if ((VINSTR_SUSPENDED == vinstr_ctx->state) || (VINSTR_RESUMING == vinstr_ctx->state)) - hwcnt_disabled = true; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - if (!hwcnt_disabled) - disable_hwcnt(vinstr_ctx); - - kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); - - /* Remove kernel context from the device's contexts list. 
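Putting the pieces together, kbasep_vinstr_reschedule_worker() above aligns every periodic client to its own interval and then arms the single hrtimer for the earliest of those times. A standalone sketch with made-up client intervals (the helper reimplements the alignment from the previous example):

#include <stdio.h>
#include <stdint.h>

static uint64_t next_dump_time_ns(uint64_t now, uint32_t interval)
{
    return interval ? (now / interval + 1) * interval : 0;
}

int main(void)
{
    uint32_t intervals[] = { 0, 1000000, 400000 };  /* one manual, two periodic */
    uint64_t now = 2345678, earliest = UINT64_MAX;
    size_t i;

    for (i = 0; i < sizeof(intervals) / sizeof(intervals[0]); i++) {
        uint64_t next = next_dump_time_ns(now, intervals[i]);

        if (next != 0 && next < earliest)
            earliest = next;
    }
    /* Earliest next dump is 2400000, so the timer fires in 54322 ns. */
    printf("timer fires in %llu ns\n", (unsigned long long)(earliest - now));
    return 0;
}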
*/ - mutex_lock(&kbdev->kctx_list_lock); - list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { - if (element->kctx == vinstr_ctx->kctx) { - list_del(&element->link); - kfree(element); - found = true; - } - } - mutex_unlock(&kbdev->kctx_list_lock); + struct kbase_vinstr_context *vctx = + container_of(work, struct kbase_vinstr_context, dump_work); + struct kbase_vinstr_client *pos; + u64 cur_time_ns; - if (!found) - dev_warn(kbdev->dev, "kctx not in kctx_list\n"); + mutex_lock(&vctx->lock); - /* Destroy context. */ - kbase_destroy_context(vinstr_ctx->kctx); + cur_time_ns = kbasep_vinstr_timestamp_ns(); - /* Inform timeline client about context destruction. */ - KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx); + /* Dump all periodic clients whose next dump time is before the current + * time. + */ + list_for_each_entry(pos, &vctx->clients, node) { + if ((pos->next_dump_time_ns != 0) && + (pos->next_dump_time_ns < cur_time_ns)) + kbasep_vinstr_client_dump( + pos, BASE_HWCNT_READER_EVENT_PERIODIC); + } - vinstr_ctx->kctx = NULL; + /* Update the next dump times of all periodic clients, then reschedule + * this worker at the earliest next dump time. + */ + kbasep_vinstr_reschedule_worker(vctx); + + mutex_unlock(&vctx->lock); } /** - * kbasep_vinstr_attach_client - Attach a client to the vinstr core - * @vinstr_ctx: vinstr context - * @buffer_count: requested number of dump buffers - * @bitmap: bitmaps describing which counters should be enabled - * @argp: pointer where notification descriptor shall be stored - * @kernel_buffer: pointer to kernel side buffer - * - * Return: vinstr opaque client handle or NULL on failure + * kbasep_vinstr_dump_timer() - Dump timer that schedules the dump worker for + * execution as soon as possible. + * @timer: Timer structure. */ -static struct kbase_vinstr_client *kbasep_vinstr_attach_client( - struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count, - u32 bitmap[4], void *argp, void *kernel_buffer) +static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer) { - struct task_struct *thread = NULL; - struct kbase_vinstr_client *cli; - unsigned long flags; - bool clients_present = false; - - KBASE_DEBUG_ASSERT(vinstr_ctx); + struct kbase_vinstr_context *vctx = + container_of(timer, struct kbase_vinstr_context, dump_timer); - if (buffer_count > MAX_BUFFER_COUNT - || (buffer_count & (buffer_count - 1))) - return NULL; - - cli = kzalloc(sizeof(*cli), GFP_KERNEL); - if (!cli) - return NULL; - - cli->vinstr_ctx = vinstr_ctx; - cli->buffer_count = buffer_count; - cli->event_mask = - (1 << BASE_HWCNT_READER_EVENT_MANUAL) | - (1 << BASE_HWCNT_READER_EVENT_PERIODIC); - cli->pending = true; + /* We don't need to check vctx->suspend_count here, as the suspend + * function will ensure that any worker enqueued here is immediately + * cancelled, and the worker itself won't reschedule this timer if + * suspend_count != 0. + */ +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, &vctx->dump_work); +#else + queue_work(system_highpri_wq, &vctx->dump_work); +#endif + return HRTIMER_NORESTART; +} - hwcnt_bitmap_set(cli->bitmap, bitmap); +/** + * kbasep_vinstr_client_destroy() - Destroy a vinstr client. + * @vcli: vinstr client. Must not be attached to a vinstr context. 
+ */ +static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli) +{ + if (!vcli) + return; - mutex_lock(&vinstr_ctx->lock); + kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli); + kfree(vcli->dump_bufs_meta); + kbase_hwcnt_dump_buffer_array_free(&vcli->dump_bufs); + kbase_hwcnt_enable_map_free(&vcli->enable_map); + kfree(vcli); +} - hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap); - vinstr_ctx->reprogram = true; +/** + * kbasep_vinstr_client_create() - Create a vinstr client. Does not attach to + * the vinstr context. + * @vctx: Non-NULL pointer to vinstr context. + * @setup: Non-NULL pointer to hardware counter ioctl setup structure. + * setup->buffer_count must not be 0. + * @out_vcli: Non-NULL pointer to where created client will be stored on + * success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_vinstr_client_create( + struct kbase_vinstr_context *vctx, + struct kbase_ioctl_hwcnt_reader_setup *setup, + struct kbase_vinstr_client **out_vcli) +{ + int errcode; + struct kbase_vinstr_client *vcli; + struct kbase_hwcnt_physical_enable_map phys_em; - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended); - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + WARN_ON(!vctx); + WARN_ON(!setup); + WARN_ON(setup->buffer_count == 0); - /* If this is the first client, create the vinstr kbase - * context. This context is permanently resident until the - * last client exits. */ - if (!clients_present) { - hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap); - if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0) - goto error; + vcli = kzalloc(sizeof(*vcli), GFP_KERNEL); + if (!vcli) + return -ENOMEM; - vinstr_ctx->reprogram = false; - cli->pending = false; - } + vcli->vctx = vctx; - /* The GPU resets the counter block every time there is a request - * to dump it. We need a per client kernel buffer for accumulating - * the counters. */ - cli->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx); - cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL); - if (!cli->accum_buffer) + errcode = kbase_hwcnt_enable_map_alloc( + vctx->metadata, &vcli->enable_map); + if (errcode) goto error; - /* Prepare buffers. */ - if (cli->buffer_count) { - int *fd = (int *)argp; - size_t tmp; - - /* Allocate area for buffers metadata storage. */ - tmp = sizeof(struct kbase_hwcnt_reader_metadata) * - cli->buffer_count; - cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL); - if (!cli->dump_buffers_meta) - goto error; - - /* Allocate required number of dumping buffers. */ - cli->dump_buffers = (char *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(cli->dump_size * cli->buffer_count)); - if (!cli->dump_buffers) - goto error; - - /* Create descriptor for user-kernel data exchange. 
*/ - *fd = anon_inode_getfd( - "[mali_vinstr_desc]", - &vinstr_client_fops, - cli, - O_RDONLY | O_CLOEXEC); - if (0 > *fd) - goto error; - } else if (kernel_buffer) { - cli->kernel_buffer = kernel_buffer; - } else { - cli->legacy_buffer = (void __user *)argp; - } + phys_em.jm_bm = setup->jm_bm; + phys_em.shader_bm = setup->shader_bm; + phys_em.tiler_bm = setup->tiler_bm; + phys_em.mmu_l2_bm = setup->mmu_l2_bm; + kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em); - atomic_set(&cli->read_idx, 0); - atomic_set(&cli->meta_idx, 0); - atomic_set(&cli->write_idx, 0); - init_waitqueue_head(&cli->waitq); + errcode = kbase_hwcnt_dump_buffer_array_alloc( + vctx->metadata, setup->buffer_count, &vcli->dump_bufs); + if (errcode) + goto error; - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->nclients++; - list_add(&cli->list, &vinstr_ctx->idle_clients); - kbase_vinstr_update_suspend(vinstr_ctx); - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + errcode = -ENOMEM; + vcli->dump_bufs_meta = kmalloc_array( + setup->buffer_count, sizeof(*vcli->dump_bufs_meta), GFP_KERNEL); + if (!vcli->dump_bufs_meta) + goto error; - mutex_unlock(&vinstr_ctx->lock); + errcode = kbase_hwcnt_virtualizer_client_create( + vctx->hvirt, &vcli->enable_map, &vcli->hvcli); + if (errcode) + goto error; - return cli; + init_waitqueue_head(&vcli->waitq); + *out_vcli = vcli; + return 0; error: - kfree(cli->dump_buffers_meta); - if (cli->dump_buffers) - free_pages( - (unsigned long)cli->dump_buffers, - get_order(cli->dump_size * cli->buffer_count)); - kfree(cli->accum_buffer); - if (!clients_present && vinstr_ctx->kctx) { - thread = vinstr_ctx->thread; - kbasep_vinstr_destroy_kctx(vinstr_ctx); - } - kfree(cli); - - mutex_unlock(&vinstr_ctx->lock); - - /* Thread must be stopped after lock is released. 
*/ - if (thread) - kthread_stop(thread); - - return NULL; + kbasep_vinstr_client_destroy(vcli); + return errcode; } -void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli) +int kbase_vinstr_init( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_vinstr_context **out_vctx) { - struct kbase_vinstr_context *vinstr_ctx; - struct kbase_vinstr_client *iter, *tmp; - struct task_struct *thread = NULL; - u32 zerobitmap[4] = { 0 }; - int cli_found = 0; - unsigned long flags; - bool clients_present; - - KBASE_DEBUG_ASSERT(cli); - vinstr_ctx = cli->vinstr_ctx; - KBASE_DEBUG_ASSERT(vinstr_ctx); - - mutex_lock(&vinstr_ctx->lock); - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - - list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) { - if (iter == cli) { - cli_found = 1; - break; - } - } - if (!cli_found) { - list_for_each_entry_safe( - iter, tmp, &vinstr_ctx->waiting_clients, list) { - if (iter == cli) { - cli_found = 1; - break; - } - } - } - if (!cli_found) { - list_for_each_entry_safe( - iter, tmp, &vinstr_ctx->suspended_clients, list) { - if (iter == cli) { - cli_found = 1; - break; - } - } - } - KBASE_DEBUG_ASSERT(cli_found); + struct kbase_vinstr_context *vctx; + const struct kbase_hwcnt_metadata *metadata; - if (cli_found) { - vinstr_ctx->reprogram = true; - list_del(&iter->list); - } - - if (!cli->suspended) - vinstr_ctx->nclients--; - else - vinstr_ctx->nclients_suspended--; - - kbase_vinstr_update_suspend(vinstr_ctx); - - clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended); - - /* Rebuild context bitmap now that the client has detached */ - hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap); - list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) - hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); - list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) - hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); - list_for_each_entry(iter, &vinstr_ctx->suspended_clients, list) - hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); + if (!hvirt || !out_vctx) + return -EINVAL; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + metadata = kbase_hwcnt_virtualizer_metadata(hvirt); + if (!metadata) + return -EINVAL; - kfree(cli->dump_buffers_meta); - free_pages( - (unsigned long)cli->dump_buffers, - get_order(cli->dump_size * cli->buffer_count)); - kfree(cli->accum_buffer); - kfree(cli); + vctx = kzalloc(sizeof(*vctx), GFP_KERNEL); + if (!vctx) + return -ENOMEM; - if (!clients_present) { - thread = vinstr_ctx->thread; - kbasep_vinstr_destroy_kctx(vinstr_ctx); - } + vctx->hvirt = hvirt; + vctx->metadata = metadata; - mutex_unlock(&vinstr_ctx->lock); + mutex_init(&vctx->lock); + INIT_LIST_HEAD(&vctx->clients); + hrtimer_init(&vctx->dump_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + vctx->dump_timer.function = kbasep_vinstr_dump_timer; + INIT_WORK(&vctx->dump_work, kbasep_vinstr_dump_worker); - /* Thread must be stopped after lock is released. 
*/ - if (thread) - kthread_stop(thread); + *out_vctx = vctx; + return 0; } -KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client); -/* Accumulate counters in the dump buffer */ -static void accum_dump_buffer(void *dst, void *src, size_t dump_size) +void kbase_vinstr_term(struct kbase_vinstr_context *vctx) { - size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; - u32 *d = dst; - u32 *s = src; - size_t i, j; - - for (i = 0; i < dump_size; i += block_size) { - /* skip over the header block */ - d += NR_BYTES_PER_HDR / sizeof(u32); - s += NR_BYTES_PER_HDR / sizeof(u32); - for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) { - /* saturate result if addition would result in wraparound */ - if (U32_MAX - *d < *s) - *d = U32_MAX; - else - *d += *s; - d++; - s++; - } - } -} + if (!vctx) + return; -/* This is the Midgard v4 patch function. It copies the headers for each - * of the defined blocks from the master kernel buffer and then patches up - * the performance counter enable mask for each of the blocks to exclude - * counters that were not requested by the client. */ -static void patch_dump_buffer_hdr_v4( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_vinstr_client *cli) -{ - u32 *mask; - u8 *dst = cli->accum_buffer; - u8 *src = vinstr_ctx->cpu_va; - u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups; - size_t i, group_size, group; - enum { - SC0_BASE = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, - SC1_BASE = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, - SC2_BASE = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, - SC3_BASE = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, - TILER_BASE = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, - MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, - JM_BASE = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT - }; - - group_size = NR_CNT_BLOCKS_PER_GROUP * - NR_CNT_PER_BLOCK * - NR_BYTES_PER_CNT; - for (i = 0; i < nr_cg; i++) { - group = i * group_size; - /* copy shader core headers */ - memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE], - NR_BYTES_PER_HDR); - memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE], - NR_BYTES_PER_HDR); - memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE], - NR_BYTES_PER_HDR); - memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE], - NR_BYTES_PER_HDR); - - /* copy tiler header */ - memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE], - NR_BYTES_PER_HDR); - - /* copy mmu header */ - memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE], - NR_BYTES_PER_HDR); - - /* copy job manager header */ - memcpy(&dst[group + JM_BASE], &src[group + JM_BASE], - NR_BYTES_PER_HDR); - - /* patch the shader core enable mask */ - mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[SHADER_HWCNT_BM]; - mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[SHADER_HWCNT_BM]; - mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[SHADER_HWCNT_BM]; - mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[SHADER_HWCNT_BM]; - - /* patch the tiler core enable mask */ - mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[TILER_HWCNT_BM]; - - /* patch the mmu core enable mask */ - mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[MMU_L2_HWCNT_BM]; - - /* patch the job manager enable mask */ - mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[JM_HWCNT_BM]; - } -} + cancel_work_sync(&vctx->dump_work); 
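(kbase_vinstr_term() continues below.) As a usage sketch for the reworked entry points, the following shows how integration code might pair kbase_vinstr_init() with kbase_vinstr_term(). This is a hedged sketch only: the example_* functions and the kbdev->hwcnt_gpu_virt / kbdev->vinstr_ctx fields are assumptions for illustration, not part of this patch.

    /* Sketch only: bring vinstr up against an already-created hardware counter
     * virtualizer, and tear it down again. Field names here are hypothetical. */
    static int example_vinstr_bringup(struct kbase_device *kbdev)
    {
        /* hwcnt_gpu_virt is assumed to hold the struct kbase_hwcnt_virtualizer
         * created earlier by the hwcnt core. */
        return kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx);
    }

    static void example_vinstr_teardown(struct kbase_device *kbdev)
    {
        /* Cancels outstanding dump work and destroys any leaked clients. */
        kbase_vinstr_term(kbdev->vinstr_ctx);
        kbdev->vinstr_ctx = NULL;
    }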
-/* This is the Midgard v5 patch function. It copies the headers for each - * of the defined blocks from the master kernel buffer and then patches up - * the performance counter enable mask for each of the blocks to exclude - * counters that were not requested by the client. */ -static void patch_dump_buffer_hdr_v5( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_vinstr_client *cli) -{ - struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev; - u32 i, nr_l2; - u64 core_mask; - u32 *mask; - u8 *dst = cli->accum_buffer; - u8 *src = vinstr_ctx->cpu_va; - size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; - - /* copy and patch job manager header */ - memcpy(dst, src, NR_BYTES_PER_HDR); - mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[JM_HWCNT_BM]; - dst += block_size; - src += block_size; - - /* copy and patch tiler header */ - memcpy(dst, src, NR_BYTES_PER_HDR); - mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[TILER_HWCNT_BM]; - dst += block_size; - src += block_size; - - /* copy and patch MMU/L2C headers */ - nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices; - for (i = 0; i < nr_l2; i++) { - memcpy(dst, src, NR_BYTES_PER_HDR); - mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[MMU_L2_HWCNT_BM]; - dst += block_size; - src += block_size; - } + /* Non-zero client count implies client leak */ + if (WARN_ON(vctx->client_count != 0)) { + struct kbase_vinstr_client *pos, *n; - /* copy and patch shader core headers */ - core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; - while (0ull != core_mask) { - memcpy(dst, src, NR_BYTES_PER_HDR); - if (0ull != (core_mask & 1ull)) { - /* if block is not reserved update header */ - mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[SHADER_HWCNT_BM]; + list_for_each_entry_safe(pos, n, &vctx->clients, node) { + list_del(&pos->node); + vctx->client_count--; + kbasep_vinstr_client_destroy(pos); } - dst += block_size; - src += block_size; - - core_mask >>= 1; } + + WARN_ON(vctx->client_count != 0); + kfree(vctx); } -/** - * accum_clients - accumulate dumped hw counters for all known clients - * @vinstr_ctx: vinstr context - */ -static void accum_clients(struct kbase_vinstr_context *vinstr_ctx) +void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx) { - struct kbase_vinstr_client *iter; - int v4 = 0; + if (WARN_ON(!vctx)) + return; -#ifndef CONFIG_MALI_NO_MALI - v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4); -#endif + mutex_lock(&vctx->lock); - list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) { - /* Don't bother accumulating clients whose hwcnt requests - * have not yet been honoured. */ - if (iter->pending) - continue; - if (v4) - patch_dump_buffer_hdr_v4(vinstr_ctx, iter); - else - patch_dump_buffer_hdr_v5(vinstr_ctx, iter); - accum_dump_buffer( - iter->accum_buffer, - vinstr_ctx->cpu_va, - iter->dump_size); - } - list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) { - /* Don't bother accumulating clients whose hwcnt requests - * have not yet been honoured. 
*/ - if (iter->pending) - continue; - if (v4) - patch_dump_buffer_hdr_v4(vinstr_ctx, iter); - else - patch_dump_buffer_hdr_v5(vinstr_ctx, iter); - accum_dump_buffer( - iter->accum_buffer, - vinstr_ctx->cpu_va, - iter->dump_size); - } -} + if (!WARN_ON(vctx->suspend_count == SIZE_MAX)) + vctx->suspend_count++; -/*****************************************************************************/ + mutex_unlock(&vctx->lock); -/** - * kbasep_vinstr_get_timestamp - return timestamp - * - * Function returns timestamp value based on raw monotonic timer. Value will - * wrap around zero in case of overflow. - * - * Return: timestamp value - */ -static u64 kbasep_vinstr_get_timestamp(void) -{ - struct timespec ts; - - getrawmonotonic(&ts); - return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec; + /* Always sync cancel the timer and then the worker, regardless of the + * new suspend count. + * + * This ensures concurrent calls to kbase_vinstr_suspend() always block + * until vinstr is fully suspended. + * + * The timer is cancelled before the worker, as the timer + * unconditionally re-enqueues the worker, but the worker checks the + * suspend_count that we just incremented before rescheduling the timer. + * + * Therefore if we cancel the worker first, the timer might re-enqueue + * the worker before we cancel the timer, but the opposite is not + * possible. + */ + hrtimer_cancel(&vctx->dump_timer); + cancel_work_sync(&vctx->dump_work); } -/** - * kbasep_vinstr_add_dump_request - register client's dumping request - * @cli: requesting client - * @waiting_clients: list of pending dumping requests - */ -static void kbasep_vinstr_add_dump_request( - struct kbase_vinstr_client *cli, - struct list_head *waiting_clients) +void kbase_vinstr_resume(struct kbase_vinstr_context *vctx) { - struct kbase_vinstr_client *tmp; - - if (list_empty(waiting_clients)) { - list_add(&cli->list, waiting_clients); + if (WARN_ON(!vctx)) return; - } - list_for_each_entry(tmp, waiting_clients, list) { - if (tmp->dump_time > cli->dump_time) { - list_add_tail(&cli->list, &tmp->list); - return; - } - } - list_add_tail(&cli->list, waiting_clients); -} -/** - * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level - * dump and accumulate them for known - * clients - * @vinstr_ctx: vinstr context - * @timestamp: pointer where collection timestamp will be recorded - * - * Return: zero on success - */ -static int kbasep_vinstr_collect_and_accumulate( - struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp) -{ - unsigned long flags; - int rcode; + mutex_lock(&vctx->lock); -#ifdef CONFIG_MALI_NO_MALI - /* The dummy model needs the CPU mapping. */ - gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va); -#endif + if (!WARN_ON(vctx->suspend_count == 0)) { + vctx->suspend_count--; - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - if (VINSTR_IDLE != vinstr_ctx->state) { - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - return -EAGAIN; - } else { - vinstr_ctx->state = VINSTR_DUMPING; - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - /* Request HW counters dump. - * Disable preemption to make dump timestamp more accurate. */ - preempt_disable(); - *timestamp = kbasep_vinstr_get_timestamp(); - rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx); - preempt_enable(); - - if (!rcode) - rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx); - WARN_ON(rcode); - - if (!rcode) { - /* Invalidate the kernel buffer before reading from it. 
- * As the vinstr_ctx->lock is already held by the caller, the - * unmap of kernel buffer cannot take place simultaneously. + /* Last resume, so re-enqueue the worker if we have any periodic + * clients. */ - lockdep_assert_held(&vinstr_ctx->lock); - kbase_sync_mem_regions(vinstr_ctx->kctx, vinstr_ctx->vmap, - KBASE_SYNC_TO_CPU); - } - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - switch (vinstr_ctx->state) { - case VINSTR_SUSPENDING: - schedule_work(&vinstr_ctx->suspend_work); - break; - case VINSTR_DUMPING: - vinstr_ctx->state = VINSTR_IDLE; - wake_up_all(&vinstr_ctx->suspend_waitq); - break; - default: - break; - } + if (vctx->suspend_count == 0) { + struct kbase_vinstr_client *pos; + bool has_periodic_clients = false; - /* Accumulate values of collected counters. */ - if (!rcode) - accum_clients(vinstr_ctx); + list_for_each_entry(pos, &vctx->clients, node) { + if (pos->dump_interval_ns != 0) { + has_periodic_clients = true; + break; + } + } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + if (has_periodic_clients) +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, &vctx->dump_work); +#else + queue_work(system_highpri_wq, &vctx->dump_work); +#endif + } + } - return rcode; + mutex_unlock(&vctx->lock); } -/** - * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel - * buffer - * @cli: requesting client - * @timestamp: timestamp when counters were collected - * @event_id: id of event that caused triggered counters collection - * - * Return: zero on success - */ -static int kbasep_vinstr_fill_dump_buffer( - struct kbase_vinstr_client *cli, u64 timestamp, - enum base_hwcnt_reader_event event_id) +int kbase_vinstr_hwcnt_reader_setup( + struct kbase_vinstr_context *vctx, + struct kbase_ioctl_hwcnt_reader_setup *setup) { - unsigned int write_idx = atomic_read(&cli->write_idx); - unsigned int read_idx = atomic_read(&cli->read_idx); + int errcode; + int fd; + struct kbase_vinstr_client *vcli = NULL; - struct kbase_hwcnt_reader_metadata *meta; - void *buffer; + if (!vctx || !setup || + (setup->buffer_count == 0) || + (setup->buffer_count > MAX_BUFFER_COUNT)) + return -EINVAL; - /* Check if there is a place to copy HWC block into. */ - if (write_idx - read_idx == cli->buffer_count) - return -1; - write_idx %= cli->buffer_count; - - /* Fill in dump buffer and its metadata. */ - buffer = &cli->dump_buffers[write_idx * cli->dump_size]; - meta = &cli->dump_buffers_meta[write_idx]; - meta->timestamp = timestamp; - meta->event_id = event_id; - meta->buffer_idx = write_idx; - memcpy(buffer, cli->accum_buffer, cli->dump_size); - return 0; -} + errcode = kbasep_vinstr_client_create(vctx, setup, &vcli); + if (errcode) + goto error; -/** - * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer - * allocated in userspace - * @cli: requesting client - * - * Return: zero on success - * - * This is part of legacy ioctl interface. - */ -static int kbasep_vinstr_fill_dump_buffer_legacy( - struct kbase_vinstr_client *cli) -{ - void __user *buffer = cli->legacy_buffer; - int rcode; + errcode = anon_inode_getfd( + "[mali_vinstr_desc]", + &vinstr_client_fops, + vcli, + O_RDONLY | O_CLOEXEC); + if (errcode < 0) + goto error; - /* Copy data to user buffer. 
*/ - rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size); - if (rcode) { - pr_warn("error while copying buffer to user\n"); - return -EFAULT; - } - return 0; -} + fd = errcode; -/** - * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer - * allocated in kernel space - * @cli: requesting client - * - * Return: zero on success - * - * This is part of the kernel client interface. - */ -static int kbasep_vinstr_fill_dump_buffer_kernel( - struct kbase_vinstr_client *cli) -{ - memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size); + /* Add the new client. No need to reschedule worker, as not periodic */ + mutex_lock(&vctx->lock); - return 0; -} + vctx->client_count++; + list_add(&vcli->node, &vctx->clients); -/** - * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst - * @vinstr_ctx: vinstr context - */ -static void kbasep_vinstr_reprogram( - struct kbase_vinstr_context *vinstr_ctx) -{ - unsigned long flags; - bool suspended = false; - - /* Don't enable hardware counters if vinstr is suspended. */ - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - if (VINSTR_IDLE != vinstr_ctx->state) - suspended = true; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - if (suspended) - return; + mutex_unlock(&vctx->lock); - /* Change to suspended state is done while holding vinstr context - * lock. Below code will then no re-enable the instrumentation. */ - - if (vinstr_ctx->reprogram) { - struct kbase_vinstr_client *iter; - - if (!reprogram_hwcnt(vinstr_ctx)) { - vinstr_ctx->reprogram = false; - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - list_for_each_entry( - iter, - &vinstr_ctx->idle_clients, - list) - iter->pending = false; - list_for_each_entry( - iter, - &vinstr_ctx->waiting_clients, - list) - iter->pending = false; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - } - } + return fd; +error: + kbasep_vinstr_client_destroy(vcli); + return errcode; } /** - * kbasep_vinstr_update_client - copy accumulated counters to user readable - * buffer and notify the user - * @cli: requesting client - * @timestamp: timestamp when counters were collected - * @event_id: id of event that caused triggered counters collection + * kbasep_vinstr_hwcnt_reader_buffer_ready() - Check if client has ready + * buffers. + * @cli: Non-NULL pointer to vinstr client. * - * Return: zero on success + * Return: Non-zero if client has at least one dumping buffer filled that was + * not notified to user yet. */ -static int kbasep_vinstr_update_client( - struct kbase_vinstr_client *cli, u64 timestamp, - enum base_hwcnt_reader_event event_id) +static int kbasep_vinstr_hwcnt_reader_buffer_ready( + struct kbase_vinstr_client *cli) { - int rcode = 0; - unsigned long flags; - - /* Copy collected counters to user readable buffer. */ - if (cli->buffer_count) - rcode = kbasep_vinstr_fill_dump_buffer( - cli, timestamp, event_id); - else if (cli->kernel_buffer) - rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli); - else - rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli); - - /* Prepare for next request. */ - memset(cli->accum_buffer, 0, cli->dump_size); - - spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags); - /* Check if client was put to suspend state while it was being updated */ - if (cli->suspended) - rcode = -EINVAL; - spin_unlock_irqrestore(&cli->vinstr_ctx->state_lock, flags); - - if (rcode) - goto exit; - - /* Notify client. Make sure all changes to memory are visible. 
*/ - wmb(); - atomic_inc(&cli->write_idx); - wake_up_interruptible(&cli->waitq); - -exit: - return rcode; + WARN_ON(!cli); + return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); } /** - * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function + * kbasep_vinstr_hwcnt_reader_ioctl_dump() - Dump ioctl command. + * @cli: Non-NULL pointer to vinstr client. * - * @hrtimer: high resolution timer - * - * Return: High resolution timer restart enum. + * Return: 0 on success, else error code. */ -static enum hrtimer_restart kbasep_vinstr_wake_up_callback( - struct hrtimer *hrtimer) +static long kbasep_vinstr_hwcnt_reader_ioctl_dump( + struct kbase_vinstr_client *cli) { - struct kbasep_vinstr_wake_up_timer *timer = - container_of( - hrtimer, - struct kbasep_vinstr_wake_up_timer, - hrtimer); + int errcode; - KBASE_DEBUG_ASSERT(timer); + mutex_lock(&cli->vctx->lock); - atomic_set(&timer->vinstr_ctx->request_pending, 1); - wake_up_all(&timer->vinstr_ctx->waitq); + errcode = kbasep_vinstr_client_dump( + cli, BASE_HWCNT_READER_EVENT_MANUAL); - return HRTIMER_NORESTART; + mutex_unlock(&cli->vctx->lock); + return errcode; } /** - * kbasep_vinstr_service_task - HWC dumping service thread + * kbasep_vinstr_hwcnt_reader_ioctl_clear() - Clear ioctl command. + * @cli: Non-NULL pointer to vinstr client. * - * @data: Pointer to vinstr context structure. - * - * Return: 0 on success; -ENOMEM if timer allocation fails + * Return: 0 on success, else error code. */ -static int kbasep_vinstr_service_task(void *data) +static long kbasep_vinstr_hwcnt_reader_ioctl_clear( + struct kbase_vinstr_client *cli) { - struct kbase_vinstr_context *vinstr_ctx = data; - struct kbasep_vinstr_wake_up_timer *timer; - - KBASE_DEBUG_ASSERT(vinstr_ctx); + int errcode; - timer = kmalloc(sizeof(*timer), GFP_KERNEL); + mutex_lock(&cli->vctx->lock); - if (!timer) { - dev_warn(vinstr_ctx->kbdev->dev, "Timer allocation failed!\n"); - return -ENOMEM; - } - - hrtimer_init(&timer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - - timer->hrtimer.function = kbasep_vinstr_wake_up_callback; - timer->vinstr_ctx = vinstr_ctx; - - while (!kthread_should_stop()) { - struct kbase_vinstr_client *cli = NULL; - struct kbase_vinstr_client *tmp; - int rcode; - unsigned long flags; + errcode = kbasep_vinstr_client_clear(cli); - u64 timestamp = kbasep_vinstr_get_timestamp(); - u64 dump_time = 0; - struct list_head expired_requests; - - /* Hold lock while performing operations on lists of clients. */ - mutex_lock(&vinstr_ctx->lock); - - /* Closing thread must not interact with client requests. */ - if (current == vinstr_ctx->thread) { - atomic_set(&vinstr_ctx->request_pending, 0); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - if (!list_empty(&vinstr_ctx->waiting_clients)) { - cli = list_first_entry( - &vinstr_ctx->waiting_clients, - struct kbase_vinstr_client, - list); - dump_time = cli->dump_time; - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - } - - if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) { - mutex_unlock(&vinstr_ctx->lock); - - /* Sleep until next dumping event or service request. 
*/ - if (cli) { - u64 diff = dump_time - timestamp; - - hrtimer_start( - &timer->hrtimer, - ns_to_ktime(diff), - HRTIMER_MODE_REL); - } - wait_event( - vinstr_ctx->waitq, - atomic_read( - &vinstr_ctx->request_pending) || - kthread_should_stop()); - hrtimer_cancel(&timer->hrtimer); - continue; - } - - rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, - ×tamp); - - INIT_LIST_HEAD(&expired_requests); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - /* Find all expired requests. */ - list_for_each_entry_safe( - cli, - tmp, - &vinstr_ctx->waiting_clients, - list) { - s64 tdiff = - (s64)(timestamp + DUMPING_RESOLUTION) - - (s64)cli->dump_time; - if (tdiff >= 0ll) { - list_del(&cli->list); - list_add(&cli->list, &expired_requests); - } else { - break; - } - } - - /* Fill data for each request found. */ - while (!list_empty(&expired_requests)) { - cli = list_first_entry(&expired_requests, - struct kbase_vinstr_client, list); - - /* Ensure that legacy buffer will not be used from - * this kthread context. */ - BUG_ON(0 == cli->buffer_count); - /* Expect only periodically sampled clients. */ - BUG_ON(0 == cli->dump_interval); - - /* Release the spinlock, as filling the data in client's - * userspace buffer could result in page faults. */ - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - if (!rcode) - kbasep_vinstr_update_client( - cli, - timestamp, - BASE_HWCNT_READER_EVENT_PERIODIC); - spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags); - - /* This client got suspended, move to the next one. */ - if (cli->suspended) - continue; - - /* Set new dumping time. Drop missed probing times. */ - do { - cli->dump_time += cli->dump_interval; - } while (cli->dump_time < timestamp); - - list_del(&cli->list); - kbasep_vinstr_add_dump_request( - cli, - &vinstr_ctx->waiting_clients); - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - /* Reprogram counters set if required. */ - kbasep_vinstr_reprogram(vinstr_ctx); - - mutex_unlock(&vinstr_ctx->lock); - } - - kfree(timer); - - return 0; + mutex_unlock(&cli->vctx->lock); + return errcode; } -/*****************************************************************************/ - /** - * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers - * @cli: pointer to vinstr client structure + * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer() - Get buffer ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @buffer: Non-NULL pointer to userspace buffer. + * @size: Size of buffer. * - * Return: non-zero if client has at least one dumping buffer filled that was - * not notified to user yet - */ -static int kbasep_vinstr_hwcnt_reader_buffer_ready( - struct kbase_vinstr_client *cli) -{ - KBASE_DEBUG_ASSERT(cli); - return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command - * @cli: pointer to vinstr client structure - * @buffer: pointer to userspace buffer - * @size: size of buffer - * - * Return: zero on success + * Return: 0 on success, else error code. 
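For context on the reader file descriptor that kbase_vinstr_hwcnt_reader_setup() hands back, here is a hedged userspace sketch of how a profiler might create and map it. The reader ioctls and struct kbase_ioctl_hwcnt_reader_setup come from the uapi headers named elsewhere in this patch (mali_kbase_hwcnt_reader.h, mali_kbase_ioctl.h); the device ioctl name KBASE_IOCTL_HWCNT_READER_SETUP and the example_* helper are assumptions for illustration.

    /* Userspace sketch, not part of the patch: request a reader fd with all
     * counter blocks enabled, then mmap its ring of dump buffers. */
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>
    #include "mali_kbase_ioctl.h"         /* kbase_ioctl_hwcnt_reader_setup */
    #include "mali_kbase_hwcnt_reader.h"  /* KBASE_HWCNT_READER_* ioctls */

    static void *example_open_reader(int mali_fd, uint32_t buf_cnt,
                                     int *out_fd, uint32_t *out_buf_size)
    {
        struct kbase_ioctl_hwcnt_reader_setup setup = {
            .buffer_count = buf_cnt, /* must not exceed MAX_BUFFER_COUNT */
            .jm_bm = ~0u,            /* enable every job manager counter */
            .shader_bm = ~0u,
            .tiler_bm = ~0u,
            .mmu_l2_bm = ~0u,
        };
        void *samples;
        int fd;

        /* Ioctl name assumed; it routes to kbase_vinstr_hwcnt_reader_setup()
         * and returns the anonymous reader fd on success. */
        fd = ioctl(mali_fd, KBASE_IOCTL_HWCNT_READER_SETUP, &setup);
        if (fd < 0)
            return NULL;

        if (ioctl(fd, KBASE_HWCNT_READER_GET_BUFFER_SIZE, out_buf_size) < 0)
            goto err;

        /* The reader's mmap handler exposes buffer_count dump buffers laid
         * out back to back. */
        samples = mmap(NULL, (size_t)buf_cnt * *out_buf_size, PROT_READ,
                       MAP_PRIVATE, fd, 0);
        if (samples == MAP_FAILED)
            goto err;

        *out_fd = fd;
        return samples;

    err:
        close(fd);
        return NULL;
    }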
*/ static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( - struct kbase_vinstr_client *cli, void __user *buffer, - size_t size) + struct kbase_vinstr_client *cli, + void __user *buffer, + size_t size) { unsigned int meta_idx = atomic_read(&cli->meta_idx); - unsigned int idx = meta_idx % cli->buffer_count; + unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt; - struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx]; + struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx]; /* Metadata sanity check. */ - KBASE_DEBUG_ASSERT(idx == meta->buffer_idx); + WARN_ON(idx != meta->buffer_idx); if (sizeof(struct kbase_hwcnt_reader_metadata) != size) return -EINVAL; @@ -1470,19 +693,20 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( } /** - * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command - * @cli: pointer to vinstr client structure - * @buffer: pointer to userspace buffer - * @size: size of buffer + * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer() - Put buffer ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @buffer: Non-NULL pointer to userspace buffer. + * @size: Size of buffer. * - * Return: zero on success + * Return: 0 on success, else error code. */ static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( - struct kbase_vinstr_client *cli, void __user *buffer, - size_t size) + struct kbase_vinstr_client *cli, + void __user *buffer, + size_t size) { unsigned int read_idx = atomic_read(&cli->read_idx); - unsigned int idx = read_idx % cli->buffer_count; + unsigned int idx = read_idx % cli->dump_bufs.buf_cnt; struct kbase_hwcnt_reader_metadata meta; @@ -1505,182 +729,126 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( } /** - * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command - * @cli: pointer to vinstr client structure - * @interval: periodic dumping interval (disable periodic dumping if zero) + * kbasep_vinstr_hwcnt_reader_ioctl_set_interval() - Set interval ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @interval: Periodic dumping interval (disable periodic dumping if 0). * - * Return: zero on success + * Return: 0 always. */ static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( - struct kbase_vinstr_client *cli, u32 interval) -{ - struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; - unsigned long flags; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - mutex_lock(&vinstr_ctx->lock); - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - - if (cli->suspended) { - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - mutex_unlock(&vinstr_ctx->lock); - return -ENOMEM; - } - - list_del(&cli->list); - - cli->dump_interval = interval; - - /* If interval is non-zero, enable periodic dumping for this client. 
*/ - if (cli->dump_interval) { - if (DUMPING_RESOLUTION > cli->dump_interval) - cli->dump_interval = DUMPING_RESOLUTION; - cli->dump_time = - kbasep_vinstr_get_timestamp() + cli->dump_interval; - - kbasep_vinstr_add_dump_request( - cli, &vinstr_ctx->waiting_clients); - - atomic_set(&vinstr_ctx->request_pending, 1); - wake_up_all(&vinstr_ctx->waitq); - } else { - list_add(&cli->list, &vinstr_ctx->idle_clients); - } + struct kbase_vinstr_client *cli, + u32 interval) +{ + mutex_lock(&cli->vctx->lock); + + if ((interval != 0) && (interval < DUMP_INTERVAL_MIN_NS)) + interval = DUMP_INTERVAL_MIN_NS; + /* Update the interval, and put in a dummy next dump time */ + cli->dump_interval_ns = interval; + cli->next_dump_time_ns = 0; + + /* + * If it's a periodic client, kick off the worker early to do a proper + * timer reschedule. Return value is ignored, as we don't care if the + * worker is already queued. + */ + if ((interval != 0) && (cli->vctx->suspend_count == 0)) +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, &cli->vctx->dump_work); +#else + queue_work(system_highpri_wq, &cli->vctx->dump_work); +#endif - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - mutex_unlock(&vinstr_ctx->lock); + mutex_unlock(&cli->vctx->lock); return 0; } /** - * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id - * @event_id: id of event - * Return: event_mask or zero if event is not supported or maskable - */ -static u32 kbasep_vinstr_hwcnt_reader_event_mask( - enum base_hwcnt_reader_event event_id) -{ - u32 event_mask = 0; - - switch (event_id) { - case BASE_HWCNT_READER_EVENT_PREJOB: - case BASE_HWCNT_READER_EVENT_POSTJOB: - /* These event are maskable. */ - event_mask = (1 << event_id); - break; - - case BASE_HWCNT_READER_EVENT_MANUAL: - case BASE_HWCNT_READER_EVENT_PERIODIC: - /* These event are non-maskable. */ - default: - /* These event are not supported. */ - break; - } - - return event_mask; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command - * @cli: pointer to vinstr client structure - * @event_id: id of event to enable + * kbasep_vinstr_hwcnt_reader_ioctl_enable_event() - Enable event ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @event_id: ID of event to enable. * - * Return: zero on success + * Return: 0 always. */ static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event( struct kbase_vinstr_client *cli, enum base_hwcnt_reader_event event_id) { - struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; - u32 event_mask; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id); - if (!event_mask) - return -EINVAL; - - mutex_lock(&vinstr_ctx->lock); - cli->event_mask |= event_mask; - mutex_unlock(&vinstr_ctx->lock); - + /* No-op, as events aren't supported */ return 0; } /** - * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command - * @cli: pointer to vinstr client structure - * @event_id: id of event to disable + * kbasep_vinstr_hwcnt_reader_ioctl_disable_event() - Disable event ioctl + * command. + * @cli: Non-NULL pointer to vinstr client. + * @event_id: ID of event to disable. * - * Return: zero on success + * Return: 0 always. 
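As a small companion to the interval handling above: the interval is expressed in nanoseconds, zero disables periodic dumping, and non-zero values below the driver's minimum are silently raised to it. A hedged userspace sketch, reusing the reader fd from the earlier example:

    /* Userspace sketch: switch periodic dumping on (here ~100 Hz) or off. */
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include "mali_kbase_hwcnt_reader.h"  /* KBASE_HWCNT_READER_SET_INTERVAL */

    static int example_set_periodic(int reader_fd, int enable)
    {
        /* Interval is in nanoseconds; values below the driver minimum
         * (DUMP_INTERVAL_MIN_NS) are clamped, and 0 stops periodic dumps. */
        uint32_t interval_ns = enable ? 10u * 1000u * 1000u : 0u;

        /* SET_INTERVAL passes the value directly as the ioctl argument. */
        return ioctl(reader_fd, KBASE_HWCNT_READER_SET_INTERVAL, interval_ns);
    }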
*/ static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( - struct kbase_vinstr_client *cli, - enum base_hwcnt_reader_event event_id) + struct kbase_vinstr_client *cli, + enum base_hwcnt_reader_event event_id) { - struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; - u32 event_mask; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id); - if (!event_mask) - return -EINVAL; - - mutex_lock(&vinstr_ctx->lock); - cli->event_mask &= ~event_mask; - mutex_unlock(&vinstr_ctx->lock); - + /* No-op, as events aren't supported */ return 0; } /** - * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command - * @cli: pointer to vinstr client structure - * @hwver: pointer to user buffer where hw version will be stored + * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver() - Get HW version ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @hwver: Non-NULL pointer to user buffer where HW version will be stored. * - * Return: zero on success + * Return: 0 on success, else error code. */ static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( - struct kbase_vinstr_client *cli, u32 __user *hwver) + struct kbase_vinstr_client *cli, + u32 __user *hwver) { -#ifndef CONFIG_MALI_NO_MALI - struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; -#endif - - u32 ver = 5; + u32 ver = 0; + const enum kbase_hwcnt_gpu_group_type type = + kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0); -#ifndef CONFIG_MALI_NO_MALI - KBASE_DEBUG_ASSERT(vinstr_ctx); - if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4)) + switch (type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: ver = 4; -#endif + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + ver = 5; + break; + default: + WARN_ON(true); + } - return put_user(ver, hwver); + if (ver != 0) { + return put_user(ver, hwver); + } else { + return -EINVAL; + } } /** - * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl - * @filp: pointer to file structure - * @cmd: user command - * @arg: command's argument + * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl. + * @filp: Non-NULL pointer to file structure. + * @cmd: User command. + * @arg: Command's argument. * - * Return: zero on success + * Return: 0 on success, else error code. 
*/ -static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg) +static long kbasep_vinstr_hwcnt_reader_ioctl( + struct file *filp, + unsigned int cmd, + unsigned long arg) { - long rcode = 0; + long rcode; struct kbase_vinstr_client *cli; - KBASE_DEBUG_ASSERT(filp); + if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER)) + return -EINVAL; cli = filp->private_data; - KBASE_DEBUG_ASSERT(cli); - - if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd))) + if (!cli) return -EINVAL; switch (cmd) { @@ -1689,42 +857,41 @@ static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp, break; case KBASE_HWCNT_READER_GET_HWVER: rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( - cli, (u32 __user *)arg); + cli, (u32 __user *)arg); break; case KBASE_HWCNT_READER_GET_BUFFER_SIZE: - KBASE_DEBUG_ASSERT(cli->vinstr_ctx); rcode = put_user( - (u32)cli->vinstr_ctx->dump_size, - (u32 __user *)arg); + (u32)cli->vctx->metadata->dump_buf_bytes, + (u32 __user *)arg); break; case KBASE_HWCNT_READER_DUMP: - rcode = kbase_vinstr_hwc_dump( - cli, BASE_HWCNT_READER_EVENT_MANUAL); + rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli); break; case KBASE_HWCNT_READER_CLEAR: - rcode = kbase_vinstr_hwc_clear(cli); + rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli); break; case KBASE_HWCNT_READER_GET_BUFFER: rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( - cli, (void __user *)arg, _IOC_SIZE(cmd)); + cli, (void __user *)arg, _IOC_SIZE(cmd)); break; case KBASE_HWCNT_READER_PUT_BUFFER: rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( - cli, (void __user *)arg, _IOC_SIZE(cmd)); + cli, (void __user *)arg, _IOC_SIZE(cmd)); break; case KBASE_HWCNT_READER_SET_INTERVAL: rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( - cli, (u32)arg); + cli, (u32)arg); break; case KBASE_HWCNT_READER_ENABLE_EVENT: rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( - cli, (enum base_hwcnt_reader_event)arg); + cli, (enum base_hwcnt_reader_event)arg); break; case KBASE_HWCNT_READER_DISABLE_EVENT: rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( - cli, (enum base_hwcnt_reader_event)arg); + cli, (enum base_hwcnt_reader_event)arg); break; default: + WARN_ON(true); rcode = -EINVAL; break; } @@ -1733,21 +900,25 @@ static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp, } /** - * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll - * @filp: pointer to file structure - * @wait: pointer to poll table - * Return: POLLIN if data can be read without blocking, otherwise zero + * kbasep_vinstr_hwcnt_reader_poll() - hwcnt reader's poll. + * @filp: Non-NULL pointer to file structure. + * @wait: Non-NULL pointer to poll table. + * + * Return: POLLIN if data can be read without blocking, 0 if data can not be + * read without blocking, else error code. 
*/ -static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp, - poll_table *wait) +static unsigned int kbasep_vinstr_hwcnt_reader_poll( + struct file *filp, + poll_table *wait) { struct kbase_vinstr_client *cli; - KBASE_DEBUG_ASSERT(filp); - KBASE_DEBUG_ASSERT(wait); + if (!filp || !wait) + return -EINVAL; cli = filp->private_data; - KBASE_DEBUG_ASSERT(cli); + if (!cli) + return -EINVAL; poll_wait(filp, &cli->waitq, wait); if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli)) @@ -1756,25 +927,28 @@ static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp, } /** - * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap - * @filp: pointer to file structure - * @vma: pointer to vma structure - * Return: zero on success + * kbasep_vinstr_hwcnt_reader_mmap() - hwcnt reader's mmap. + * @filp: Non-NULL pointer to file structure. + * @vma: Non-NULL pointer to vma structure. + * + * Return: 0 on success, else error code. */ -static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, - struct vm_area_struct *vma) +static int kbasep_vinstr_hwcnt_reader_mmap( + struct file *filp, + struct vm_area_struct *vma) { struct kbase_vinstr_client *cli; - unsigned long size, addr, pfn, offset; - unsigned long vm_size = vma->vm_end - vma->vm_start; + unsigned long vm_size, size, addr, pfn, offset; - KBASE_DEBUG_ASSERT(filp); - KBASE_DEBUG_ASSERT(vma); + if (!filp || !vma) + return -EINVAL; cli = filp->private_data; - KBASE_DEBUG_ASSERT(cli); + if (!cli) + return -EINVAL; - size = cli->buffer_count * cli->dump_size; + vm_size = vma->vm_end - vma->vm_start; + size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes; if (vma->vm_pgoff > (size >> PAGE_SHIFT)) return -EINVAL; @@ -1783,579 +957,33 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, if (vm_size > size - offset) return -EINVAL; - addr = __pa((unsigned long)cli->dump_buffers + offset); + addr = __pa(cli->dump_bufs.page_addr + offset); pfn = addr >> PAGE_SHIFT; return remap_pfn_range( - vma, - vma->vm_start, - pfn, - vm_size, - vma->vm_page_prot); + vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot); } /** - * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release - * @inode: pointer to inode structure - * @filp: pointer to file structure - * Return always return zero + * kbasep_vinstr_hwcnt_reader_release() - hwcnt reader's release. + * @inode: Non-NULL pointer to inode structure. + * @filp: Non-NULL pointer to file structure. + * + * Return: 0 always. 
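To complete the userspace view of the reader interface reworked above, a hedged sketch of the consume loop: poll() reports POLLIN when a dump buffer is ready, GET_BUFFER returns a struct kbase_hwcnt_reader_metadata identifying the buffer, and every GET_BUFFER must be matched with a PUT_BUFFER once the sample has been copied out. The example_* helper and the reuse of the mapping from the earlier sketch are assumptions for illustration.

    /* Userspace sketch: wait for one sample, copy it out, hand the buffer back. */
    #include <poll.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include "mali_kbase_hwcnt_reader.h"  /* metadata struct + reader ioctls */

    static int example_read_one_sample(int reader_fd, const uint8_t *samples,
                                       uint32_t buf_size, uint8_t *dst)
    {
        struct kbase_hwcnt_reader_metadata meta;
        struct pollfd pfd = { .fd = reader_fd, .events = POLLIN };

        /* Blocks until the kernel has filled at least one dump buffer. */
        if (poll(&pfd, 1, -1) < 0)
            return -1;

        if (ioctl(reader_fd, KBASE_HWCNT_READER_GET_BUFFER, &meta) < 0)
            return -1;

        /* meta.buffer_idx selects one of the mmap'd, back-to-back buffers;
         * meta.timestamp and meta.event_id describe when and why it was dumped. */
        memcpy(dst, samples + (size_t)meta.buffer_idx * buf_size, buf_size);

        /* Release the buffer so the kernel can reuse it. */
        return ioctl(reader_fd, KBASE_HWCNT_READER_PUT_BUFFER, &meta);
    }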
*/ static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, - struct file *filp) -{ - struct kbase_vinstr_client *cli; - - KBASE_DEBUG_ASSERT(inode); - KBASE_DEBUG_ASSERT(filp); - - cli = filp->private_data; - KBASE_DEBUG_ASSERT(cli); - - kbase_vinstr_detach_client(cli); - return 0; -} - -/*****************************************************************************/ - -/** - * kbasep_vinstr_kick_scheduler - trigger scheduler cycle - * @kbdev: pointer to kbase device structure - */ -static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - unsigned long flags; - - down(&js_devdata->schedule_sem); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_backend_slot_update(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - up(&js_devdata->schedule_sem); -} - -/** - * kbasep_vinstr_suspend_worker - worker suspending vinstr module - * @data: pointer to work structure - */ -static void kbasep_vinstr_suspend_worker(struct work_struct *data) -{ - struct kbase_vinstr_context *vinstr_ctx; - unsigned long flags; - - vinstr_ctx = container_of(data, struct kbase_vinstr_context, - suspend_work); - - mutex_lock(&vinstr_ctx->lock); - - if (vinstr_ctx->kctx) - disable_hwcnt(vinstr_ctx); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->state = VINSTR_SUSPENDED; - wake_up_all(&vinstr_ctx->suspend_waitq); - - if (vinstr_ctx->need_resume) { - vinstr_ctx->need_resume = false; - vinstr_ctx->state = VINSTR_RESUMING; - schedule_work(&vinstr_ctx->resume_work); - - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - mutex_unlock(&vinstr_ctx->lock); - } else { - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - mutex_unlock(&vinstr_ctx->lock); - - /* Kick GPU scheduler to allow entering protected mode. - * This must happen after vinstr was suspended. - */ - kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); - } -} - -/** - * kbasep_vinstr_resume_worker - worker resuming vinstr module - * @data: pointer to work structure - */ -static void kbasep_vinstr_resume_worker(struct work_struct *data) + struct file *filp) { - struct kbase_vinstr_context *vinstr_ctx; - unsigned long flags; - - vinstr_ctx = container_of(data, struct kbase_vinstr_context, - resume_work); - - mutex_lock(&vinstr_ctx->lock); - - if (vinstr_ctx->kctx) - enable_hwcnt(vinstr_ctx); + struct kbase_vinstr_client *vcli = filp->private_data; - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->state = VINSTR_IDLE; - wake_up_all(&vinstr_ctx->suspend_waitq); + mutex_lock(&vcli->vctx->lock); - if (vinstr_ctx->need_suspend) { - vinstr_ctx->need_suspend = false; - vinstr_ctx->state = VINSTR_SUSPENDING; - schedule_work(&vinstr_ctx->suspend_work); + vcli->vctx->client_count--; + list_del(&vcli->node); - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + mutex_unlock(&vcli->vctx->lock); - mutex_unlock(&vinstr_ctx->lock); - } else { - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - mutex_unlock(&vinstr_ctx->lock); - - /* Kick GPU scheduler to allow entering protected mode. - * Note that scheduler state machine might requested re-entry to - * protected mode before vinstr was resumed. - * This must happen after vinstr was release. 
- */ - kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); - } -} - -/*****************************************************************************/ - -struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) -{ - struct kbase_vinstr_context *vinstr_ctx; - - vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL); - if (!vinstr_ctx) - return NULL; - - INIT_LIST_HEAD(&vinstr_ctx->idle_clients); - INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); - INIT_LIST_HEAD(&vinstr_ctx->suspended_clients); - mutex_init(&vinstr_ctx->lock); - spin_lock_init(&vinstr_ctx->state_lock); - vinstr_ctx->kbdev = kbdev; - vinstr_ctx->thread = NULL; - vinstr_ctx->state = VINSTR_IDLE; - vinstr_ctx->suspend_cnt = 0; - INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker); - INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker); - init_waitqueue_head(&vinstr_ctx->suspend_waitq); - - atomic_set(&vinstr_ctx->request_pending, 0); - init_waitqueue_head(&vinstr_ctx->waitq); - - return vinstr_ctx; -} - -void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx) -{ - struct kbase_vinstr_client *cli; - - /* Stop service thread first. */ - if (vinstr_ctx->thread) - kthread_stop(vinstr_ctx->thread); - - /* Wait for workers. */ - flush_work(&vinstr_ctx->suspend_work); - flush_work(&vinstr_ctx->resume_work); - - while (1) { - struct list_head *list = &vinstr_ctx->idle_clients; - - if (list_empty(list)) { - list = &vinstr_ctx->waiting_clients; - if (list_empty(list)) { - list = &vinstr_ctx->suspended_clients; - if (list_empty(list)) - break; - } - } - - cli = list_first_entry(list, struct kbase_vinstr_client, list); - list_del(&cli->list); - if (!cli->suspended) - vinstr_ctx->nclients--; - else - vinstr_ctx->nclients_suspended--; - kfree(cli->accum_buffer); - kfree(cli); - } - KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients); - KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients_suspended); - if (vinstr_ctx->kctx) - kbasep_vinstr_destroy_kctx(vinstr_ctx); - kfree(vinstr_ctx); -} - -int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx, - struct kbase_ioctl_hwcnt_reader_setup *setup) -{ - struct kbase_vinstr_client *cli; - u32 bitmap[4]; - int fd; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - KBASE_DEBUG_ASSERT(setup); - KBASE_DEBUG_ASSERT(setup->buffer_count); - - bitmap[SHADER_HWCNT_BM] = setup->shader_bm; - bitmap[TILER_HWCNT_BM] = setup->tiler_bm; - bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; - bitmap[JM_HWCNT_BM] = setup->jm_bm; - - cli = kbasep_vinstr_attach_client( - vinstr_ctx, - setup->buffer_count, - bitmap, - &fd, - NULL); - - if (!cli) - return -ENOMEM; - - kbase_vinstr_wait_for_ready(vinstr_ctx); - return fd; -} - -int kbase_vinstr_legacy_hwc_setup( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_vinstr_client **cli, - struct kbase_ioctl_hwcnt_enable *enable) -{ - KBASE_DEBUG_ASSERT(vinstr_ctx); - KBASE_DEBUG_ASSERT(enable); - KBASE_DEBUG_ASSERT(cli); - - if (enable->dump_buffer) { - u32 bitmap[4]; - - bitmap[SHADER_HWCNT_BM] = enable->shader_bm; - bitmap[TILER_HWCNT_BM] = enable->tiler_bm; - bitmap[MMU_L2_HWCNT_BM] = enable->mmu_l2_bm; - bitmap[JM_HWCNT_BM] = enable->jm_bm; - - if (*cli) - return -EBUSY; - - *cli = kbasep_vinstr_attach_client( - vinstr_ctx, - 0, - bitmap, - (void *)(uintptr_t)enable->dump_buffer, - NULL); - - if (!(*cli)) - return -ENOMEM; - - kbase_vinstr_wait_for_ready(vinstr_ctx); - } else { - if (!*cli) - return -EINVAL; - - kbase_vinstr_detach_client(*cli); - *cli = NULL; - } + kbasep_vinstr_client_destroy(vcli); return 0; } - -struct 
kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_ioctl_hwcnt_reader_setup *setup, - void *kernel_buffer) -{ - struct kbase_vinstr_client *kernel_client; - u32 bitmap[4]; - - if (!vinstr_ctx || !setup || !kernel_buffer) - return NULL; - - bitmap[SHADER_HWCNT_BM] = setup->shader_bm; - bitmap[TILER_HWCNT_BM] = setup->tiler_bm; - bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; - bitmap[JM_HWCNT_BM] = setup->jm_bm; - - kernel_client = kbasep_vinstr_attach_client( - vinstr_ctx, - 0, - bitmap, - NULL, - kernel_buffer); - - if (kernel_client) - kbase_vinstr_wait_for_ready(vinstr_ctx); - - return kernel_client; -} -KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup); - -int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli, - enum base_hwcnt_reader_event event_id) -{ - int rcode = 0; - struct kbase_vinstr_context *vinstr_ctx; - u64 timestamp; - u32 event_mask; - - if (!cli) - return -EINVAL; - - vinstr_ctx = cli->vinstr_ctx; - KBASE_DEBUG_ASSERT(vinstr_ctx); - - KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT); - event_mask = 1 << event_id; - - mutex_lock(&vinstr_ctx->lock); - - if (event_mask & cli->event_mask) { - rcode = kbasep_vinstr_collect_and_accumulate( - vinstr_ctx, - ×tamp); - if (rcode) - goto exit; - - rcode = kbasep_vinstr_update_client(cli, timestamp, event_id); - if (rcode) - goto exit; - - kbasep_vinstr_reprogram(vinstr_ctx); - } - -exit: - mutex_unlock(&vinstr_ctx->lock); - - return rcode; -} -KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump); - -int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli) -{ - struct kbase_vinstr_context *vinstr_ctx; - int rcode; - u64 unused; - - if (!cli) - return -EINVAL; - - vinstr_ctx = cli->vinstr_ctx; - KBASE_DEBUG_ASSERT(vinstr_ctx); - - mutex_lock(&vinstr_ctx->lock); - - rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); - if (rcode) - goto exit; - rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx); - if (rcode) - goto exit; - memset(cli->accum_buffer, 0, cli->dump_size); - - kbasep_vinstr_reprogram(vinstr_ctx); - -exit: - mutex_unlock(&vinstr_ctx->lock); - - return rcode; -} - -int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx) -{ - unsigned long flags; - int ret = -EAGAIN; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->forced_suspend = true; - switch (vinstr_ctx->state) { - case VINSTR_SUSPENDED: - vinstr_ctx->suspend_cnt++; - /* overflow shall not happen */ - BUG_ON(0 == vinstr_ctx->suspend_cnt); - ret = 0; - break; - - case VINSTR_IDLE: - if (vinstr_ctx->clients_present) { - vinstr_ctx->state = VINSTR_SUSPENDING; - schedule_work(&vinstr_ctx->suspend_work); - } else { - vinstr_ctx->state = VINSTR_SUSPENDED; - - vinstr_ctx->suspend_cnt++; - /* overflow shall not happen */ - WARN_ON(0 == vinstr_ctx->suspend_cnt); - ret = 0; - } - break; - - case VINSTR_DUMPING: - vinstr_ctx->state = VINSTR_SUSPENDING; - break; - - case VINSTR_RESUMING: - vinstr_ctx->need_suspend = true; - break; - - case VINSTR_SUSPENDING: - break; - - default: - KBASE_DEBUG_ASSERT(0); - break; - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - return ret; -} - -static int kbase_vinstr_is_ready(struct kbase_vinstr_context *vinstr_ctx) -{ - unsigned long flags; - int ret = -EAGAIN; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - switch (vinstr_ctx->state) { - case VINSTR_SUSPENDED: - case VINSTR_RESUMING: - case VINSTR_SUSPENDING: - break; - - case 
VINSTR_IDLE: - case VINSTR_DUMPING: - ret = 0; - break; - default: - KBASE_DEBUG_ASSERT(0); - break; - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - return ret; -} - -void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx) -{ - wait_event(vinstr_ctx->suspend_waitq, - (0 == kbase_vinstr_try_suspend(vinstr_ctx))); -} - -void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx) -{ - wait_event(vinstr_ctx->suspend_waitq, - (0 == kbase_vinstr_is_ready(vinstr_ctx))); -} -KBASE_EXPORT_TEST_API(kbase_vinstr_wait_for_ready); - -/** - * kbase_vinstr_update_suspend - Update vinstr suspend/resume status depending - * on nclients - * @vinstr_ctx: vinstr context pointer - * - * This function should be called whenever vinstr_ctx->nclients changes. This - * may cause vinstr to be suspended or resumed, depending on the number of - * clients and whether IPA is suspended or not. - */ -static void kbase_vinstr_update_suspend(struct kbase_vinstr_context *vinstr_ctx) -{ - lockdep_assert_held(&vinstr_ctx->state_lock); - - switch (vinstr_ctx->state) { - case VINSTR_SUSPENDED: - if ((vinstr_ctx->nclients) && (0 == vinstr_ctx->suspend_cnt)) { - vinstr_ctx->state = VINSTR_RESUMING; - schedule_work(&vinstr_ctx->resume_work); - } - break; - - case VINSTR_SUSPENDING: - if ((vinstr_ctx->nclients) && (!vinstr_ctx->forced_suspend)) - vinstr_ctx->need_resume = true; - break; - - case VINSTR_IDLE: - if (!vinstr_ctx->nclients) { - vinstr_ctx->state = VINSTR_SUSPENDING; - schedule_work(&vinstr_ctx->suspend_work); - } - break; - - case VINSTR_DUMPING: - if (!vinstr_ctx->nclients) - vinstr_ctx->state = VINSTR_SUSPENDING; - break; - - case VINSTR_RESUMING: - if (!vinstr_ctx->nclients) - vinstr_ctx->need_suspend = true; - break; - } -} - -void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx) -{ - unsigned long flags; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state); - if (VINSTR_SUSPENDED == vinstr_ctx->state) { - BUG_ON(0 == vinstr_ctx->suspend_cnt); - vinstr_ctx->suspend_cnt--; - if (0 == vinstr_ctx->suspend_cnt) { - vinstr_ctx->forced_suspend = false; - if (vinstr_ctx->clients_present) { - vinstr_ctx->state = VINSTR_RESUMING; - schedule_work(&vinstr_ctx->resume_work); - } else { - vinstr_ctx->state = VINSTR_IDLE; - } - } - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); -} - -void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client) -{ - struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx; - unsigned long flags; - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - - if (!client->suspended) { - list_del(&client->list); - list_add(&client->list, &vinstr_ctx->suspended_clients); - - vinstr_ctx->nclients--; - vinstr_ctx->nclients_suspended++; - kbase_vinstr_update_suspend(vinstr_ctx); - - client->suspended = true; - } - - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); -} - -void kbase_vinstr_resume_client(struct kbase_vinstr_client *client) -{ - struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx; - unsigned long flags; - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - - if (client->suspended) { - list_del(&client->list); - list_add(&client->list, &vinstr_ctx->idle_clients); - - vinstr_ctx->nclients++; - vinstr_ctx->nclients_suspended--; - kbase_vinstr_update_suspend(vinstr_ctx); - - client->suspended = false; - } - - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); -} diff --git a/mali_kbase/mali_kbase_vinstr.h 
b/mali_kbase/mali_kbase_vinstr.h index d32799f..81d315f 100644 --- a/mali_kbase/mali_kbase_vinstr.h +++ b/mali_kbase/mali_kbase_vinstr.h @@ -20,163 +20,72 @@ * */ +/* + * Vinstr, used to provide an ioctl for userspace access to periodic hardware + * counters. + */ + #ifndef _KBASE_VINSTR_H_ #define _KBASE_VINSTR_H_ -#include <mali_kbase_hwcnt_reader.h> -#include <mali_kbase_ioctl.h> - -/*****************************************************************************/ - struct kbase_vinstr_context; -struct kbase_vinstr_client; - -/*****************************************************************************/ - -/** - * kbase_vinstr_init() - initialize the vinstr core - * @kbdev: kbase device - * - * Return: pointer to the vinstr context on success or NULL on failure - */ -struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev); - -/** - * kbase_vinstr_term() - terminate the vinstr core - * @vinstr_ctx: vinstr context - */ -void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx); - -/** - * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader - * @vinstr_ctx: vinstr context - * @setup: reader's configuration - * - * Return: file descriptor on success and a (negative) error code otherwise - */ -int kbase_vinstr_hwcnt_reader_setup( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_ioctl_hwcnt_reader_setup *setup); +struct kbase_hwcnt_virtualizer; +struct kbase_ioctl_hwcnt_reader_setup; /** - * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping - * @vinstr_ctx: vinstr context - * @cli: pointer where to store pointer to new vinstr client structure - * @enable: hwc configuration + * kbase_vinstr_init() - Initialise a vinstr context. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @out_vctx: Non-NULL pointer to where the pointer to the created vinstr + * context will be stored on success. * - * Return: zero on success - */ -int kbase_vinstr_legacy_hwc_setup( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_vinstr_client **cli, - struct kbase_ioctl_hwcnt_enable *enable); - -/** - * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side - * client - * @vinstr_ctx: vinstr context - * @setup: reader's configuration - * @kernel_buffer: pointer to dump buffer + * On creation, the suspend count of the context will be 0. * - * setup->buffer_count is not used for kernel side clients. - * - * Return: pointer to client structure, or NULL on failure + * Return: 0 on success, else error code. */ -struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_ioctl_hwcnt_reader_setup *setup, - void *kernel_buffer); +int kbase_vinstr_init( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_vinstr_context **out_vctx); /** - * kbase_vinstr_hwc_dump - issue counter dump for vinstr client - * @cli: pointer to vinstr client - * @event_id: id of event that triggered hwcnt dump - * - * Return: zero on success + * kbase_vinstr_term() - Terminate a vinstr context. + * @vctx: Pointer to the vinstr context to be terminated. */ -int kbase_vinstr_hwc_dump( - struct kbase_vinstr_client *cli, - enum base_hwcnt_reader_event event_id); +void kbase_vinstr_term(struct kbase_vinstr_context *vctx); /** - * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for - * a given kbase context - * @cli: pointer to vinstr client + * kbase_vinstr_suspend() - Increment the suspend count of the context. 
+ * @vctx: Non-NULL pointer to the vinstr context to be suspended. * - * Return: zero on success + * After this function call returns, it is guaranteed that all timers and + * workers in vinstr will be cancelled, and will not be re-triggered until + * after the context has been resumed. In effect, this means no new counter + * dumps will occur for any existing or subsequently added periodic clients. */ -int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli); +void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx); /** - * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context - * @vinstr_ctx: vinstr context - * - * Return: 0 on success, or negative if state change is in progress + * kbase_vinstr_resume() - Decrement the suspend count of the context. + * @vctx: Non-NULL pointer to the vinstr context to be resumed. * - * Warning: This API call is non-generic. It is meant to be used only by - * job scheduler state machine. + * If a call to this function decrements the suspend count from 1 to 0, then + * normal operation of vinstr will be resumed (i.e. counter dumps will once + * again be automatically triggered for all periodic clients). * - * Function initiates vinstr switch to suspended state. Once it was called - * vinstr enters suspending state. If function return non-zero value, it - * indicates that state switch is not complete and function must be called - * again. On state switch vinstr will trigger job scheduler state machine - * cycle. - */ -int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx); - -/** - * kbase_vinstr_suspend - suspends operation of a given vinstr context - * @vinstr_ctx: vinstr context - * - * Function initiates vinstr switch to suspended state. Then it blocks until - * operation is completed. - */ -void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx); - -/** - * kbase_vinstr_wait_for_ready - waits for the vinstr context to get ready - * @vinstr_ctx: vinstr context - * - * Function waits for the vinstr to become ready for dumping. It can be in the - * resuming state after the client was attached but the client currently expects - * that vinstr is ready for dumping immediately post attach. - */ -void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx); - -/** - * kbase_vinstr_resume - resumes operation of a given vinstr context - * @vinstr_ctx: vinstr context - * - * Function can be called only if it was preceded by a successful call + * It is only valid to call this function one time for each prior returned call * to kbase_vinstr_suspend. */ -void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx); +void kbase_vinstr_resume(struct kbase_vinstr_context *vctx); /** - * kbase_vinstr_dump_size - Return required size of dump buffer - * @kbdev: device pointer + * kbase_vinstr_hwcnt_reader_setup() - Set up a new hardware counter reader + * client. + * @vinstr_ctx: Non-NULL pointer to the vinstr context. + * @setup: Non-NULL pointer to the hwcnt reader configuration. * - * Return : buffer size in bytes + * Return: file descriptor on success, else a (negative) error code. 
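The suspend/resume documentation above describes a counted suspend, so a short kernel-side usage sketch may help: each call to kbase_vinstr_suspend() must eventually be matched by exactly one kbase_vinstr_resume(), and no counter dumps occur while the count is non-zero. The surrounding reset path and the example_* name are assumptions for illustration, not part of this patch.

    /* Sketch only: keep vinstr quiescent across a hypothetical GPU reset. */
    static void example_reset_with_vinstr_quiesced(struct kbase_vinstr_context *vctx)
    {
        /* After this returns, vinstr's timer and worker are cancelled and stay
         * cancelled until the matching resume. */
        kbase_vinstr_suspend(vctx);

        /* ... perform the reset or power transition here ... */

        /* Counted resume: periodic dumping restarts only once the suspend
         * count drops back to zero. */
        kbase_vinstr_resume(vctx);
    }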
diff --git a/mali_kbase/mali_linux_kbase_trace.h b/mali_kbase/mali_linux_kbase_trace.h
index 920562e..6c6a8c6 100644
--- a/mali_kbase/mali_linux_kbase_trace.h
+++ b/mali_kbase/mali_linux_kbase_trace.h
@@ -154,7 +154,6 @@ DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
-DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
diff --git a/mali_kbase/mali_malisw.h b/mali_kbase/mali_malisw.h
index f17bd5e..3a4db10 100644
--- a/mali_kbase/mali_malisw.h
+++ b/mali_kbase/mali_malisw.h
@@ -1,6 +1,6 @@
/*
 *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -83,15 +83,6 @@
#define CSTD_NOP(...) ((void)#__VA_ARGS__)

/**
- * Function-like macro for converting a pointer in to a u64 for storing into
- * an external data structure. This is commonly used when pairing a 32-bit
- * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
- * is complex and a straight cast does not work reliably as pointers are
- * often considered as signed.
- */
-#define PTR_TO_U64(x) ((uint64_t)((uintptr_t)(x)))
-
-/**
 * @hideinitializer
 * Function-like macro for stringizing a single level macro.
 * @code
@@ -115,22 +106,4 @@
 */
#define CSTD_STR2(x) CSTD_STR1(x)

-/**
- * Specify an assertion value which is evaluated at compile time. Recommended
- * usage is specification of a @c static @c INLINE function containing all of
- * the assertions thus:
- *
- * @code
- * static INLINE [module]_compile_time_assertions( void )
- * {
- *     COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
- * }
- * @endcode
- *
- * @note Use @c static not @c STATIC. We never want to turn off this @c static
- * specification for testing purposes.
- */
-#define CSTD_COMPILE_TIME_ASSERT(expr) \
-	do { switch (0) { case 0: case (expr):; } } while (false)
-
#endif /* _MALISW_H_ */
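The CSTD_COMPILE_TIME_ASSERT macro removed above relied on a duplicate-case trick: when the asserted expression evaluates to 0 the two case labels collide and the translation unit fails to compile, while a non-zero expression leaves only dead code the compiler discards. The sketch below restates the deleted idiom under hypothetical example_ names; in-kernel code would more commonly reach for BUILD_BUG_ON, although this diff does not show what, if anything, replaced the callers.

#include <stdint.h>

/* Re-statement of the deleted idiom under hypothetical names. */
#define EXAMPLE_COMPILE_TIME_ASSERT(expr) \
	do { switch (0) { case 0: case (expr):; } } while (false)

static inline void example_compile_time_assertions(void)
{
	/* Compiles: the labels are "case 0" and "case 1". */
	EXAMPLE_COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(intptr_t));

	/* Would not compile: both labels collapse to "case 0".
	 * EXAMPLE_COMPILE_TIME_ASSERT(sizeof(int) == 1);
	 */
}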
diff --git a/mali_kbase/mali_midg_regmap.h b/mali_kbase/mali_midg_regmap.h
index 8d9f7b6..0f03e8d 100644
--- a/mali_kbase/mali_midg_regmap.h
+++ b/mali_kbase/mali_midg_regmap.h
@@ -217,7 +217,7 @@
#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */

/* JOB IRQ flags */
-#define JOB_IRQ_GLOBAL_IF (1 << 18) /* Global interface interrupt received */
+#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */

#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
@@ -381,14 +381,14 @@
/*
 * Begin TRANSCFG register values
 */
-#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
-#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
-#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
-
-#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
-#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
-#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
-
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2ull << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3ull << 28)
+#define AS_TRANSCFG_R_ALLOCATE (1ull << 30)
/*
 * Begin Command Values
 */
diff --git a/mali_kbase/mali_uk.h b/mali_kbase/mali_uk.h
index c81f404..701f390 100644
--- a/mali_kbase/mali_uk.h
+++ b/mali_kbase/mali_uk.h
@@ -74,68 +74,6 @@ enum uk_client_id {
	UK_CLIENT_COUNT
};

-/**
- * Each function callable through the UK interface has a unique number.
- * Functions provided by UK clients start from number UK_FUNC_ID.
- * Numbers below UK_FUNC_ID are used for internal UK functions.
- */
-enum uk_func {
-	UKP_FUNC_ID_CHECK_VERSION, /**< UKK Core internal function */
-	/**
-	 * Each UK client numbers the functions they provide starting from
-	 * number UK_FUNC_ID. This number is then eventually assigned to the
-	 * id field of the union uk_header structure when preparing to make a
-	 * UK call. See your UK client for a list of their function numbers.
-	 */
-	UK_FUNC_ID = 512
-};
-
-/**
- * Arguments for a UK call are stored in a structure. This structure consists
- * of a fixed size header and a payload. The header carries a 32-bit number
- * identifying the UK function to be called (see uk_func). When the UKK client
- * receives this header and executed the requested UK function, it will use
- * the same header to store the result of the function in the form of a
- * int return code. The size of this structure is such that the
- * first member of the payload following the header can be accessed efficiently
- * on a 32 and 64-bit kernel and the structure has the same size regardless
- * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
- * accordingly in the OS specific mali_uk_os.h header file.
- */
-union uk_header {
-	/**
-	 * 32-bit number identifying the UK function to be called.
-	 * Also see uk_func.
-	 */
-	u32 id;
-	/**
-	 * The int return code returned by the called UK function.
-	 * See the specification of the particular UK function you are
-	 * calling for the meaning of the error codes returned. All
-	 * UK functions return 0 on success.
-	 */
-	u32 ret;
-	/*
-	 * Used to ensure 64-bit alignment of this union. Do not remove.
-	 * This field is used for padding and does not need to be initialized.
-	 */
-	u64 sizer;
-};
-
-/**
- * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
- * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
- */
-struct uku_version_check_args {
-	union uk_header header;
-	/**< UK call header */
-	u16 major;
-	/**< This field carries the user-side major version on input and the kernel-side major version on output */
-	u16 minor;
-	/**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
-	u8 padding[4];
-};
-
/** @} end group uk_api */

/** @} *//* end group base_api */
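The deleted union uk_header shows why the legacy interface could share one layout between 32-bit and 64-bit kernels: the unused u64 member pads the header to eight bytes, so the first payload member lands at the same offset either way. The snippet below restates that layout with standard types purely for illustration; the example_ names are not part of the driver.

#include <stddef.h>
#include <stdint.h>

/* Illustrative re-statement of the deleted header layout. */
union example_uk_header {
	uint32_t id;    /* function number on input */
	uint32_t ret;   /* return code on output */
	uint64_t sizer; /* padding only, never read */
};

struct example_uk_call {
	union example_uk_header header;
	uint64_t first_payload_member;
};

_Static_assert(sizeof(union example_uk_header) == 8,
	       "header occupies 8 bytes on 32-bit and 64-bit builds");
_Static_assert(offsetof(struct example_uk_call, first_payload_member) == 8,
	       "payload starts at the same offset on 32-bit and 64-bit builds");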
diff --git a/mali_kbase/sconscript b/mali_kbase/sconscript
index 01c7589..f9d9c1b 100644
--- a/mali_kbase/sconscript
+++ b/mali_kbase/sconscript
@@ -50,7 +50,6 @@ make_args = env.kernel_get_config_defines(ret_list = True) + [
	'MALI_KERNEL_TEST_API=%s' % env['debug'],
	'MALI_UNIT_TEST=%s' % env['unit'],
	'MALI_RELEASE_NAME=%s' % env['mali_release_name'],
-	'MALI_MOCK_TEST=%s' % mock_test,
	'MALI_CUSTOMER_RELEASE=%s' % env['release'],
	'MALI_USE_CSF=%s' % env['csf'],
	'MALI_COVERAGE=%s' % env['coverage'],
diff --git a/mali_kbase/tests/Mconfig b/mali_kbase/tests/Mconfig
index ddd7630..af4e383 100644
--- a/mali_kbase/tests/Mconfig
+++ b/mali_kbase/tests/Mconfig
@@ -21,6 +21,11 @@ config BUILD_IPA_TESTS
	default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ
	default n

+config BUILD_IPA_UNIT_TESTS
+	bool
+	default y if NO_MALI && BUILD_IPA_TESTS
+	default n
+
config BUILD_CSF_TESTS
	bool
	default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF
diff --git a/mali_kbase/tests/mali_kutf_irq_test/build.bp b/mali_kbase/tests/mali_kutf_irq_test/build.bp
index a6669af..66f4eb3 100644
--- a/mali_kbase/tests/mali_kutf_irq_test/build.bp
+++ b/mali_kbase/tests/mali_kutf_irq_test/build.bp
@@ -21,7 +21,6 @@ bob_kernel_module {
        "mali_kbase",
        "kutf",
    ],
-    install_group: "IG_tests",
    enabled: false,
    base_build_kutf: {
        enabled: true,
diff --git a/mali_kbase/tests/sconscript b/mali_kbase/tests/sconscript
index 0bd24a5..ca64e83 100644
--- a/mali_kbase/tests/sconscript
+++ b/mali_kbase/tests/sconscript
@@ -1,5 +1,5 @@
#
-# (C) COPYRIGHT 2010-2011, 2013, 2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2011, 2013, 2017-2018 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -39,6 +39,5 @@ if kutf_env['debug'] == '1':
	SConscript('kutf_test_runner/sconscript')

if env['unit'] == '1':
-	SConscript('mali_kutf_ipa_test/sconscript')
	SConscript('mali_kutf_ipa_unit_test/sconscript')
	SConscript('mali_kutf_vinstr_test/sconscript')
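Looking back at the mali_midg_regmap.h hunk in this change, the TRANSCFG field macros pick up a ull suffix (and a new R_ALLOCATE bit) because the driver assembles the address-space TRANSCFG value as a single 64-bit quantity before splitting it across the 32-bit _LO/_HI register halves. The sketch below shows the kind of composition the unsigned 64-bit constants make straightforward; only the macro values are taken from the diff, while the helper, its parameter, and the particular field choices are hypothetical.

#include <stdint.h>

/* Values copied from the hunk above; the helper itself is illustrative. */
#define AS_TRANSCFG_PTW_MEMATTR_MASK       (3ull << 24)
#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24)
#define AS_TRANSCFG_PTW_SH_MASK            ((3ull << 28))
#define AS_TRANSCFG_PTW_SH_IS              (3ull << 28)
#define AS_TRANSCFG_R_ALLOCATE             (1ull << 30)

static uint64_t example_build_transcfg(uint64_t transcfg)
{
	/* Clear the page-table-walk attribute and shareability fields, then
	 * select write-back, inner-shareable walks and set the new
	 * R_ALLOCATE bit. The ull suffix keeps every intermediate value in
	 * unsigned 64-bit arithmetic, so masking the full 64-bit register
	 * value never depends on int sign extension.
	 */
	transcfg &= ~(AS_TRANSCFG_PTW_MEMATTR_MASK | AS_TRANSCFG_PTW_SH_MASK);
	transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK |
		    AS_TRANSCFG_PTW_SH_IS |
		    AS_TRANSCFG_R_ALLOCATE;
	return transcfg;
}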