summary | refs | log | tree | commit | diff
path: root/mali_kbase/ipa
diff options
context:
space:
mode:
author: Sidath Senanayake <sidaths@google.com> 2018-04-27 13:23:04 +0200
committer: Sidath Senanayake <sidaths@google.com> 2018-04-27 13:23:04 +0200
commit: 3fe808a3e4ba33fa6fc47255b6ec14611e8ef8de (patch)
tree: 8a23baaae16dae4ca0431e002cb736a1034039c2 /mali_kbase/ipa
parent: 8946bcdee4c36dbc82b8c2a2abcf9c2f5eab5ae0 (diff)
download: gpu-3fe808a3e4ba33fa6fc47255b6ec14611e8ef8de.tar.gz
Mali Bifrost DDK r12p0 KMD

Provenance:
  875d9aa9b (collaborate/EAC/b_r12p0)
  BX304L01B-BU-00000-r12p0-01rel0
  BX304L06A-BU-00000-r12p0-01rel0
  BX304X07X-BU-00000-r12p0-01rel0

Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Id91cbb43f407e908f71a977fd139ea1e3a4f6b6f

Diffstat (limited to 'mali_kbase/ipa')
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa.c                 62
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa.h                 17
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_simple.c           4
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c  155
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h   47
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c      22
6 files changed, 160 insertions, 147 deletions
diff --git a/mali_kbase/ipa/mali_kbase_ipa.c b/mali_kbase/ipa/mali_kbase_ipa.c
index db5d9cf..254c1a8 100644
--- a/mali_kbase/ipa/mali_kbase_ipa.c
+++ b/mali_kbase/ipa/mali_kbase_ipa.c
@@ -26,6 +26,7 @@
#include "mali_kbase_ipa.h"
#include "mali_kbase_ipa_debugfs.h"
#include "mali_kbase_ipa_simple.h"
+#include "backend/gpu/mali_kbase_pm_internal.h"
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
#include <linux/pm_opp.h>
@@ -318,14 +319,6 @@ int kbase_ipa_init(struct kbase_device *kbdev)
/* The simple IPA model must *always* be present.*/
ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME);
- if (!ops->do_utilization_scaling_in_framework) {
- dev_err(kbdev->dev,
- "Fallback IPA model %s should not account for utilization\n",
- ops->name);
- err = -EINVAL;
- goto end;
- }
-
default_model = kbase_ipa_init_model(kbdev, ops);
if (!default_model) {
err = -EINVAL;
@@ -545,7 +538,7 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq,
model = kbdev->ipa.fallback_model;
- err = model->ops->get_dynamic_coeff(model, &power_coeff, freq);
+ err = model->ops->get_dynamic_coeff(model, &power_coeff);
if (!err)
power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
@@ -564,48 +557,63 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq,
return power;
}
-int kbase_get_real_power(struct devfreq *df, u32 *power,
+int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
unsigned long freq,
unsigned long voltage)
{
struct kbase_ipa_model *model;
u32 power_coeff = 0;
int err = 0;
- struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+ struct kbasep_pm_metrics diff;
+ u64 total_time;
- mutex_lock(&kbdev->ipa.lock);
+ lockdep_assert_held(&kbdev->ipa.lock);
+
+ kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff);
model = get_current_model(kbdev);
- err = model->ops->get_dynamic_coeff(model, &power_coeff, freq);
+ err = model->ops->get_dynamic_coeff(model, &power_coeff);
- /* If we switch to protected model between get_current_model() and
- * get_dynamic_coeff(), counter reading could fail. If that happens
- * (unlikely, but possible), revert to the fallback model. */
+ /* If the counter model returns an error (e.g. switching back to
+ * protected mode and failing to read counters, or a counter sample
+ * with too few cycles), revert to the fallback model.
+ */
if (err && model != kbdev->ipa.fallback_model) {
model = kbdev->ipa.fallback_model;
- err = model->ops->get_dynamic_coeff(model, &power_coeff, freq);
+ err = model->ops->get_dynamic_coeff(model, &power_coeff);
}
if (err)
- goto exit_unlock;
+ return err;
*power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
- if (model->ops->do_utilization_scaling_in_framework) {
- struct devfreq_dev_status *status = &df->last_status;
- unsigned long total_time = max(status->total_time, 1ul);
- u64 busy_time = min(status->busy_time, total_time);
-
- *power = div_u64((u64) *power * (u64) busy_time, total_time);
- }
+ /* time_busy / total_time cannot be >1, so assigning the 64-bit
+ * result of div_u64 to *power cannot overflow.
+ */
+ total_time = diff.time_busy + (u64) diff.time_idle;
+ *power = div_u64(*power * (u64) diff.time_busy,
+ max(total_time, 1ull));
*power += get_static_power_locked(kbdev, model, voltage);
-exit_unlock:
+ return err;
+}
+KBASE_EXPORT_TEST_API(kbase_get_real_power_locked);
+
+int kbase_get_real_power(struct devfreq *df, u32 *power,
+ unsigned long freq,
+ unsigned long voltage)
+{
+ int ret;
+ struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+
+ mutex_lock(&kbdev->ipa.lock);
+ ret = kbase_get_real_power_locked(kbdev, power, freq, voltage);
mutex_unlock(&kbdev->ipa.lock);
- return err;
+ return ret;
}
KBASE_EXPORT_TEST_API(kbase_get_real_power);
diff --git a/mali_kbase/ipa/mali_kbase_ipa.h b/mali_kbase/ipa/mali_kbase_ipa.h
index 59521ad..e215c2c 100644
--- a/mali_kbase/ipa/mali_kbase_ipa.h
+++ b/mali_kbase/ipa/mali_kbase_ipa.h
@@ -96,8 +96,6 @@ struct kbase_ipa_model_ops {
* get_dynamic_coeff() - calculate dynamic power coefficient
* @model: pointer to model
* @coeffp: pointer to return value location
- * @current_freq: frequency the GPU has been running at for the
- * previous sampling period.
*
* Calculate a dynamic power coefficient, with units pW/(Hz V^2), which
* is then scaled by the IPA framework according to the current OPP's
@@ -105,8 +103,7 @@ struct kbase_ipa_model_ops {
*
* Return: 0 on success, or an error code.
*/
- int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp,
- u32 current_freq);
+ int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
/*
* get_static_coeff() - calculate static power coefficient
* @model: pointer to model
@@ -118,11 +115,6 @@ struct kbase_ipa_model_ops {
* Return: 0 on success, or an error code.
*/
int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
- /* If false, the model's get_dynamic_coeff() method accounts for how
- * long the GPU was active over the sample period. If true, the
- * framework will scale the calculated power according to the
- * utilization stats recorded by devfreq in get_real_power(). */
- bool do_utilization_scaling_in_framework;
};
/**
@@ -208,6 +200,13 @@ extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops;
int kbase_get_real_power(struct devfreq *df, u32 *power,
unsigned long freq,
unsigned long voltage);
+
+/* Called by kbase_get_real_power() to invoke the power models.
+ * Must be called with kbdev->ipa.lock held.
+ */
+int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
+ unsigned long freq,
+ unsigned long voltage);
#endif /* MALI_UNIT_TEST */
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
diff --git a/mali_kbase/ipa/mali_kbase_ipa_simple.c b/mali_kbase/ipa/mali_kbase_ipa_simple.c
index 7dd2ae2..e684df4 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_simple.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_simple.c
@@ -203,8 +203,7 @@ static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp)
return 0;
}
-static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
- u32 current_freq)
+static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
{
struct kbase_ipa_model_simple_data *model_data =
(struct kbase_ipa_model_simple_data *) model->model_data;
@@ -347,6 +346,5 @@ struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = {
.term = &kbase_simple_power_model_term,
.get_dynamic_coeff = &model_dynamic_coeff,
.get_static_coeff = &model_static_coeff,
- .do_utilization_scaling_in_framework = true,
};
KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops);
diff --git a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c b/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c
index 3917fb8..4019657 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c
@@ -23,34 +23,13 @@
#include "mali_kbase_ipa_vinstr_common.h"
#include "mali_kbase_ipa_debugfs.h"
-#if MALI_UNIT_TEST
-static ktime_t dummy_time;
+#define DEFAULT_SCALING_FACTOR 5
-/* Intercept calls to the kernel function using a macro */
-#ifdef ktime_get
-#undef ktime_get
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
-#define ktime_get() (READ_ONCE(dummy_time))
-
-void kbase_ipa_set_dummy_time(ktime_t t)
-{
- WRITE_ONCE(dummy_time, t);
-}
-KBASE_EXPORT_TEST_API(kbase_ipa_set_dummy_time);
-#else
-#define ktime_get() (READ_ONCE(dummy_time))
-
-void kbase_ipa_set_dummy_time(ktime_t t)
-{
- WRITE_ONCE(dummy_time, t);
-}
-KBASE_EXPORT_TEST_API(kbase_ipa_set_dummy_time);
-
-#endif
-
-#endif /* MALI_UNIT_TEST */
+/* If the value of GPU_ACTIVE is below this, use the simple model
+ * instead, to avoid extrapolating small amounts of counter data across
+ * large sample periods.
+ */
+#define DEFAULT_MIN_SAMPLE_CYCLES 10000
/**
* read_hwcnt() - read a counter value
@@ -100,10 +79,8 @@ s64 kbase_ipa_sum_all_shader_cores(
core_mask >>= 1;
}
- /* Range: -2^54 < ret < 2^54 */
- ret *= coeff;
-
- return div_s64(ret, 1000000);
+ /* Range: -2^54 < ret * coeff < 2^54 */
+ return ret * coeff;
}
s64 kbase_ipa_single_counter(
@@ -114,10 +91,7 @@ s64 kbase_ipa_single_counter(
const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter);
/* Range: -2^49 < ret < 2^49 */
- const s64 multiplied = (s64) counter_value * (s64) coeff;
-
- /* Range: -2^29 < return < 2^29 */
- return div_s64(multiplied, 1000000);
+ return counter_value * (s64) coeff;
}
/**
@@ -182,7 +156,6 @@ int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
return -1;
}
- model_data->last_sample_read_time = ktime_get();
kbase_vinstr_hwc_clear(model_data->vinstr_cli);
kbdev->ipa.gpu_active_callback = kbase_ipa_gpu_active;
@@ -214,21 +187,15 @@ void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
model_data->vinstr_buffer = NULL;
}
-int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
- u32 current_freq)
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
{
struct kbase_ipa_model_vinstr_data *model_data =
(struct kbase_ipa_model_vinstr_data *)model->model_data;
struct kbase_device *kbdev = model_data->kbdev;
s64 energy = 0;
size_t i;
- ktime_t now = ktime_get();
- ktime_t time_since_last_sample =
- ktime_sub(now, model_data->last_sample_read_time);
- /* Range: 2^0 < time_since_last_sample_ms < 2^10 (1-1000ms) */
- s64 time_since_last_sample_ms = ktime_to_ms(time_since_last_sample);
- u64 coeff = 0;
- u64 num_cycles;
+ u64 coeff = 0, coeff_mul = 0;
+ u32 active_cycles;
int err = 0;
if (!kbdev->ipa.vinstr_active)
@@ -239,66 +206,82 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
if (err)
goto err0;
- model_data->last_sample_read_time = now;
+ /* Range: 0 (GPU not used at all), to the max sampling interval, say
+ * 1s, * max GPU frequency (GPU 100% utilized).
+ * 0 <= active_cycles <= 1 * ~2GHz
+ * 0 <= active_cycles < 2^31
+ */
+ active_cycles = model_data->get_active_cycles(model_data);
+
+ if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) {
+ err = -ENODATA;
+ goto err0;
+ }
- /* Range of 'energy' is +/- 2^34 * number of IPA groups, so around
- * -2^38 < energy < 2^38 */
+ /* Range: 1 <= active_cycles < 2^31 */
+ active_cycles = max(1u, active_cycles);
+
+ /* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around
+ * -2^57 < energy < 2^57
+ */
for (i = 0; i < model_data->groups_def_num; i++) {
const struct kbase_ipa_group *group = &model_data->groups_def[i];
- s32 coeff, group_energy;
-
- coeff = model_data->group_values[i];
- group_energy = group->op(model_data, coeff, group->counter_block_offset);
+ s32 coeff = model_data->group_values[i];
+ s64 group_energy = group->op(model_data, coeff,
+ group->counter_block_offset);
energy = kbase_ipa_add_saturate(energy, group_energy);
}
- /* Range: 0 <= coeff < 2^38 */
+ /* Range: 0 <= coeff < 2^57 */
if (energy > 0)
coeff = energy;
- /* Scale by user-specified factor and divide by 1000. But actually
- * cancel the division out, because we want the num_cycles in KHz and
- * don't want to lose precision. */
-
- /* Range: 0 < coeff < 2^53 */
- coeff = coeff * model_data->scaling_factor;
-
- if (time_since_last_sample_ms == 0) {
- time_since_last_sample_ms = 1;
- } else if (time_since_last_sample_ms < 0) {
- err = -ERANGE;
- goto err0;
- }
-
- /* Range: 2^20 < num_cycles < 2^40 mCycles */
- num_cycles = (u64) current_freq * (u64) time_since_last_sample_ms;
- /* Range: 2^10 < num_cycles < 2^30 Cycles */
- num_cycles = div_u64(num_cycles, 1000000);
+ /* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this
+ * can be constrained further: Counter values can only be increased by
+ * a theoretical maximum of about 64k per clock cycle. Beyond this,
+ * we'd have to sample every 1ms to avoid them overflowing at the
+ * lowest clock frequency (say 100MHz). Therefore, we can write the
+ * range of 'coeff' in terms of active_cycles:
+ *
+ * coeff = SUM(coeffN * counterN * num_cores_for_counterN)
+ * coeff <= SUM(coeffN * counterN) * max_num_cores
+ * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores
+ * (substitute max_counter = 2^16 * active_cycles)
+ * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores
+ * coeff <= 2^3 * 2^22 * 2^16 * active_cycles * 2^5
+ * coeff <= 2^46 * active_cycles
+ *
+ * So after the division: 0 <= coeff <= 2^46
+ */
+ coeff = div_u64(coeff, active_cycles);
- /* num_cycles should never be 0 in _normal_ usage (because we expect
- * frequencies on the order of MHz and >10ms polling intervals), but
- * protect against divide-by-zero anyway. */
- if (num_cycles == 0)
- num_cycles = 1;
+ /* Scale by user-specified factor (where unity is 1000).
+ * Range: 0 <= coeff_mul < 2^61
+ */
+ coeff_mul = coeff * model_data->scaling_factor;
- /* Range: 0 < coeff < 2^43 */
- coeff = div_u64(coeff, num_cycles);
+ /* Range: 0 <= coeff_mul < 2^51 */
+ coeff_mul = div_u64(coeff_mul, 1000u);
err0:
/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
- *coeffp = clamp(coeff, (u64) 0, (u64) 1 << 16);
+ *coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16);
return err;
}
int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
- const struct kbase_ipa_group *ipa_groups_def,
- size_t ipa_group_size)
+ const struct kbase_ipa_group *ipa_groups_def,
+ size_t ipa_group_size,
+ kbase_ipa_get_active_cycles_callback get_active_cycles)
{
int err = 0;
size_t i;
struct kbase_ipa_model_vinstr_data *model_data;
+ if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles)
+ return -EINVAL;
+
model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
if (!model_data)
return -ENOMEM;
@@ -306,6 +289,7 @@ int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
model_data->kbdev = model->kbdev;
model_data->groups_def = ipa_groups_def;
model_data->groups_def_num = ipa_group_size;
+ model_data->get_active_cycles = get_active_cycles;
model->model_data = (void *) model_data;
@@ -320,13 +304,20 @@ int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
goto exit;
}
- model_data->scaling_factor = 5;
+ model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
err = kbase_ipa_model_add_param_s32(model, "scale",
&model_data->scaling_factor,
1, false);
if (err)
goto exit;
+ model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
+ err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
+ &model_data->min_sample_cycles,
+ 1, false);
+ if (err)
+ goto exit;
+
err = kbase_ipa_attach_vinstr(model_data);
exit:
diff --git a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h b/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h
index 18c30fe..c9288e8 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h
+++ b/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h
@@ -38,28 +38,41 @@
#define KBASE_IPA_NR_BYTES_PER_BLOCK \
(KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT)
+struct kbase_ipa_model_vinstr_data;
+
+typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinstr_data *);
+
/**
* struct kbase_ipa_model_vinstr_data - IPA context per device
* @kbdev: pointer to kbase device
* @groups_def: Array of IPA groups.
* @groups_def_num: Number of elements in the array of IPA groups.
+ * @get_active_cycles: Callback to return number of active cycles during
+ * counter sample period
* @vinstr_cli: vinstr client handle
* @vinstr_buffer: buffer to dump hardware counters onto
- * @last_sample_read_time: timestamp of last vinstr buffer read
* @scaling_factor: user-specified power scaling factor. This is
* interpreted as a fraction where the denominator is
* 1000. Range approx 0.0-32.0:
* 0 < scaling_factor < 2^15
+ * @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of
+ * cycles the GPU was working) is less than
+ * min_sample_cycles, the counter model will return an
+ * error, causing the IPA framework to approximate using
+ * the cached simple model results instead. This may be
+ * more accurate than extrapolating using a very small
+ * counter dump.
*/
struct kbase_ipa_model_vinstr_data {
struct kbase_device *kbdev;
s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM];
const struct kbase_ipa_group *groups_def;
size_t groups_def_num;
+ kbase_ipa_get_active_cycles_callback get_active_cycles;
struct kbase_vinstr_client *vinstr_cli;
void *vinstr_buffer;
- ktime_t last_sample_read_time;
s32 scaling_factor;
+ s32 min_sample_cycles;
};
/**
@@ -88,7 +101,7 @@ struct kbase_ipa_group {
* Calculate energy estimation based on hardware counter `counter'
* across all shader cores.
*
- * Return: Sum of counter values. Range: -2^34 < ret < 2^34
+ * Return: Sum of counter values. Range: -2^54 < ret < 2^54
*/
s64 kbase_ipa_sum_all_shader_cores(
struct kbase_ipa_model_vinstr_data *model_data,
@@ -103,7 +116,7 @@ s64 kbase_ipa_sum_all_shader_cores(
*
* Calculate energy estimation based on hardware counter `counter'.
*
- * Return: Counter value. Range: -2^34 < ret < 2^34
+ * Return: Counter value. Range: -2^49 < ret < 2^49
*/
s64 kbase_ipa_single_counter(
struct kbase_ipa_model_vinstr_data *model_data,
@@ -134,9 +147,6 @@ void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
* @model: pointer to instantiated model
* @coeffp: pointer to location where calculated power, in
* pW/(Hz V^2), is stored.
- * @current_freq: frequency the GPU has been running at over the sample
- * period. In Hz. Range: 10 MHz < 1GHz,
- * 2^20 < current_freq < 2^30
*
* This is a GPU-agnostic implementation of the get_dynamic_coeff()
* function of an IPA model. It relies on the model being populated
@@ -144,8 +154,7 @@ void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
*
* Return: 0 on success, or an error code.
*/
-int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
- u32 current_freq);
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp);
/**
* kbase_ipa_vinstr_common_model_init() - initialize ipa power model
@@ -153,6 +162,8 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
* @ipa_groups_def: array of ipa groups which sets coefficients for
* the corresponding counters used in the ipa model
* @ipa_group_size: number of elements in the array @ipa_groups_def
+ * @get_active_cycles: callback to return the number of cycles the GPU was
+ * active during the counter sample period.
*
* This initialization function performs initialization steps common
* for ipa models based on counter values. In each call, the model
@@ -162,8 +173,9 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
* Return: 0 on success, error code otherwise
*/
int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
- const struct kbase_ipa_group *ipa_groups_def,
- size_t ipa_group_size);
+ const struct kbase_ipa_group *ipa_groups_def,
+ size_t ipa_group_size,
+ kbase_ipa_get_active_cycles_callback get_active_cycles);
/**
* kbase_ipa_vinstr_common_model_term() - terminate ipa power model
@@ -174,17 +186,4 @@ int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
*/
void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model);
-#if MALI_UNIT_TEST
-/**
- * kbase_ipa_set_dummy_time() - set a dummy monotonic time value
- * @t: a monotonic time value
- *
- * This is only intended for use in unit tests, to ensure that the kernel time
- * values used by a power model are predictable. Deterministic behavior is
- * necessary to allow validation of the dynamic power values computed by the
- * model.
- */
-void kbase_ipa_set_dummy_time(ktime_t t);
-#endif /* MALI_UNIT_TEST */
-
#endif /* _KBASE_IPA_VINSTR_COMMON_H_ */
diff --git a/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c b/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c
index 81137ea..7951b74 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c
@@ -164,6 +164,24 @@ static s64 kbase_g7x_jm_single_counter(
return kbase_ipa_single_counter(model_data, coeff, counter);
}
+/**
+ * kbase_g7x_get_active_cycles() - return the GPU_ACTIVE counter
+ * @model_data: pointer to GPU model data.
+ *
+ * Return: the number of cycles the GPU was active during the counter sampling
+ * period.
+ */
+static u32 kbase_g7x_get_active_cycles(
+ struct kbase_ipa_model_vinstr_data *model_data)
+{
+ u32 counter = kbase_g7x_power_model_get_jm_counter(model_data, JM_GPU_ACTIVE);
+
+ /* Counters are only 32-bit, so we can safely multiply by 1 then cast
+ * the 64-bit result back to a u32.
+ */
+ return kbase_ipa_single_counter(model_data, 1, counter);
+}
+
/** Table of IPA group definitions.
*
* For each IPA group, this table defines a function to access the given performance block counter (or counters,
@@ -277,14 +295,14 @@ static const struct kbase_ipa_group ipa_groups_def_tnox[] = {
KBASE_IPA_MAX_GROUP_DEF_NUM); \
return kbase_ipa_vinstr_common_model_init(model, \
ipa_groups_def_ ## gpu, \
- ARRAY_SIZE(ipa_groups_def_ ## gpu)); \
+ ARRAY_SIZE(ipa_groups_def_ ## gpu), \
+ kbase_g7x_get_active_cycles); \
} \
struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
.name = "mali-" #gpu "-power-model", \
.init = kbase_ ## gpu ## _power_model_init, \
.term = kbase_ipa_vinstr_common_model_term, \
.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
- .do_utilization_scaling_in_framework = false \
}; \
KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)