author	Will Song <jinpengsong@google.com>	2024-01-30 14:43:10 -0800
committer	Will Song <jinpengsong@google.com>	2024-03-04 19:09:06 +0000
commit	004df3b16589abb6c9faf61f4626244b2004338a (patch)
tree	0b464de96e8932da7182d19fafda534af5b5cf9a
parent	fa879c4c56d10069e37d84e0b1aa264cd952a8a0 (diff)
download	gs-004df3b16589abb6c9faf61f4626244b2004338a.tar.gz
drivers: performance: Add Google Perf Mon
Adds a tick-driven PMU/AMU counter reader. This monitor is serviced by
the arch_timer and updates the performance counters on a fixed cadence.

Test: Flash + Perfetto on Burncycles + gb5
Bug: 325274590
Bug: 262894231
Change-Id: Ic5ee631cfd93e74e49f519c33db95436dbc29338
Signed-off-by: Will Song <jinpengsong@google.com>
-rw-r--r--	drivers/performance/gs_perf_mon/gs_perf_mon.c	950
-rw-r--r--	drivers/performance/gs_perf_mon/gs_perf_mon_priv.h	88
-rw-r--r--	include/performance/gs_perf_mon/gs_perf_mon.h	222
3 files changed, 1260 insertions, 0 deletions
diff --git a/drivers/performance/gs_perf_mon/gs_perf_mon.c b/drivers/performance/gs_perf_mon/gs_perf_mon.c
new file mode 100644
index 000000000..fe4ed846e
--- /dev/null
+++ b/drivers/performance/gs_perf_mon/gs_perf_mon.c
@@ -0,0 +1,950 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Google LLC.
+ *
+ * Module for Performance Monitoring.
+ */
+#define pr_fmt(fmt) "gs_perf_mon: " fmt
+
+#include <linux/cpuidle.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+#include <linux/cpu_pm.h>
+#include <linux/cpu.h>
+#include <linux/of_fdt.h>
+#include <linux/perf_event.h>
+#include <linux/of_device.h>
+#include <linux/mutex.h>
+#include <trace/hooks/cpuidle.h>
+#include <linux/spinlock.h>
+#include <trace/events/power.h>
+#include <uapi/linux/sched/types.h>
+
+#include <performance/gs_perf_mon/gs_perf_mon.h>
+#include "gs_perf_mon_priv.h"
+
+static struct gs_perf_mon_config perf_mon_config;
+static struct gs_perf_mon_state perf_mon_metadata;
+
+int gs_perf_mon_get_data(unsigned int cpu, struct gs_cpu_perf_data *data_dest)
+{
+ int perf_idx;
+ int ret = 0;
+ struct cpu_perf_info *cpu_data;
+ unsigned long flags;
+
+ /* Bail out if called before the monitor has probed. */
+ if (!perf_mon_metadata.perf_monitor_initialized)
+ return -EINVAL;
+
+ cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+
+ spin_lock_irqsave(&cpu_data->cpu_perf_lock, flags);
+
+ /* If monitor not active, return error. */
+ if (!cpu_data->mon_active) {
+ spin_unlock_irqrestore(&cpu_data->cpu_perf_lock, flags);
+ return -ENODATA;
+ }
+
+ /* Inform caller of monitor status. */
+ data_dest->cpu_mon_on = true;
+
+ for (perf_idx = 0; perf_idx < PERF_NUM_COMMON_EVS; perf_idx++) {
+ data_dest->perf_ev_last_delta[perf_idx] =
+ cpu_data->perf_ev_data[perf_idx].last_delta;
+ }
+
+ /* Copy over cpu metadata. */
+ data_dest->time_delta_us = cpu_data->time_delta_us;
+ data_dest->cpu_idle_state = READ_ONCE(cpu_data->idle_state);
+
+ spin_unlock_irqrestore(&cpu_data->cpu_perf_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(gs_perf_mon_get_data);
+
+static int gs_perf_mon_start(void);
+static void gs_perf_mon_stop(void);
+static void disable_perf_events(int cpu);
+
+/**
+ * read_perf_event - Reads PMU or AMU event from current cpu.
+ *
+ * Inputs:
+ * @event: The perf event to read.
+ * @event_total: Container for read result.
+ *
+ * Returns: Non-zero on error.
+ */
+static inline int read_perf_event(struct gs_event_data *event, u64 *event_total)
+{
+ int ret = 0;
+
+ if (event->counter_type == PMU) {
+ /* Read this event from PMU. */
+ ret = perf_event_read_local(event->pevent, event_total, NULL, NULL);
+ if (ret) {
+ *event_total = 0;
+ return ret;
+ }
+ } else {
+ /* Read this event from AMU. */
+ switch (event->element_idx) {
+ case PERF_CYCLE_IDX:
+ *event_total = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
+ break;
+
+ case PERF_INST_IDX:
+ *event_total = read_sysreg_s(SYS_AMEVCNTR0_INST_RET_EL0);
+ break;
+
+ case PERF_STALL_BACKEND_MEM_IDX:
+ *event_total = read_sysreg_s(SYS_AMEVCNTR0_MEM_STALL);
+ break;
+
+ default:
+ *event_total = 0;
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
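+
+/*
+ * Note: the AMU path above reads the architected activity-monitor
+ * counters directly via sysreg accesses; unlike PMU events they are
+ * free-running and need no perf_event allocation, which is why
+ * init_event() below only allocates PMU-backed events.
+ */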
+
+/* TODO: b/323458771. Rename to be global namespace appropriate and remove cpu field. */
+int read_perf_event_local(int cpu, unsigned int event_id, u64 *count)
+{
+ struct gs_event_data *event;
+ struct cpu_perf_info *cpu_data;
+ int mon_active;
+ int ret;
+
+ /* The cpu argument is ignored; we always read from the local CPU. */
+ cpu = raw_smp_processor_id();
+
+ if (!perf_mon_metadata.perf_monitor_initialized)
+ return -EINVAL;
+
+ if (event_id >= PERF_NUM_COMMON_EVS)
+ return -EINVAL;
+
+ cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+
+ spin_lock(&cpu_data->cpu_perf_lock);
+ mon_active = cpu_data->mon_active;
+
+ if (!mon_active) {
+ spin_unlock(&cpu_data->cpu_perf_lock);
+ return -ENODATA;
+ }
+
+ event = &cpu_data->perf_ev_data[event_id];
+ ret = read_perf_event(event, count);
+ spin_unlock(&cpu_data->cpu_perf_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(read_perf_event_local);
+
+int gs_perf_mon_add_client(struct gs_perf_mon_client *client)
+{
+ if (!client)
+ return -EINVAL;
+
+ mutex_lock(&perf_mon_metadata.client_list_lock);
+
+ /* Start the perf monitor on registration of first client. */
+ if (perf_mon_metadata.perf_monitor_initialized &&
+ list_empty(&perf_mon_metadata.client_list))
+ gs_perf_mon_start();
+
+ INIT_LIST_HEAD(&client->node);
+ list_add(&client->node, &perf_mon_metadata.client_list);
+
+ mutex_unlock(&perf_mon_metadata.client_list_lock);
+ return 0;
+}
+EXPORT_SYMBOL(gs_perf_mon_add_client);
+
+void gs_perf_mon_remove_client(struct gs_perf_mon_client *client)
+{
+ if (!client)
+ return;
+ mutex_lock(&perf_mon_metadata.client_list_lock);
+ list_del(&client->node);
+ if (perf_mon_metadata.perf_monitor_initialized &&
+ list_empty(&perf_mon_metadata.client_list))
+ gs_perf_mon_stop();
+ mutex_unlock(&perf_mon_metadata.client_list_lock);
+}
+EXPORT_SYMBOL(gs_perf_mon_remove_client);
+
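+/**
+ * gs_perf_mon_tick_update_counters - Tick-path counter update.
+ *
+ * Expected to be called from the tick with IRQs off on the local CPU.
+ * Reads the local counters once every param_ticks_per_counter_update
+ * ticks and wakes the client-servicing thread if the backup update
+ * interval has elapsed.
+*/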
+void gs_perf_mon_tick_update_counters(void)
+{
+ unsigned int perf_idx;
+ unsigned int cpu = raw_smp_processor_id();
+ u64 total;
+ struct cpu_perf_info *cpu_data;
+ struct gs_event_data *ev_data;
+ ktime_t now = ktime_get();
+ ktime_t last_update_client_ts;
+ unsigned long time_delta_us;
+
+ /* If the node is not probed yet, do nothing. */
+ if (!perf_mon_metadata.perf_monitor_initialized)
+ return;
+
+ cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+
+ spin_lock(&cpu_data->cpu_perf_lock);
+
+ /* If this CPU is not monitored, do nothing. */
+ if (!cpu_data->mon_active || !perf_mon_metadata.is_active) {
+ spin_unlock(&cpu_data->cpu_perf_lock);
+ return;
+ }
+
+ time_delta_us = ktime_us_delta(now, cpu_data->last_update_ts);
+ cpu_data->ticks_since_update += 1;
+
+ /* Poll if enough ticks have elapsed or the time-based fallback has expired. */
+ if (cpu_data->ticks_since_update >= perf_mon_config.param_ticks_per_counter_update ||
+ time_delta_us > perf_mon_config.param_ticks_per_counter_update * USECS_PER_TICK) {
+ /* Loop over all AMU/PMU counters and read them. */
+ for (perf_idx = 0; perf_idx < PERF_NUM_COMMON_EVS; perf_idx++) {
+ ev_data = &cpu_data->perf_ev_data[perf_idx];
+ if (read_perf_event(ev_data, &total)) {
+ pr_err("Perf event read failed on cpu=%u for event_idx=%u\n",
+ cpu, perf_idx);
+ continue;
+ }
+ ev_data->prev_count = ev_data->curr_count;
+ ev_data->curr_count = total;
+ ev_data->last_delta = ev_data->curr_count - ev_data->prev_count;
+ }
+ cpu_data->time_delta_us = ktime_us_delta(now, cpu_data->last_update_ts);
+ cpu_data->last_update_ts = now;
+ cpu_data->ticks_since_update = 0;
+ }
+
+ spin_unlock(&cpu_data->cpu_perf_lock);
+
+ /* Wake the backup client-servicing thread if it is overdue. */
+ last_update_client_ts = READ_ONCE(perf_mon_metadata.last_client_update_ts);
+ time_delta_us = ktime_us_delta(now, last_update_client_ts);
+ if (time_delta_us > perf_mon_config.client_update_backup_us)
+ wake_up_process(perf_mon_metadata.perf_mon_task);
+}
+EXPORT_SYMBOL(gs_perf_mon_tick_update_counters);
+
+/**
+ * delete_event - Deallocates an event.
+ *
+ * Input:
+ * @event: The performance event to deallocate.
+*/
+static void delete_event(struct gs_event_data *event)
+{
+ if (event->pevent) {
+ perf_event_release_kernel(event->pevent);
+ event->pevent = NULL;
+ }
+}
+
+/**
+ * init_event - Initialize a single perf event.
+ *
+ * Input:
+ * @event: The event identifier to allocate.
+ * @cpu: The cpu to allocate this event on.
+ *
+ * Returns: Non-zero on error.
+*/
+static int init_event(struct gs_event_data *event, unsigned int cpu)
+{
+ struct perf_event *pevent;
+ unsigned int event_id = event->raw_event_id;
+ /* Zero-initialize attr so unset fields do not carry stack garbage. */
+ struct perf_event_attr attr = {};
+
+ attr.type = PERF_TYPE_RAW;
+ attr.size = sizeof(struct perf_event_attr);
+ attr.pinned = 1;
+ attr.exclude_idle = 0;
+ attr.config = event_id;
+
+ /* The following allocation steps are only needed for PMU events. */
+ if (event->counter_type == PMU) {
+ /* Allocate the event from kernel. */
+ pevent = perf_event_create_kernel_counter(&attr, cpu, NULL, NULL, NULL);
+ if (IS_ERR(pevent))
+ return PTR_ERR(pevent);
+
+ event->pevent = pevent;
+
+ /* Enable the event. */
+ perf_event_enable(pevent);
+ }
+ return 0;
+}
+
+/**
+ * enable_perf_events - Enables all pmu events on a cpu.
+ *
+ * Input:
+ * @cpu: The CPU to allocate events for.
+ *
+ * Returns: Non-zero on error.
+ *
+ * Synchronization: Must have perf_allocation_lock held.
+*/
+static int enable_perf_events(unsigned int cpu)
+{
+ struct cpu_perf_info *cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ struct gs_event_data *ev_data;
+ unsigned int perf_idx;
+ int ret = 0;
+ unsigned long flags;
+
+ /* Loop over all events and initialize them. */
+ for (perf_idx = 0; perf_idx < PERF_NUM_COMMON_EVS; perf_idx++) {
+ ev_data = &cpu_data->perf_ev_data[perf_idx];
+ ret = init_event(ev_data, cpu);
+ if (WARN_ON(ret))
+ goto err_init;
+ }
+
+ spin_lock_irqsave(&cpu_data->cpu_perf_lock, flags);
+ cpu_data->mon_active = true;
+ spin_unlock_irqrestore(&cpu_data->cpu_perf_lock, flags);
+ return 0;
+
+err_init:
+ /* On error of enabling any event, deallocate all events. */
+ disable_perf_events(cpu);
+ return ret;
+}
+
+/**
+ * disable_perf_events - Disables all pmu events on a cpu.
+ *
+ * Input:
+ * @cpu: Which CPU to disable events for.
+ *
+ * Synchronization: Caller must hold perf_allocation_lock.
+*/
+static void disable_perf_events(int cpu)
+{
+ struct cpu_perf_info *cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ struct gs_event_data *ev_data;
+ unsigned int perf_idx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_data->cpu_perf_lock, flags);
+ cpu_data->mon_active = false;
+ spin_unlock_irqrestore(&cpu_data->cpu_perf_lock, flags);
+ for (perf_idx = 0; perf_idx < PERF_NUM_COMMON_EVS; perf_idx++) {
+ ev_data = &cpu_data->perf_ev_data[perf_idx];
+ delete_event(ev_data);
+ }
+}
+
+/**
+ * vendor_update_event_cpu_idle_enter - idle hook.
+ *
+ * Vendor hook for entering CPU idle.
+ *
+ * Input:
+ * @data: Unused.
+ * @state: The idle state this cpu transitioned to.
+ * @dev: Unused.
+ *
+ * Output: Sets idle_state to:
+ * 0 = PERF_CPU_IDLE_C1, 1 = PERF_CPU_IDLE_C2
+*/
+static void vendor_update_event_cpu_idle_enter(void *data, int *state, struct cpuidle_device *dev)
+{
+ unsigned int cpu = raw_smp_processor_id();
+ struct cpu_perf_info *cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ int idle_state = *state;
+
+ if (idle_state == 0)
+ WRITE_ONCE(cpu_data->idle_state, PERF_CPU_IDLE_C1);
+ else if (idle_state == 1)
+ WRITE_ONCE(cpu_data->idle_state, PERF_CPU_IDLE_C2);
+}
+
+/**
+ * vendor_update_event_cpu_idle_exit - idle exit hook.
+ *
+ * Sets idle_state to PERF_CPU_ACTIVE to indicate busy.
+*/
+static void vendor_update_event_cpu_idle_exit(void *data, int state, struct cpuidle_device *dev)
+{
+ unsigned int cpu = raw_smp_processor_id();
+ struct cpu_perf_info *cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ WRITE_ONCE(cpu_data->idle_state, PERF_CPU_ACTIVE);
+}
+
+/**
+ * gs_perf_mon_update_clients - Main function for updating clients.
+ *
+ * Loops over the client_list and services each client's callback
+ * on a fixed interval.
+*/
+void gs_perf_mon_update_clients(void)
+{
+ unsigned int cpu;
+ struct gs_perf_mon_client *curr_client;
+ ktime_t last_update_client_ts;
+ unsigned long delta_us;
+ ktime_t now = ktime_get();
+ int ret = 0;
+
+ if (!perf_mon_metadata.perf_monitor_initialized)
+ return;
+
+ /* Service the clients only if no other updater holds the lock. */
+ if (mutex_trylock(&perf_mon_metadata.client_list_lock)) {
+ /* Update last client service update ts and metadata. */
+ last_update_client_ts = READ_ONCE(perf_mon_metadata.last_client_update_ts);
+ delta_us = ktime_us_delta(now, last_update_client_ts);
+
+ /* For spurious or stale wakeups. */
+ if (!perf_mon_metadata.is_active ||
+ delta_us < perf_mon_config.client_update_interval_us) {
+ mutex_unlock(&perf_mon_metadata.client_list_lock);
+ return;
+ }
+
+ WRITE_ONCE(perf_mon_metadata.last_client_update_ts, now);
+
+ /* Copy over all the performance information for all cpus. */
+ for_each_possible_cpu (cpu) {
+ ret = gs_perf_mon_get_data(cpu, &perf_mon_metadata.client_shared_data[cpu]);
+ if (ret)
+ perf_mon_metadata.client_shared_data[cpu].cpu_mon_on = false;
+ }
+
+ /* Update all clients supplying a callback pointer to monitor data. */
+ list_for_each_entry (curr_client, &perf_mon_metadata.client_list, node) {
+ if (curr_client->client_callback)
+ curr_client->client_callback(perf_mon_metadata.client_shared_data,
+ curr_client->private_data);
+ }
+ mutex_unlock(&perf_mon_metadata.client_list_lock);
+ }
+}
+EXPORT_SYMBOL(gs_perf_mon_update_clients);
+
+/**
+ * gs_perf_mon_cpuhp_up - re-enables perf monitoring on cpu-up.
+ *
+ * Input:
+ * @cpu: Whichever CPU just came back online.
+*/
+static int gs_perf_mon_cpuhp_up(unsigned int cpu)
+{
+ int ret = 0;
+ int mon_active;
+ unsigned long flags;
+ struct cpu_perf_info *cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+
+ mutex_lock(&cpu_data->perf_allocation_lock);
+ spin_lock_irqsave(&cpu_data->cpu_perf_lock, flags);
+ mon_active = cpu_data->mon_active;
+ spin_unlock_irqrestore(&cpu_data->cpu_perf_lock, flags);
+
+ /* Do nothing if already on. */
+ if (mon_active) {
+ mutex_unlock(&cpu_data->perf_allocation_lock);
+ return 0;
+ }
+
+ /* Enable the events. */
+ ret = enable_perf_events(cpu);
+ mutex_unlock(&cpu_data->perf_allocation_lock);
+
+ /*
+ * If we were unable to restart the performance events, we
+ * should stop the perf monitor entirely.
+ */
+ if (WARN_ON(ret))
+ gs_perf_mon_stop();
+ return ret;
+}
+
+/**
+ * gs_perf_mon_cpuhp_down - disables perf monitoring on cpu-down.
+ *
+ * Input:
+ * @cpu: Whichever CPU just went down.
+*/
+static int gs_perf_mon_cpuhp_down(unsigned int cpu)
+{
+ struct cpu_perf_info *cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ mutex_lock(&cpu_data->perf_allocation_lock);
+ disable_perf_events(cpu);
+ mutex_unlock(&cpu_data->perf_allocation_lock);
+
+ return 0;
+}
+
+/* Initializes CPU hotplug callbacks. */
+static int gs_init_perf_mon_cpuhp(void)
+{
+ int ret = 0;
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "gs_perf_mon", gs_perf_mon_cpuhp_up,
+ gs_perf_mon_cpuhp_down);
+ if (ret < 0)
+ pr_err("init cpuhp fail:%d\n", ret);
+
+ return ret;
+}
+
+/**
+ * gs_perf_mon_start - Start perf monitoring.
+ *
+ * This function will allocate PMU units and queue
+ * monitoring work.
+*/
+static int gs_perf_mon_start(void)
+{
+ unsigned int cpu;
+ struct cpu_perf_info *cpu_data;
+ int ret = 0;
+
+ mutex_lock(&perf_mon_metadata.active_state_lock);
+ if (perf_mon_metadata.is_active)
+ goto unlock_out;
+
+ /* Allocate and enable perf events. */
+ for_each_possible_cpu (cpu) {
+ cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ mutex_lock(&cpu_data->perf_allocation_lock);
+ ret = enable_perf_events(cpu);
+ mutex_unlock(&cpu_data->perf_allocation_lock);
+ if (ret)
+ goto unlock_out;
+ }
+
+ perf_mon_metadata.is_active = true;
+unlock_out:
+ mutex_unlock(&perf_mon_metadata.active_state_lock);
+ return ret;
+}
+
+/**
+ * gs_perf_mon_stop - Stop perf monitoring.
+ *
+ * This function will deallocate PMU units and cancel
+ * monitor work.
+*/
+static void gs_perf_mon_stop(void)
+{
+ unsigned int cpu;
+ struct cpu_perf_info *cpu_data;
+ mutex_lock(&perf_mon_metadata.active_state_lock);
+
+ /* If the monitor is already disabled. */
+ if (!perf_mon_metadata.is_active)
+ goto unlock_out;
+
+ perf_mon_metadata.is_active = false;
+ for_each_possible_cpu (cpu) {
+ /* Deallocate all the perf events. */
+ cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ mutex_lock(&cpu_data->perf_allocation_lock);
+ disable_perf_events(cpu);
+ mutex_unlock(&cpu_data->perf_allocation_lock);
+ }
+unlock_out:
+ mutex_unlock(&perf_mon_metadata.active_state_lock);
+}
+
+/* Helper function for parsing a single perf event. */
+static int parse_perf_event(struct device_node *counter_type, struct gs_event_data *cpu_event,
+ enum gs_counter_type unit_id, enum gs_perf_event_idx ev_idx, char *prop_name)
+{
+ unsigned int event_id;
+ int ret;
+
+ ret = of_property_read_u32(counter_type, prop_name, &event_id);
+ if (ret)
+ return -EINVAL;
+
+ cpu_event->counter_type = unit_id;
+ cpu_event->raw_event_id = event_id;
+ cpu_event->element_idx = ev_idx;
+ return 0;
+}
+
+/**
+ * parse_perf_counters - Populates the values for the performance unit.
+ *
+ * Inputs:
+ * @dev: The device for error messages.
+ * @counter_type: The perf node.
+ * @cpu_events: Container for parsed data.
+ * @unit_id: Processing AMU or PMU?
+ *
+ * Returns: Always 0; events missing from the node are logged and skipped.
+*/
+static int parse_perf_counters(struct device *dev, struct device_node *counter_type,
+ struct gs_event_data *cpu_events, enum gs_counter_type unit_id)
+{
+ int ret = 0;
+
+ ret = parse_perf_event(counter_type, &cpu_events[PERF_L2D_CACHE_REFILL_IDX], unit_id,
+ PERF_L2D_CACHE_REFILL_IDX, "l2-cachemiss-ev");
+ if (ret)
+ dev_dbg(dev, "l2-cachemiss-ev event not specified. Skipping.\n");
+
+ ret = parse_perf_event(counter_type, &cpu_events[PERF_STALL_BACKEND_MEM_IDX], unit_id,
+ PERF_STALL_BACKEND_MEM_IDX, "stall-backend-mem-ev");
+ if (ret)
+ dev_dbg(dev, "stall-backend-mem-event not specified. Skipping.\n");
+
+ ret = parse_perf_event(counter_type, &cpu_events[PERF_INST_IDX], unit_id, PERF_INST_IDX,
+ "inst-ev");
+ if (ret)
+ dev_dbg(dev, "inst-ev not specified. Skipping.\n");
+
+ ret = parse_perf_event(counter_type, &cpu_events[PERF_CYCLE_IDX], unit_id, PERF_CYCLE_IDX,
+ "cyc-ev");
+ if (ret)
+ dev_dbg(dev, "cyc-ev not specified. Skipping.\n");
+
+ ret = parse_perf_event(counter_type, &cpu_events[PERF_L3_CACHE_MISS_IDX], unit_id,
+ PERF_L3_CACHE_MISS_IDX, "l3-cachemiss-ev");
+ if (ret)
+ dev_dbg(dev, "l3-cachemiss-ev not specified. Skipping.\n");
+
+ return 0;
+}
+
+/**
+ * initialize_cpu_data_info - Initializes locks and parses perf events for one CPU.
+ *
+ * Inputs:
+ * @dev: Device for error reporting and node reading.
+ * @cpu_node: The individual CPU node to populate.
+ * @cpu_data: The container for the CPU data.
+ *
+ * Returns: Non-zero on error.
+*/
+static int initialize_cpu_data_info(struct device *dev, struct device_node *cpu_node,
+ struct cpu_perf_info *cpu_data)
+{
+ struct device_node *pmu_node;
+ struct device_node *amu_node;
+ struct gs_event_data *cpu_event_data = cpu_data->perf_ev_data;
+ unsigned int event_idx;
+ int ret = 0;
+
+ spin_lock_init(&cpu_data->cpu_perf_lock);
+ mutex_init(&cpu_data->perf_allocation_lock);
+
+ /* Default events to uninitialized. */
+ for (event_idx = 0; event_idx < PERF_NUM_COMMON_EVS; event_idx++)
+ cpu_event_data[event_idx].raw_event_id = UINT_MAX;
+
+ /* Find and populate the pmu data. */
+ pmu_node = of_get_child_by_name(cpu_node, "pmu_events");
+ ret = parse_perf_counters(dev, pmu_node, cpu_event_data, PMU);
+ if (ret) {
+ dev_err(dev, "Couldn't parse pmu_node, skipping performance monitoring.\n");
+ return ret;
+ }
+
+ /* Find and populate the amu data. */
+ amu_node = of_get_child_by_name(cpu_node, "amu_events");
+ ret = parse_perf_counters(dev, amu_node, cpu_event_data, AMU);
+ if (ret) {
+ dev_err(dev, "Couldn't parse amu_node, skipping performance monitoring.\n");
+ return ret;
+ }
+
+ /* Check that every event is supported from at least one of PMU or AMU. */
+ for (event_idx = 0; event_idx < PERF_NUM_COMMON_EVS; event_idx++) {
+ if (cpu_event_data[event_idx].raw_event_id == UINT_MAX) {
+ dev_err(dev, "Event at index %u is unsupported. Stopping probe.\n",
+ event_idx);
+ ret = -EINVAL;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * gs_perf_mon_parse_dt - Main parse function for the perf monitor.
+ */
+static int gs_perf_mon_parse_dt(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct cpu_perf_info *cpu_data;
+
+ /* The parent node for the per-cpu children. */
+ struct device_node *cpus_data_np;
+
+ /* Containers for children nodes. */
+ struct device_node *cpu_node;
+ unsigned int cpu_idx;
+ int ret = 0;
+
+
+ /* Set monitor's performance counter update interval in us. */
+ ret = of_property_read_u32(dev->of_node, "param_ticks_per_counter_update",
+ &perf_mon_config.param_ticks_per_counter_update);
+ if (ret) {
+ dev_err(dev, "param_ticks_per_counter_update unspecified, using default value.\n");
+ ret = 0;
+ perf_mon_config.param_ticks_per_counter_update = DEFAULT_TICKS_PER_COUNTER_UPDATE;
+ }
+
+ /* Set monitor's tick client update interval in us. */
+ ret = of_property_read_u32(dev->of_node, "client_update_backup_us",
+ &perf_mon_config.client_update_backup_us);
+ if (ret) {
+ dev_err(dev, "client_update_backup_us unspecified, using default value.\n");
+ ret = 0;
+ perf_mon_config.client_update_backup_us =
+ DEFAULT_TICKS_PER_COUNTER_UPDATE * USECS_PER_TICK;
+ }
+
+ /* Set monitor's opportunistic client update interval in us. */
+ ret = of_property_read_u32(dev->of_node, "client_update_interval_us",
+ &perf_mon_config.client_update_interval_us);
+ if (ret) {
+ dev_err(dev, "client_update_interval_us unspecified, using default value.\n");
+ ret = 0;
+ perf_mon_config.client_update_interval_us =
+ DEFAULT_TICKS_PER_COUNTER_UPDATE * USECS_PER_TICK;
+ }
+
+ /* Allocate memory for cpu state representations. */
+ perf_mon_metadata.cpu_data_arr =
+ devm_kzalloc(dev, sizeof(struct cpu_perf_info) * num_possible_cpus(), GFP_KERNEL);
+ if (!perf_mon_metadata.cpu_data_arr) {
+ dev_err(dev, "Insufficient memory for cpu_data_arr. Aborting\n");
+ return -ENOMEM;
+ }
+
+ /* Populate the cpu state representations. */
+ cpu_idx = 0;
+ cpu_node = NULL;
+ cpus_data_np = of_get_child_by_name(dev->of_node, "gs_perf_cpu");
+ if (!cpus_data_np) {
+ dev_err(dev, "gs_perf_cpu invalid skipping performance monitoring.\n");
+ ret = -EINVAL;
+ goto err_probe;
+ }
+
+ /* Loop over all child CPU nodes. */
+ while ((cpu_node = of_get_next_child(cpus_data_np, cpu_node)) != NULL) {
+ /* Find the CPU index. */
+ ret = of_property_read_u32(cpu_node, "cpu_idx", &cpu_idx);
+ if (ret || cpu_idx >= num_possible_cpus()) {
+ dev_err(dev, "cpu_idx invalid, skipping performance monitoring.\n");
+ ret = -EINVAL;
+ goto err_probe;
+ }
+
+ /* Retrieve probe target. */
+ cpu_data = &perf_mon_metadata.cpu_data_arr[cpu_idx];
+ cpu_data->cpu_id = cpu_idx;
+
+ /* Initialize cpu data. */
+ ret = initialize_cpu_data_info(dev, cpu_node, cpu_data);
+ if (ret)
+ goto err_probe;
+ }
+
+ return 0;
+
+err_probe:
+ /* cpu_data_arr is devm-allocated and is released automatically on probe failure. */
+ return ret;
+}
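+
+/*
+ * Illustrative device-tree layout consumed by gs_perf_mon_parse_dt()
+ * above. Property values here are examples only, not taken from a real
+ * board file; every event must be supplied by either the pmu_events or
+ * amu_events node of each CPU:
+ *
+ *	gs_perf_mon {
+ *		compatible = "google,gs_perf_mon";
+ *		param_ticks_per_counter_update = <2>;
+ *		client_update_interval_us = <8000>;
+ *		client_update_backup_us = <10000>;
+ *		gs_perf_cpu {
+ *			cpu0 {
+ *				cpu_idx = <0>;
+ *				pmu_events {
+ *					l2-cachemiss-ev = <0x17>;
+ *					l3-cachemiss-ev = <0x2a>;
+ *				};
+ *				amu_events {
+ *					cyc-ev = <0x0>;
+ *					inst-ev = <0x1>;
+ *					stall-backend-mem-ev = <0x2>;
+ *				};
+ *			};
+ *		};
+ *	};
+ */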
+
+/* Kthread for servicing the monitor's clients (e.g. latency governors). */
+static int perf_mon_task(void *data)
+{
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /*
+ * Put this thread to sleep and wait
+ * for a wakeup from timer or scheduler.
+ *
+ * TODO: Try wait_on bits (b/323458771)
+ */
+ schedule();
+ set_current_state(TASK_RUNNING);
+ gs_perf_mon_update_clients();
+ }
+ return 0;
+}
+
+/* Driver initialization code. */
+int gs_perf_mon_driver_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ int ret = 0;
+
+ ret = gs_perf_mon_parse_dt(pdev);
+ if (ret)
+ return ret;
+
+ /* Allocate and schedule the perf_mon task. */
+ perf_mon_metadata.perf_mon_task =
+ kthread_create(perf_mon_task, NULL, "perf_mon_update_client_task");
+ if (IS_ERR(perf_mon_metadata.perf_mon_task)) {
+ pr_err("%s: failed kthread_create for perf_mon \n", __func__);
+ ret = PTR_ERR(perf_mon_metadata.perf_mon_task);
+ return ret;
+ }
+ sched_set_fifo(perf_mon_metadata.perf_mon_task);
+
+ /* Allocate container for client's shared data. */
+ perf_mon_metadata.client_shared_data = devm_kzalloc(
+ dev, sizeof(struct gs_cpu_perf_data) * num_possible_cpus(), GFP_KERNEL);
+ if (!perf_mon_metadata.client_shared_data) {
+ ret = -ENOMEM;
+ goto err_client_data;
+ }
+
+ /* Register hooks. */
+ ret = register_trace_android_vh_cpu_idle_enter(vendor_update_event_cpu_idle_enter, NULL);
+ if (ret) {
+ dev_err(dev, "Register idle enter vendor hook fail %d\n", ret);
+ goto err_vh_idle_enter_register;
+ }
+ ret = register_trace_android_vh_cpu_idle_exit(vendor_update_event_cpu_idle_exit, NULL);
+ if (ret) {
+ dev_err(dev, "Register idle exit vendor hook fail %d\n", ret);
+ goto err_vh_idle_exit_register;
+ }
+
+ /* Register cpu hotplugs. */
+ ret = gs_init_perf_mon_cpuhp();
+ if (ret < 0) {
+ dev_err(dev, "gs_init_perf_mon_cpuhp errored with number %d\n", ret);
+ goto err_cpuhp_init;
+ }
+
+ /* Check whether clients registered before we probed. */
+ mutex_lock(&perf_mon_metadata.client_list_lock);
+ if (!list_empty(&perf_mon_metadata.client_list))
+ gs_perf_mon_start();
+ perf_mon_metadata.perf_monitor_initialized = true;
+ mutex_unlock(&perf_mon_metadata.client_list_lock);
+
+ return 0;
+
+/* If any of the above steps failed, we need to free resources and unregister hooks. */
+err_cpuhp_init:
+ unregister_trace_android_vh_cpu_idle_exit(vendor_update_event_cpu_idle_exit, NULL);
+err_vh_idle_exit_register:
+ unregister_trace_android_vh_cpu_idle_enter(vendor_update_event_cpu_idle_enter, NULL);
+err_vh_idle_enter_register:
+err_client_data:
+ kthread_stop(perf_mon_metadata.perf_mon_task);
+ return ret;
+}
+
+static const struct of_device_id gs_perf_mon_root_match[] = {
+ { .compatible = "google,gs_perf_mon" },
+ { /* sentinel */ }
+};
+
+static struct platform_driver gs_perf_mon_platform_driver = {
+ .probe = gs_perf_mon_driver_probe,
+ .driver = {
+ .name = "gs_perf_mon",
+ .owner = THIS_MODULE,
+ .of_match_table = gs_perf_mon_root_match,
+ .suppress_bind_attrs = true,
+ },
+};
+
+/* Driver initialization step for lists and locks. */
+static int __init gs_perf_mon_init(void)
+{
+ int ret = 0;
+ mutex_init(&perf_mon_metadata.client_list_lock);
+ mutex_init(&perf_mon_metadata.active_state_lock);
+ INIT_LIST_HEAD(&perf_mon_metadata.client_list);
+
+ ret = platform_driver_register(&gs_perf_mon_platform_driver);
+ if (ret)
+ pr_err("Error when registering driver!\n");
+
+ return ret;
+}
+
+/* Driver exit step to stop monitor if module exits. */
+static void __exit gs_perf_mon_exit(void)
+{
+ gs_perf_mon_stop();
+}
+
+/* A module parameter for frequency of counter updates. */
+static int gs_perf_mon_param_set_ticks(const char *val, const struct kernel_param *kp)
+{
+ unsigned int ticks;
+
+ if (kstrtouint(val, 10, &ticks)) {
+ pr_err("%s: gs_perf_mon parse error\n", __func__);
+ return -EINVAL;
+ }
+
+ if (ticks < 1 || ticks > 32) {
+ pr_err("%s: gs_perf_mon invalid number of ticks\n", __func__);
+ return -EINVAL;
+ }
+
+ perf_mon_config.param_ticks_per_counter_update = ticks;
+ perf_mon_config.client_update_interval_us = ticks * USECS_PER_TICK;
+ perf_mon_config.client_update_backup_us = ticks * USECS_PER_TICK + USECS_PER_TICK / 2;
+
+ return 0;
+}
+
+static int gs_perf_mon_param_get_ticks(char *buf, const struct kernel_param *kp)
+{
+ return sysfs_emit_at(buf, 0, "%u\n", perf_mon_config.param_ticks_per_counter_update);
+}
+
+static const struct kernel_param_ops param_ops = {
+ .set = gs_perf_mon_param_set_ticks,
+ .get = gs_perf_mon_param_get_ticks,
+};
+
+module_param_cb(gs_perf_mon_ticks, &param_ops, NULL, 0644);
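+
+/*
+ * The parameter lands in sysfs, typically at
+ * /sys/module/gs_perf_mon/parameters/gs_perf_mon_ticks (the exact path
+ * depends on the module name). For example, writing 4 sets a 4-tick
+ * counter cadence and rescales both client update intervals.
+ */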
+
+module_init(gs_perf_mon_init);
+module_exit(gs_perf_mon_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Google Performance Monitor");
+MODULE_AUTHOR("Will Song <jinpengsong@google.com>");
\ No newline at end of file
diff --git a/drivers/performance/gs_perf_mon/gs_perf_mon_priv.h b/drivers/performance/gs_perf_mon/gs_perf_mon_priv.h
new file mode 100644
index 000000000..f4eb6efb2
--- /dev/null
+++ b/drivers/performance/gs_perf_mon/gs_perf_mon_priv.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2024 Google, Inc.
+ *
+ * Private Header for Google Pixel Performance Monitor.
+ */
+
+#ifndef _GS_PERF_MON_PRIVATE_H_
+#define _GS_PERF_MON_PRIVATE_H_
+
+/* Num ticks between perf counter updates. */
+#define DEFAULT_TICKS_PER_COUNTER_UPDATE 2
+
+/* How many microseconds per arch_timer tick. */
+#define USECS_PER_TICK (1000000 / CONFIG_HZ)
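+
+/*
+ * For example, with CONFIG_HZ=250 this works out to 4000us per tick,
+ * so the default of DEFAULT_TICKS_PER_COUNTER_UPDATE=2 refreshes the
+ * counters roughly every 8ms.
+ */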
+
+/**
+ * struct cpu_perf_info - Internal container for per-cpu counter data.
+ * @cpu_id: The identifier for this cpu.
+ * @perf_allocation_lock: Syncs allocation and deallocation of perf events.
+ * @idle_state: The idle state of the CPU.
+ * @cpu_perf_lock: Syncs access to perf_ev_data, last_update_ts,
+ * ticks_since_update, and mon_active.
+ *
+ * @mon_active: Is the monitor servicing this CPU?
+ * @time_delta_us: Microseconds elapsed between the last two counter updates.
+ * @last_update_ts: Timestamp of the last counter update.
+ * @ticks_since_update: Number of ticks since last perf update.
+ * @perf_ev_data: Internal per-cpu perf event containers.
+ */
+struct cpu_perf_info {
+ int cpu_id;
+ struct mutex perf_allocation_lock;
+ enum gs_perf_cpu_idle_state idle_state;
+
+ spinlock_t cpu_perf_lock; /* This lock protects the below. */
+ bool mon_active;
+ unsigned long time_delta_us;
+ ktime_t last_update_ts;
+ unsigned int ticks_since_update;
+ struct gs_event_data perf_ev_data[PERF_NUM_COMMON_EVS];
+};
+
+/**
+ * struct gs_perf_mon_state - Container for Monitor Metadata.
+ *
+ * This struct contains internal gs_perf_mon metadata.
+ * It should not be exposed to files outside of gs_perf_mon.
+ *
+ * @is_active: Is the monitor enabled?
+ * @perf_monitor_initialized: Has the monitor been initialized?
+ * @active_state_lock: Lock for turning the monitor on/off.
+ * @last_client_update_ts: Last time the clients were updated.
+ * @client_list: List of clients to service.
+ * @client_list_lock: Mutex protecting the client_list.
+ * @client_shared_data: Performance data to supply to clients.
+ * @perf_mon_task: Kernel thread servicing the clients.
+ * @cpu_data_arr: Array of per-cpu performance data.
+ */
+struct gs_perf_mon_state {
+ bool is_active;
+ bool perf_monitor_initialized;
+ struct mutex active_state_lock;
+ ktime_t last_client_update_ts;
+ struct list_head client_list;
+ struct mutex client_list_lock;
+ struct gs_cpu_perf_data *client_shared_data;
+ struct task_struct *perf_mon_task;
+ struct cpu_perf_info *cpu_data_arr;
+};
+
+/**
+ * struct gs_perf_mon_config - Container for Monitor Configuration Data.
+ *
+ * This struct contains configuration data for the monitor.
+ *
+ * @client_update_backup_us: Backup client-update interval enforced from the tick.
+ * @client_update_interval_us: Opportunistic update interval for clients.
+ * @param_ticks_per_counter_update: Module parameter for how many ticks per monitor
+ * data update.
+ */
+struct gs_perf_mon_config {
+ unsigned int client_update_backup_us;
+ unsigned int client_update_interval_us;
+ unsigned int param_ticks_per_counter_update;
+};
+
+#endif // _GS_PERF_MON_PRIVATE_H_
\ No newline at end of file
diff --git a/include/performance/gs_perf_mon/gs_perf_mon.h b/include/performance/gs_perf_mon/gs_perf_mon.h
new file mode 100644
index 000000000..bd91fbfb9
--- /dev/null
+++ b/include/performance/gs_perf_mon/gs_perf_mon.h
@@ -0,0 +1,222 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2024 Google, Inc.
+ *
+ * Performance Monitor for Google Pixel.
+ */
+
+#ifndef _GS_PERF_MON_H_
+#define _GS_PERF_MON_H_
+
+/**
+ * gs_perf_event_idx - A type for performance events.
+ *
+ * We expect a particular event to have
+ * the same index across all ids and data arrays.
+*/
+enum gs_perf_event_idx {
+ PERF_L2D_CACHE_REFILL_IDX,
+ PERF_STALL_BACKEND_MEM_IDX,
+ PERF_L3_CACHE_MISS_IDX,
+ PERF_INST_IDX,
+ PERF_CYCLE_IDX,
+ PERF_NUM_COMMON_EVS
+};
+
+/**
+ * gs_perf_cpu_idle_state - Enum for idle states.
+ *
+ * Used to supply clients cpu idle information.
+*/
+enum gs_perf_cpu_idle_state {
+ PERF_CPU_ACTIVE,
+ PERF_CPU_IDLE_C1,
+ PERF_CPU_IDLE_C2
+};
+
+/**
+ * gs_counter_type - AMU or PMU
+ *
+ * Different CPUs may choose between AMU or PMU
+ * for the same event. So, for each event, we store
+ * the source monitoring unit.
+*/
+enum gs_counter_type {
+ PMU, /* Performance Monitoring Unit. */
+ AMU /* Activity Monitoring Unit. */
+};
+
+/**
+ * struct gs_event_data - The representation of a perf event.
+ *
+ * Each performance counter on each CPU will have an associated
+ * gs_event_data with it. This struct provides all the information
+ * for the perf event including what the last values are and how
+ * to read the event.
+ *
+ * @pevent: The underlying event structure.
+ * @element_idx: The index of this event. Indicator for what this
+ * event *should* be. Useful for AMU reading.
+ * @counter_type: Is this event from the PMU or AMU?
+ * @raw_event_id: The hardware event id associated with this event.
+ * @curr_count: Total event count.
+ * @prev_count: The last total event count.
+ * @last_delta: The difference between curr_count and prev_count.
+ */
+struct gs_event_data {
+ struct perf_event *pevent;
+ enum gs_perf_event_idx element_idx;
+ enum gs_counter_type counter_type;
+ unsigned int raw_event_id;
+ unsigned long prev_count;
+ unsigned long curr_count;
+ unsigned long last_delta;
+};
+
+/**
+ * struct gs_cpu_perf_data - Container for per-cpu counter data.
+ *
+ * This struct provides the interface for clients desiring CPU profiling
+ * data.
+ *
+ * @cpu_mon_on: Is this cpu being monitored?
+ * @last_update_ts: The last time the counters were updated.
+ * @cpu_freq: The CPU frequency for this sample (if populated).
+ * @time_delta_us: Duration of the last counter-update window.
+ * @perf_ev_last_delta: Per-event count deltas over the last window.
+ * @cpu_idle_state: Current cpu_idle state.
+ */
+struct gs_cpu_perf_data {
+ bool cpu_mon_on;
+ ktime_t last_update_ts;
+ unsigned long cpu_freq;
+ unsigned long time_delta_us;
+ unsigned long perf_ev_last_delta[PERF_NUM_COMMON_EVS];
+ enum gs_perf_cpu_idle_state cpu_idle_state;
+};
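+
+/*
+ * For example, a client can estimate instructions-per-cycle over the
+ * last window from the event deltas (assuming both deltas are valid
+ * and the cycle delta is non-zero):
+ *
+ *	ipc = data->perf_ev_last_delta[PERF_INST_IDX] /
+ *	      data->perf_ev_last_delta[PERF_CYCLE_IDX];
+ */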
+
+/**
+ * gs_perf_mon_callback_func_t - A type for client callbacks.
+ *
+ * @gs_cpu_perf_data_arr: A snapshot of perf data for all CPU cores.
+ * @private_data: The client's private data.
+ */
+typedef void (*gs_perf_mon_callback_func_t)(struct gs_cpu_perf_data *gs_cpu_perf_data_arr,
+ void *private_data);
+
+/**
+ * struct gs_perf_mon_client - Callback data for a client.
+ *
+ * The client's representation in the monitor. Used to invoke callbacks
+ * to service clients.
+ *
+ * @node: The list entry for this client.
+ * @name: Identifier for the client.
+ * @private_data: Client-private data handed back to the callback.
+ * @client_callback: The client's callback function to be called.
+ */
+struct gs_perf_mon_client {
+ struct list_head node;
+ const char *name;
+ void *private_data;
+ gs_perf_mon_callback_func_t client_callback;
+};
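+
+/*
+ * A minimal registration sketch (hypothetical client; the names below
+ * are illustrative and not part of this API):
+ *
+ *	static void my_gov_callback(struct gs_cpu_perf_data *data, void *priv)
+ *	{
+ *		if (data[0].cpu_mon_on)
+ *			pr_info("cpu0 cycle delta: %lu\n",
+ *				data[0].perf_ev_last_delta[PERF_CYCLE_IDX]);
+ *	}
+ *
+ *	static struct gs_perf_mon_client my_gov_client = {
+ *		.name = "my_governor",
+ *		.client_callback = my_gov_callback,
+ *	};
+ *
+ *	gs_perf_mon_add_client(&my_gov_client);
+ */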
+
+#if IS_ENABLED(CONFIG_GS_PERF_MON)
+
+/**
+ * gs_perf_mon_get_data - Primary function for retrieving cpu performance data.
+ *
+ * Inputs:
+ * @cpu: Which CPU's data should we supply?
+ * @data_dest: A location to store the perf data.
+ *
+ * Returns: 0 on success and populates data_dest.
+ *          -EINVAL if the monitor has not been initialized.
+ *          -ENODATA if the monitor is not active on this CPU.
+*/
+int gs_perf_mon_get_data(unsigned int cpu, struct gs_cpu_perf_data *data_dest);
+
+/**
+ * read_perf_event_local - Reading function for total count of a single event.
+ *
+ * REQUIREMENT: IRQs must be disabled.
+ *
+ * This function should be called with IRQs disabled since the calling
+ * task could migrate to another CPU and return an unexpected result.
+ *
+ * Inputs:
+ * @cpu: Ignored; the read always targets the CPU the caller is
+ *       running on.
+ * @event_id: Which event to read?
+ * @count: Place to store the total event count.
+ *
+ * Returns: 0 if the read is successful; stores the result in count.
+ *          -EINVAL if the monitor has not been initialized.
+ *          -ENODATA if the monitor is not active on this CPU.
+ *          Other non-zero values if the underlying counter read fails.
+*/
+int read_perf_event_local(int cpu, unsigned int event_id, u64 *count);
+
+/**
+ * gs_perf_mon_add_client - Registers a client for the perf monitor to service.
+ *
+ * Input:
+ * @client: Data to be added to a serviced list.
+ *
+ * Returns: 0 on success
+ *
+ * Side-Effects: May call gs_perf_mon_start on first client.
+*/
+int gs_perf_mon_add_client(struct gs_perf_mon_client *client);
+
+/**
+ * gs_perf_mon_remove_client - Unregisters a client for the perf monitor to service.
+ *
+ * Input:
+ * @client: Client to be removed from the list.
+ *
+ * Side-Effects: Stops counter collection when the last client gets deregistered.
+*/
+void gs_perf_mon_remove_client(struct gs_perf_mon_client *client);
+
+/**
+ * gs_perf_mon_tick_update_counters - Updates performance counters and triggers
+ * latency governor servicing.
+ *
+ * Requirements: IRQs must be off on local cpu.
+*/
+void gs_perf_mon_tick_update_counters(void);
+
+/**
+ * gs_perf_mon_update_clients - Checks and updates monitor clients.
+ *
+ * Side-Effects: Could call client servicing function.
+*/
+void gs_perf_mon_update_clients(void);
+
+#else
+
+/* No-op stubs used when the monitor is not enabled. */
+static inline int gs_perf_mon_get_data(unsigned int cpu, struct gs_cpu_perf_data *data_dest)
+{
+ return 0;
+}
+static inline int read_perf_event_local(int cpu, unsigned int event_id, u64 *count)
+{
+ return 0;
+}
+static inline int gs_perf_mon_add_client(struct gs_perf_mon_client *client)
+{
+ return 0;
+}
+static inline void gs_perf_mon_remove_client(struct gs_perf_mon_client *client)
+{
+ return;
+}
+static inline void gs_perf_mon_tick_update_counters(void)
+{
+ return;
+}
+static inline void gs_perf_mon_update_clients(void)
+{
+ return;
+}
+
+#endif
+
+#endif // _GS_PERF_MON_H_
\ No newline at end of file