author	Will Song <jinpengsong@google.com>	2024-01-30 14:43:10 -0800
committer	Will Song <jinpengsong@google.com>	2024-03-04 19:09:06 +0000
commit	004df3b16589abb6c9faf61f4626244b2004338a (patch)
tree	0b464de96e8932da7182d19fafda534af5b5cf9a
parent	fa879c4c56d10069e37d84e0b1aa264cd952a8a0 (diff)
download	gs-004df3b16589abb6c9faf61f4626244b2004338a.tar.gz
drivers: performance: Add Google Perf Mon
Adds a tick-driven PMU/AMU counter reader. This monitor is serviced by
the arch_timer and updates the performance counters on a fixed cadence.

Test: Flash + Perfetto on Burncycles + gb5
Bug: 325274590
Bug: 262894231
Change-Id: Ic5ee631cfd93e74e49f519c33db95436dbc29338
Signed-off-by: Will Song <jinpengsong@google.com>
-rw-r--r--	drivers/performance/gs_perf_mon/gs_perf_mon.c	950
-rw-r--r--	drivers/performance/gs_perf_mon/gs_perf_mon_priv.h	88
-rw-r--r--	include/performance/gs_perf_mon/gs_perf_mon.h	222
3 files changed, 1260 insertions, 0 deletions
diff --git a/drivers/performance/gs_perf_mon/gs_perf_mon.c b/drivers/performance/gs_perf_mon/gs_perf_mon.c
new file mode 100644
index 000000000..fe4ed846e
--- /dev/null
+++ b/drivers/performance/gs_perf_mon/gs_perf_mon.c
@@ -0,0 +1,950 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Google LLC.
+ *
+ * Module for Performance Monitoring.
+ */
+#define pr_fmt(fmt) "gs_perf_mon: " fmt
+
+#include <linux/cpuidle.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+#include <linux/cpu_pm.h>
+#include <linux/cpu.h>
+#include <linux/of_fdt.h>
+#include <linux/perf_event.h>
+#include <linux/of_device.h>
+#include <linux/mutex.h>
+#include <trace/hooks/cpuidle.h>
+#include <linux/spinlock.h>
+#include <trace/events/power.h>
+#include <uapi/linux/sched/types.h>
+
+#include <performance/gs_perf_mon/gs_perf_mon.h>
+#include "gs_perf_mon_priv.h"
+
+static struct gs_perf_mon_config perf_mon_config;
+static struct gs_perf_mon_state perf_mon_metadata;
+
+int gs_perf_mon_get_data(unsigned int cpu, struct gs_cpu_perf_data *data_dest)
+{
+ int perf_idx;
+ int ret = 0;
+ struct cpu_perf_info *cpu_data;
+ unsigned long flags;
+
+ /* Bail out if called before the monitor has probed. */
+ if (!perf_mon_metadata.perf_monitor_initialized)
+ return -EINVAL;
+
+ cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+
+ spin_lock_irqsave(&cpu_data->cpu_perf_lock, flags);
+
+ /* If monitor not active, return error. */
+ if (!cpu_data->mon_active) {
+ spin_unlock_irqrestore(&cpu_data->cpu_perf_lock, flags);
+ return -ENODATA;
+ }
+
+ /* Inform caller of monitor status. */
+ data_dest->cpu_mon_on = true;
+
+ for (perf_idx = 0; perf_idx < PERF_NUM_COMMON_EVS; perf_idx++) {
+ data_dest->perf_ev_last_delta[perf_idx] =
+ cpu_data->perf_ev_data[perf_idx].last_delta;
+ }
+
+ /* Copy over cpu metadata. */
+ data_dest->time_delta_us = cpu_data->time_delta_us;
+ data_dest->cpu_idle_state = READ_ONCE(cpu_data->idle_state);
+
+ spin_unlock_irqrestore(&cpu_data->cpu_perf_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(gs_perf_mon_get_data);
+
+static int gs_perf_mon_start(void);
+static void gs_perf_mon_stop(void);
+static void disable_perf_events(int cpu);
+
+/**
+ * read_perf_event - Reads PMU or AMU event from current cpu.
+ *
+ * Inputs:
+ * @event: The perf event to read.
+ * @event_total: Container for read result.
+ *
+ * Returns: Non-zero on error.
+ */
+static inline int read_perf_event(struct gs_event_data *event, u64 *event_total)
+{
+ int ret = 0;
+
+ if (event->counter_type == PMU) {
+ /* Read this event from PMU. */
+ ret = perf_event_read_local(event->pevent, event_total, NULL, NULL);
+ if (ret) {
+ *event_total = 0;
+ return ret;
+ }
+ } else {
+ /* Read this event from AMU. */
+ switch (event->element_idx) {
+ case PERF_CYCLE_IDX:
+ *event_total = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
+ break;
+
+ case PERF_INST_IDX:
+ *event_total = read_sysreg_s(SYS_AMEVCNTR0_INST_RET_EL0);
+ break;
+
+ case PERF_STALL_BACKEND_MEM_IDX:
+ *event_total = read_sysreg_s(SYS_AMEVCNTR0_MEM_STALL);
+ break;
+
+ default:
+ *event_total = 0;
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
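+
+/*
+ * Note: the AMU path above reads the architected activity-monitor
+ * counters directly via sysreg accesses; unlike PMU events they are
+ * free-running and need no perf_event allocation, which is why
+ * init_event() below only allocates PMU-backed events.
+ */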
+
+/* TODO: b/323458771. Rename to be global namespace appropriate and remove cpu field. */
+int read_perf_event_local(int cpu, unsigned int event_id, u64 *count)
+{
+ struct gs_event_data *event;
+ struct cpu_perf_info *cpu_data;
+ int mon_active;
+ int ret;
+
+ /* The cpu argument is ignored; we always read from the local CPU. */
+ cpu = raw_smp_processor_id();
+
+ if (!perf_mon_metadata.perf_monitor_initialized)
+ return -EINVAL;
+
+ if (event_id >= PERF_NUM_COMMON_EVS)
+ return -EINVAL;
+
+ cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+
+ spin_lock(&cpu_data->cpu_perf_lock);
+ mon_active = cpu_data->mon_active;
+
+ if (!mon_active) {
+ spin_unlock(&cpu_data->cpu_perf_lock);
+ return -ENODATA;
+ }
+
+ event = &cpu_data->perf_ev_data[event_id];
+ ret = read_perf_event(event, count);
+ spin_unlock(&cpu_data->cpu_perf_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(read_perf_event_local);
+
+int gs_perf_mon_add_client(struct gs_perf_mon_client *client)
+{
+ if (!client)
+ return -EINVAL;
+
+ mutex_lock(&perf_mon_metadata.client_list_lock);
+
+ /* Start the perf monitor on registration of first client. */
+ if (perf_mon_metadata.perf_monitor_initialized &&
+ list_empty(&perf_mon_metadata.client_list))
+ gs_perf_mon_start();
+
+ INIT_LIST_HEAD(&client->node);
+ list_add(&client->node, &perf_mon_metadata.client_list);
+
+ mutex_unlock(&perf_mon_metadata.client_list_lock);
+ return 0;
+}
+EXPORT_SYMBOL(gs_perf_mon_add_client);
+
+void gs_perf_mon_remove_client(struct gs_perf_mon_client *client)
+{
+ if (!client)
+ return;
+ mutex_lock(&perf_mon_metadata.client_list_lock);
+ list_del(&client->node);
+ if (perf_mon_metadata.perf_monitor_initialized &&
+ list_empty(&perf_mon_metadata.client_list))
+ gs_perf_mon_stop();
+ mutex_unlock(&perf_mon_metadata.client_list_lock);
+}
+EXPORT_SYMBOL(gs_perf_mon_remove_client);
+
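+/**
+ * gs_perf_mon_tick_update_counters - Tick-path counter update.
+ *
+ * Expected to be called from the tick with IRQs off on the local CPU.
+ * Reads the local counters once every param_ticks_per_counter_update
+ * ticks and wakes the client-servicing thread if the backup update
+ * interval has elapsed.
+*/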
+void gs_perf_mon_tick_update_counters(void)
+{
+ unsigned int perf_idx;
+ unsigned int cpu = raw_smp_processor_id();
+ u64 total;
+ struct cpu_perf_info *cpu_data;
+ struct gs_event_data *ev_data;
+ ktime_t now = ktime_get();
+ ktime_t last_update_client_ts;
+ unsigned long time_delta_us;
+
+ /* If the node is not probed yet, do nothing. */
+ if (!perf_mon_metadata.perf_monitor_initialized)
+ return;
+
+ cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+
+ spin_lock(&cpu_data->cpu_perf_lock);
+
+ /* If this CPU is not monitored, do nothing. */
+ if (!cpu_data->mon_active || !perf_mon_metadata.is_active) {
+ spin_unlock(&cpu_data->cpu_perf_lock);
+ return;
+ }
+
+ time_delta_us = ktime_us_delta(now, cpu_data->last_update_ts);
+ cpu_data->ticks_since_update += 1;
+
+ /* Poll if enough ticks have elapsed or the time-based fallback has expired. */
+ if (cpu_data->ticks_since_update >= perf_mon_config.param_ticks_per_counter_update ||
+ time_delta_us > perf_mon_config.param_ticks_per_counter_update * USECS_PER_TICK) {
+ /* Loop over all AMU/PMU counters and read them. */
+ for (perf_idx = 0; perf_idx < PERF_NUM_COMMON_EVS; perf_idx++) {
+ ev_data = &cpu_data->perf_ev_data[perf_idx];
+ if (read_perf_event(ev_data, &total)) {
+ pr_err("Perf event read failed on cpu=%u for event_idx=%u\n",
+ cpu, perf_idx);
+ continue;
+ }
+ ev_data->prev_count = ev_data->curr_count;
+ ev_data->curr_count = total;
+ ev_data->last_delta = ev_data->curr_count - ev_data->prev_count;
+ }
+ cpu_data->time_delta_us = ktime_us_delta(now, cpu_data->last_update_ts);
+ cpu_data->last_update_ts = now;
+ cpu_data->ticks_since_update = 0;
+ }
+
+ spin_unlock(&cpu_data->cpu_perf_lock);
+
+ /* Wake the backup client-servicing thread if it is overdue. */
+ last_update_client_ts = READ_ONCE(perf_mon_metadata.last_client_update_ts);
+ time_delta_us = ktime_us_delta(now, last_update_client_ts);
+ if (time_delta_us > perf_mon_config.client_update_backup_us)
+ wake_up_process(perf_mon_metadata.perf_mon_task);
+}
+EXPORT_SYMBOL(gs_perf_mon_tick_update_counters);
+
+/**
+ * delete_event - Deallocates an event.
+ *
+ * Input:
+ * @event: The performance event to deallocate.
+*/
+static void delete_event(struct gs_event_data *event)
+{
+ if (event->pevent) {
+ perf_event_release_kernel(event->pevent);
+ event->pevent = NULL;
+ }
+}
+
+/**
+ * init_event - Initialize a single perf event.
+ *
+ * Input:
+ * @event: The event identifier to allocate.
+ * @cpu: The cpu to allocate this event on.
+ *
+ * Returns: Non-zero on error.
+*/
+static int init_event(struct gs_event_data *event, unsigned int cpu)
+{
+ struct perf_event *pevent;
+ unsigned int event_id = event->raw_event_id;
+ /* Zero-initialize attr so unset fields do not carry stack garbage. */
+ struct perf_event_attr attr = {};
+
+ attr.type = PERF_TYPE_RAW;
+ attr.size = sizeof(struct perf_event_attr);
+ attr.pinned = 1;
+ attr.exclude_idle = 0;
+ attr.config = event_id;
+
+ /* The following allocation steps are only needed for PMU events. */
+ if (event->counter_type == PMU) {
+ /* Allocate the event from kernel. */
+ pevent = perf_event_create_kernel_counter(&attr, cpu, NULL, NULL, NULL);
+ if (IS_ERR(pevent))
+ return PTR_ERR(pevent);
+
+ event->pevent = pevent;
+
+ /* Enable the event. */
+ perf_event_enable(pevent);
+ }
+ return 0;
+}
+
+/**
+ * enable_perf_events - Enables all pmu events on a cpu.
+ *
+ * Input:
+ * @cpu: The CPU to allocate events for.
+ *
+ * Returns: Non-zero on error.
+ *
+ * Synchronization: Must have perf_allocation_lock held.
+*/
+static int enable_perf_events(unsigned int cpu)
+{
+ struct cpu_perf_info *cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ struct gs_event_data *ev_data;
+ unsigned int perf_idx;
+ int ret = 0;
+ unsigned long flags;
+
+ /* Loop over all events and initialize them. */
+ for (perf_idx = 0; perf_idx < PERF_NUM_COMMON_EVS; perf_idx++) {
+ ev_data = &cpu_data->perf_ev_data[perf_idx];
+ ret = init_event(ev_data, cpu);
+ if (WARN_ON(ret))
+ goto err_init;
+ }
+
+ spin_lock_irqsave(&cpu_data->cpu_perf_lock, flags);
+ cpu_data->mon_active = true;
+ spin_unlock_irqrestore(&cpu_data->cpu_perf_lock, flags);
+ return 0;
+
+err_init:
+ /* On error of enabling any event, deallocate all events. */
+ disable_perf_events(cpu);
+ return ret;
+}
+
+/**
+ * disable_perf_events - Disables all pmu events on a cpu.
+ *
+ * Input:
+ * @cpu: Which CPU to disable events for.
+ *
+ * Synchronization: Caller must hold perf_allocation_lock.
+*/
+static void disable_perf_events(int cpu)
+{
+ struct cpu_perf_info *cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ struct gs_event_data *ev_data;
+ unsigned int perf_idx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_data->cpu_perf_lock, flags);
+ cpu_data->mon_active = false;
+ spin_unlock_irqrestore(&cpu_data->cpu_perf_lock, flags);
+ for (perf_idx = 0; perf_idx < PERF_NUM_COMMON_EVS; perf_idx++) {
+ ev_data = &cpu_data->perf_ev_data[perf_idx];
+ delete_event(ev_data);
+ }
+}
+
+/**
+ * vendor_update_event_cpu_idle_enter - idle hook.
+ *
+ * Vendor hook for entering CPU idle.
+ *
+ * Input:
+ * @data: Unused.
+ * @state: The idle state this cpu transitioned to.
+ * @dev: Unused.
+ *
+ * Output: Sets idle_state to:
+ * 0 = PERF_CPU_IDLE_C1, 1 = PERF_CPU_IDLE_C2
+*/
+static void vendor_update_event_cpu_idle_enter(void *data, int *state, struct cpuidle_device *dev)
+{
+ unsigned int cpu = raw_smp_processor_id();
+ struct cpu_perf_info *cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ int idle_state = *state;
+
+ if (idle_state == 0)
+ WRITE_ONCE(cpu_data->idle_state, PERF_CPU_IDLE_C1);
+ else if (idle_state == 1)
+ WRITE_ONCE(cpu_data->idle_state, PERF_CPU_IDLE_C2);
+}
+
+/**
+ * vendor_update_event_cpu_idle_exit - idle exit hook.
+ *
+ * Sets idle_state to PERF_CPU_ACTIVE to indicate busy.
+*/
+static void vendor_update_event_cpu_idle_exit(void *data, int state, struct cpuidle_device *dev)
+{
+ unsigned int cpu = raw_smp_processor_id();
+ struct cpu_perf_info *cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ WRITE_ONCE(cpu_data->idle_state, PERF_CPU_ACTIVE);
+}
+
+/**
+ * gs_perf_mon_update_clients - Main function for updating clients.
+ *
+ * Loops over the client_list and services each client's callback
+ * on a fixed interval.
+*/
+void gs_perf_mon_update_clients(void)
+{
+ unsigned int cpu;
+ struct gs_perf_mon_client *curr_client;
+ ktime_t last_update_client_ts;
+ unsigned long delta_us;
+ ktime_t now = ktime_get();
+ int ret = 0;
+
+ if (!perf_mon_metadata.perf_monitor_initialized)
+ return;
+
+ /* Service the clients only if no other updater holds the lock. */
+ if (mutex_trylock(&perf_mon_metadata.client_list_lock)) {
+ /* Update last client service update ts and metadata. */
+ last_update_client_ts = READ_ONCE(perf_mon_metadata.last_client_update_ts);
+ delta_us = ktime_us_delta(now, last_update_client_ts);
+
+ /* For spurious or stale wakeups. */
+ if (!perf_mon_metadata.is_active ||
+ delta_us < perf_mon_config.client_update_interval_us) {
+ mutex_unlock(&perf_mon_metadata.client_list_lock);
+ return;
+ }
+
+ WRITE_ONCE(perf_mon_metadata.last_client_update_ts, now);
+
+ /* Copy over all the performance information for all cpus. */
+ for_each_possible_cpu (cpu) {
+ ret = gs_perf_mon_get_data(cpu, &perf_mon_metadata.client_shared_data[cpu]);
+ if (ret)
+ perf_mon_metadata.client_shared_data[cpu].cpu_mon_on = false;
+ }
+
+ /* Update all clients supplying a callback pointer to monitor data. */
+ list_for_each_entry (curr_client, &perf_mon_metadata.client_list, node) {
+ if (curr_client->client_callback)
+ curr_client->client_callback(perf_mon_metadata.client_shared_data,
+ curr_client->private_data);
+ }
+ mutex_unlock(&perf_mon_metadata.client_list_lock);
+ }
+}
+EXPORT_SYMBOL(gs_perf_mon_update_clients);
+
+/**
+ * gs_perf_mon_cpuhp_up - re-enables perf monitoring on cpu-up.
+ *
+ * Input:
+ * @cpu: Whichever CPU just came back online.
+*/
+static int gs_perf_mon_cpuhp_up(unsigned int cpu)
+{
+ int ret = 0;
+ int mon_active;
+ unsigned long flags;
+ struct cpu_perf_info *cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+
+ mutex_lock(&cpu_data->perf_allocation_lock);
+ spin_lock_irqsave(&cpu_data->cpu_perf_lock, flags);
+ mon_active = cpu_data->mon_active;
+ spin_unlock_irqrestore(&cpu_data->cpu_perf_lock, flags);
+
+ /* Do nothing if already on. */
+ if (mon_active) {
+ mutex_unlock(&cpu_data->perf_allocation_lock);
+ return 0;
+ }
+
+ /* Enable the events. */
+ ret = enable_perf_events(cpu);
+ mutex_unlock(&cpu_data->perf_allocation_lock);
+
+ /*
+ * If we were unable to restart the performance events, we
+ * should stop the perf monitor entirely.
+ */
+ if (WARN_ON(ret))
+ gs_perf_mon_stop();
+ return ret;
+}
+
+/**
+ * gs_perf_mon_cpuhp_down - disables perf monitoring on cpu-down.
+ *
+ * Input:
+ * @cpu: Whichever CPU just went down.
+*/
+static int gs_perf_mon_cpuhp_down(unsigned int cpu)
+{
+ struct cpu_perf_info *cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ mutex_lock(&cpu_data->perf_allocation_lock);
+ disable_perf_events(cpu);
+ mutex_unlock(&cpu_data->perf_allocation_lock);
+
+ return 0;
+}
+
+/* Initializes CPU hotplug callbacks. */
+static int gs_init_perf_mon_cpuhp(void)
+{
+ int ret = 0;
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "gs_perf_mon", gs_perf_mon_cpuhp_up,
+ gs_perf_mon_cpuhp_down);
+ if (ret < 0)
+ pr_err("init cpuhp fail:%d\n", ret);
+
+ return ret;
+}
+
+/**
+ * gs_perf_mon_start - Start perf monitoring.
+ *
+ * This function will allocate PMU units and queue
+ * monitoring work.
+*/
+static int gs_perf_mon_start(void)
+{
+ unsigned int cpu;
+ struct cpu_perf_info *cpu_data;
+ int ret = 0;
+
+ mutex_lock(&perf_mon_metadata.active_state_lock);
+ if (perf_mon_metadata.is_active)
+ goto unlock_out;
+
+ /* Allocate and enable perf events. */
+ for_each_possible_cpu (cpu) {
+ cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ mutex_lock(&cpu_data->perf_allocation_lock);
+ ret = enable_perf_events(cpu);
+ mutex_unlock(&cpu_data->perf_allocation_lock);
+ if (ret)
+ goto unlock_out;
+ }
+
+ perf_mon_metadata.is_active = true;
+unlock_out:
+ mutex_unlock(&perf_mon_metadata.active_state_lock);
+ return ret;
+}
+
+/**
+ * gs_perf_mon_stop - Stop perf monitoring.
+ *
+ * This function will deallocate PMU units and cancel
+ * monitor work.
+*/
+static void gs_perf_mon_stop(void)
+{
+ unsigned int cpu;
+ struct cpu_perf_info *cpu_data;
+ mutex_lock(&perf_mon_metadata.active_state_lock);
+
+ /* If the monitor is already disabled. */
+ if (!perf_mon_metadata.is_active)
+ goto unlock_out;
+
+ perf_mon_metadata.is_active = false;
+ for_each_possible_cpu (cpu) {
+ /* Deallocate all the perf events. */
+ cpu_data = &perf_mon_metadata.cpu_data_arr[cpu];
+ mutex_lock(&cpu_data->perf_allocation_lock);
+ disable_perf_events(cpu);
+ mutex_unlock(&cpu_data->perf_allocation_lock);
+ }
+unlock_out:
+ mutex_unlock(&perf_mon_metadata.active_state_lock);
+}
+
+/* Helper function for parsing a single perf event. */
+static int parse_perf_event(struct device_node *counter_type, struct gs_event_data *cpu_event,
+ enum gs_counter_type unit_id, enum gs_perf_event_idx ev_idx, char *prop_name)
+{
+ unsigned int event_id;
+ int ret;
+
+ ret = of_property_read_u32(counter_type, prop_name, &event_id);
+ if (ret)
+ return -EINVAL;
+
+ cpu_event->counter_type = unit_id;
+ cpu_event->raw_event_id = event_id;
+ cpu_event->element_idx = ev_idx;
+ return 0;
+}
+
+/**
+ * parse_perf_counters - Populates the values for the performance unit.
+ *
+ * Inputs:
+ * @dev: The device for error messages.
+ * @counter_type: The perf node.
+ * @cpu_events: Container for parsed data.
+ * @unit_id: Processing AMU or PMU?
+ *
+ * Returns: Always 0; events missing from the node are logged and skipped.
+*/
+static int parse_perf_counters(struct device *dev, struct device_node *counter_type,
+ struct gs_event_data *cpu_events, enum gs_counter_type unit_id)
+{
+ int ret = 0;
+
+ ret = parse_perf_event(counter_type, &cpu_events[PERF_L2D_CACHE_REFILL_IDX], unit_id,
+ PERF_L2D_CACHE_REFILL_IDX, "l2-cachemiss-ev");
+ if (ret)
+ dev_dbg(dev, "l2-cachemiss-ev event not specified. Skipping.\n");
+
+ ret = parse_perf_event(counter_type, &cpu_events[PERF_STALL_BACKEND_MEM_IDX], unit_id,
+ PERF_STALL_BACKEND_MEM_IDX, "stall-backend-mem-ev");
+ if (ret)
+ dev_dbg(dev, "stall-backend-mem-event not specified. Skipping.\n");
+
+ ret = parse_perf_event(counter_type, &cpu_events[PERF_INST_IDX], unit_id, PERF_INST_IDX,
+ "inst-ev");
+ if (ret)
+ dev_dbg(dev, "inst-ev not specified. Skipping.\n");
+
+ ret = parse_perf_event(counter_type, &cpu_events[PERF_CYCLE_IDX], unit_id, PERF_CYCLE_IDX,
+ "cyc-ev");
+ if (ret)
+ dev_dbg(dev, "cyc-ev not specified. Skipping.\n");
+
+ ret = parse_perf_event(counter_type, &cpu_events[PERF_L3_CACHE_MISS_IDX], unit_id,
+ PERF_L3_CACHE_MISS_IDX, "l3-cachemiss-ev");
+ if (ret)
+ dev_dbg(dev, "l3-cachemiss-ev not specified. Skipping.\n");
+
+ return 0;
+}
+
+/**
+ * initialize_cpu_data_info - Initializes locks and parses perf events for one CPU.
+ *
+ * Inputs:
+ * @dev: Device for error reporting and node reading.
+ * @cpu_node: The individual CPU node to populate.
+ * @cpu_data: The container for the CPU data.
+ *
+ * Returns: Non-zero on error.
+*/
+static int initialize_cpu_data_info(struct device *dev, struct device_node *cpu_node,
+ struct cpu_perf_info *cpu_data)
+{
+ struct device_node *pmu_node;
+ struct device_node *amu_node;
+ struct gs_event_data *cpu_event_data = cpu_data->perf_ev_data;
+ unsigned int event_idx;
+ int ret = 0;
+
+ spin_lock_init(&cpu_data->cpu_perf_lock);
+ mutex_init(&cpu_data->perf_allocation_lock);
+
+ /* Default events to uninitialized. */
+ for (event_idx = 0; event_idx < PERF_NUM_COMMON_EVS; event_idx++)
+ cpu_event_data[event_idx].raw_event_id = UINT_MAX;
+
+ /* Find and populate the pmu data. */
+ pmu_node = of_get_child_by_name(cpu_node, "pmu_events");
+ ret = parse_perf_counters(dev, pmu_node, cpu_event_data, PMU);
+ if (ret) {
+ dev_err(dev, "Couldn't parse pmu_node, skipping performance monitoring.\n");
+ return ret;
+ }
+
+ /* Find and populate the amu data. */
+ amu_node = of_get_child_by_name(cpu_node, "amu_events");
+ ret = parse_perf_counters(dev, amu_node, cpu_event_data, AMU);
+ if (ret) {
+ dev_err(dev, "Couldn't parse amu_node, skipping performance monitoring.\n");
+ return ret;
+ }
+
+ /* Check that every event is supported from at least one of PMU or AMU. */
+ for (event_idx = 0; event_idx < PERF_NUM_COMMON_EVS; event_idx++) {
+ if (cpu_event_data[event_idx].raw_event_id == UINT_MAX) {
+ dev_err(dev, "Event at index %u is unsupported. Stopping probe.\n",
+ event_idx);
+ ret = -EINVAL;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * gs_perf_mon_parse_dt - Main parse function for the perf monitor.
+ */
+static int gs_perf_mon_parse_dt(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct cpu_perf_info *cpu_data;
+
+ /* The parent node for the per-cpu children. */
+ struct device_node *cpus_data_np;
+
+ /* Containers for children nodes. */
+ struct device_node *cpu_node;
+ unsigned int cpu_idx;
+ int ret = 0;
+
+
+ /* Set monitor's performance counter update interval in us. */
+ ret = of_property_read_u32(dev->of_node, "param_ticks_per_counter_update",
+ &perf_mon_config.param_ticks_per_counter_update);
+ if (ret) {
+ dev_err(dev, "param_ticks_per_counter_update unspecified, using default value.\n");
+ ret = 0;
+ perf_mon_config.param_ticks_per_counter_update = DEFAULT_TICKS_PER_COUNTER_UPDATE;
+ }
+
+ /* Set monitor's tick client update interval in us. */
+ ret = of_property_read_u32(dev->of_node, "client_update_backup_us",
+ &perf_mon_config.client_update_backup_us);
+ if (ret) {
+ dev_err(dev, "client_update_backup_us unspecified, using default value.\n");
+ ret = 0;
+ perf_mon_config.client_update_backup_us =
+ DEFAULT_TICKS_PER_COUNTER_UPDATE * USECS_PER_TICK;
+ }
+
+ /* Set monitor's opportunistic client update interval in us. */
+ ret = of_property_read_u32(dev->of_node, "client_update_interval_us",
+ &perf_mon_config.client_update_interval_us);
+ if (ret) {
+ dev_err(dev, "client_update_interval_us unspecified, using default value.\n");
+ ret = 0;
+ perf_mon_config.client_update_interval_us =
+ DEFAULT_TICKS_PER_COUNTER_UPDATE * USECS_PER_TICK;
+ }
+
+ /* Allocate memory for cpu state representations. */
+ perf_mon_metadata.cpu_data_arr =
+ devm_kzalloc(dev, sizeof(struct cpu_perf_info) * num_possible_cpus(), GFP_KERNEL);
+ if (!perf_mon_metadata.cpu_data_arr) {
+ dev_err(dev, "Insufficient memory for cpu_data_arr. Aborting\n");
+ return -ENOMEM;
+ }
+
+ /* Populate the cpu state representations. */
+ cpu_idx = 0;
+ cpu_node = NULL;
+ cpus_data_np = of_get_child_by_name(dev->of_node, "gs_perf_cpu");
+ if (!cpus_data_np) {
+ dev_err(dev, "gs_perf_cpu invalid skipping performance monitoring.\n");
+ ret = -EINVAL;
+ goto err_probe;
+ }
+
+ /* Loop over all child CPU nodes. */
+ while ((cpu_node = of_get_next_child(cpus_data_np, cpu_node)) != NULL) {
+ /* Find the CPU index. */
+ ret = of_property_read_u32(cpu_node, "cpu_idx", &cpu_idx);
+ if (ret || cpu_idx >= num_possible_cpus()) {
+ dev_err(dev, "cpu_idx invalid, skipping performance monitoring.\n");
+ ret = -EINVAL;
+ goto err_probe;
+ }
+
+ /* Retrieve probe target. */
+ cpu_data = &perf_mon_metadata.cpu_data_arr[cpu_idx];
+ cpu_data->cpu_id = cpu_idx;
+
+ /* Initialize cpu data. */
+ ret = initialize_cpu_data_info(dev, cpu_node, cpu_data);
+ if (ret)
+ goto err_probe;
+ }
+
+ return 0;
+
+err_probe:
+ /* cpu_data_arr is devm-allocated and is released automatically on probe failure. */
+ return ret;
+}
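+
+/*
+ * Illustrative device-tree layout consumed by gs_perf_mon_parse_dt()
+ * above. Property values here are examples only, not taken from a real
+ * board file; every event must be supplied by either the pmu_events or
+ * amu_events node of each CPU:
+ *
+ *	gs_perf_mon {
+ *		compatible = "google,gs_perf_mon";
+ *		param_ticks_per_counter_update = <2>;
+ *		client_update_interval_us = <8000>;
+ *		client_update_backup_us = <10000>;
+ *		gs_perf_cpu {
+ *			cpu0 {
+ *				cpu_idx = <0>;
+ *				pmu_events {
+ *					l2-cachemiss-ev = <0x17>;
+ *					l3-cachemiss-ev = <0x2a>;
+ *				};
+ *				amu_events {
+ *					cyc-ev = <0x0>;
+ *					inst-ev = <0x1>;
+ *					stall-backend-mem-ev = <0x2>;
+ *				};
+ *			};
+ *		};
+ *	};
+ */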
+
+/* Kthread for servicing the monitor's clients (e.g. latency governors). */
+static int perf_mon_task(void *data)
+{
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /*
+ * Put this thread to sleep and wait
+ * for a wakeup from timer or scheduler.
+ *
+ * TODO: Try wait_on bits (b/323458771)
+ */
+ schedule();
+ set_current_state(TASK_RUNNING);
+ gs_perf_mon_update_clients();
+ }
+ return 0;
+}
+
+/* Driver initialization code. */
+int gs_perf_mon_driver_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ int ret = 0;
+
+ ret = gs_perf_mon_parse_dt(pdev);
+ if (ret)
+ return ret;
+
+ /* Allocate and schedule the perf_mon task. */
+ perf_mon_metadata.perf_mon_task =
+ kthread_create(perf_mon_task, NULL, "perf_mon_update_client_task");
+ if (IS_ERR(perf_mon_metadata.perf_mon_task)) {
+ pr_err("%s: failed kthread_create for perf_mon \n", __func__);
+ ret = PTR_ERR(perf_mon_metadata.perf_mon_task);
+ return ret;
+ }
+ sched_set_fifo(perf_mon_metadata.perf_mon_task);
+
+ /* Allocate container for client's shared data. */
+ perf_mon_metadata.client_shared_data = devm_kzalloc(
+ dev, sizeof(struct gs_cpu_perf_data) * num_possible_cpus(), GFP_KERNEL);
+ if (!perf_mon_metadata.client_shared_data) {
+ ret = -ENOMEM;
+ goto err_client_data;
+ }
+
+ /* Register hooks. */
+ ret = register_trace_android_vh_cpu_idle_enter(vendor_update_event_cpu_idle_enter, NULL);
+ if (ret) {
+ dev_err(dev, "Register idle enter vendor hook fail %d\n", ret);
+ goto err_vh_idle_enter_register;
+ }
+ ret = register_trace_android_vh_cpu_idle_exit(vendor_update_event_cpu_idle_exit, NULL);
+ if (ret) {
+ dev_err(dev, "Register idle exit vendor hook fail %d\n", ret);
+ goto err_vh_idle_exit_register;
+ }
+
+ /* Register cpu hotplugs. */
+ ret = gs_init_perf_mon_cpuhp();
+ if (ret < 0) {
+ dev_err(dev, "gs_init_perf_mon_cpuhp errored with number %d\n", ret);
+ goto err_cpuhp_init;
+ }
+
+ /* Check whether clients registered before we probed. */
+ mutex_lock(&perf_mon_metadata.client_list_lock);
+ if (!list_empty(&perf_mon_metadata.client_list))
+ gs_perf_mon_start();
+ perf_mon_metadata.perf_monitor_initialized = true;
+ mutex_unlock(&perf_mon_metadata.client_list_lock);
+
+ return 0;
+
+/* If any of the above steps failed, we need to free resources and unregister hooks. */
+err_cpuhp_init:
+ unregister_trace_android_vh_cpu_idle_exit(vendor_update_event_cpu_idle_exit, NULL);
+err_vh_idle_exit_register:
+ unregister_trace_android_vh_cpu_idle_enter(vendor_update_event_cpu_idle_enter, NULL);
+err_vh_idle_enter_register:
+err_client_data:
+ kthread_stop(perf_mon_metadata.perf_mon_task);
+ return ret;
+}
+
+static const struct of_device_id gs_perf_mon_root_match[] = {
+ { .compatible = "google,gs_perf_mon" },
+ { /* sentinel */ }
+};
+
+static struct platform_driver gs_perf_mon_platform_driver = {
+ .probe = gs_perf_mon_driver_probe,
+ .driver = {
+ .name = "gs_perf_mon",
+ .owner = THIS_MODULE,
+ .of_match_table = gs_perf_mon_root_match,
+ .suppress_bind_attrs = true,
+ },
+};
+
+/* Driver initialization step for lists and locks. */
+static int __init gs_perf_mon_init(void)
+{
+ int ret = 0;
+ mutex_init(&perf_mon_metadata.client_list_lock);
+ mutex_init(&perf_mon_metadata.active_state_lock);
+ INIT_LIST_HEAD(&perf_mon_metadata.client_list);
+
+ ret = platform_driver_register(&gs_perf_mon_platform_driver);
+ if (ret)
+ pr_err("Error when registering driver!\n");
+
+ return ret;
+}
+
+/* Driver exit step to stop monitor if module exits. */
+static void __exit gs_perf_mon_exit(void)
+{
+ gs_perf_mon_stop();
+}
+
+/* A module parameter for frequency of counter updates. */
+static int gs_perf_mon_param_set_ticks(const char *val, const struct kernel_param *kp)
+{
+ unsigned int ticks;
+
+ if (kstrtouint(val, 10, &ticks)) {
+ pr_err("%s: gs_perf_mon parse error\n", __func__);
+ return -EINVAL;
+ }
+
+ if (ticks < 1 || ticks > 32) {
+ pr_err("%s: gs_perf_mon invalid number of ticks\n", __func__);
+ return -EINVAL;
+ }
+
+ perf_mon_config.param_ticks_per_counter_update = ticks;
+ perf_mon_config.client_update_interval_us = ticks * USECS_PER_TICK;
+ perf_mon_config.client_update_backup_us = ticks * USECS_PER_TICK + USECS_PER_TICK / 2;
+
+ return 0;
+}
+
+static int gs_perf_mon_param_get_ticks(char *buf, const struct kernel_param *kp)
+{
+ return sysfs_emit_at(buf, 0, "%u\n", perf_mon_config.param_ticks_per_counter_update);
+}
+
+static const struct kernel_param_ops param_ops = {
+ .set = gs_perf_mon_param_set_ticks,
+ .get = gs_perf_mon_param_get_ticks,
+};
+
+module_param_cb(gs_perf_mon_ticks, &param_ops, NULL, 0644);
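+
+/*
+ * The parameter lands in sysfs, typically at
+ * /sys/module/gs_perf_mon/parameters/gs_perf_mon_ticks (the exact path
+ * depends on the module name). For example, writing 4 sets a 4-tick
+ * counter cadence and rescales both client update intervals.
+ */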
+
+module_init(gs_perf_mon_init);
+module_exit(gs_perf_mon_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Google Performance Monitor");
+MODULE_AUTHOR("Will Song <jinpengsong@google.com>");
\ No newline at end of file
diff --git a/drivers/performance/gs_perf_mon/gs_perf_mon_priv.h b/drivers/performance/gs_perf_mon/gs_perf_mon_priv.h
new file mode 100644
index 000000000..f4eb6efb2
--- /dev/null
+++ b/drivers/performance/gs_perf_mon/gs_perf_mon_priv.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2024 Google, Inc.
+ *
+ * Private Header for Google Pixel Performance Monitor.
+ */
+
+#ifndef _GS_PERF_MON_PRIVATE_H_
+#define _GS_PERF_MON_PRIVATE_H_
+
+/* Num ticks between perf counter updates. */
+#define DEFAULT_TICKS_PER_COUNTER_UPDATE 2
+
+/* How many microseconds per arch_timer tick. */
+#define USECS_PER_TICK (1000000 / CONFIG_HZ)
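+
+/*
+ * For example, with CONFIG_HZ=250 this works out to 4000us per tick,
+ * so the default of DEFAULT_TICKS_PER_COUNTER_UPDATE=2 refreshes the
+ * counters roughly every 8ms.
+ */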
+
+/**
+ * struct cpu_perf_info - Internal container for per-cpu counter data.
+ * @cpu_id: The identifier for this cpu.
+ * @perf_allocation_lock: Syncs allocation and deallocation of perf events.
+ * @idle_state: The idle state of the CPU.
+ * @cpu_perf_lock: Syncs access to perf_ev_data, last_update_ts,
+ * ticks_since_update, and mon_active.
+ *
+ * @mon_active: Is the monitor servicing this CPU?
+ * @time_delta_us: Microseconds elapsed between the last two counter updates.
+ * @last_update_ts: Timestamp of the last counter update.
+ * @ticks_since_update: Number of ticks since last perf update.
+ * @perf_ev_data: Internal per-cpu perf event containers.
+ */
+struct cpu_perf_info {
+ int cpu_id;
+ struct mutex perf_allocation_lock;
+ enum gs_perf_cpu_idle_state idle_state;
+
+ spinlock_t cpu_perf_lock; /* This lock protects the below. */
+ bool mon_active;
+ unsigned long time_delta_us;
+ ktime_t last_update_ts;
+ unsigned int ticks_since_update;
+ struct gs_event_data perf_ev_data[PERF_NUM_COMMON_EVS];
+};
+
+/**
+ * struct gs_perf_mon_state - Container for Monitor Metadata.
+ *
+ * This struct contains internal gs_perf_mon metadata.
+ * It should not be exposed to files outside of gs_perf_mon.
+ *
+ * @is_active: Is the monitor enabled?
+ * @perf_monitor_initialized: Has the monitor been initialized?
+ * @active_state_lock: Lock for turning the monitor on/off.
+ * @last_client_update_ts: Last time the clients were updated.
+ * @client_list: List of clients to service.
+ * @client_list_lock: Mutex protecting the client_list.
+ * @client_shared_data: Performance data to supply to clients.
+ * @perf_mon_task: Kernel thread servicing the clients.
+ * @cpu_data_arr: Array of per-cpu performance data.
+ */
+struct gs_perf_mon_state {
+ bool is_active;
+ bool perf_monitor_initialized;
+ struct mutex active_state_lock;
+ ktime_t last_client_update_ts;
+ struct list_head client_list;
+ struct mutex client_list_lock;
+ struct gs_cpu_perf_data *client_shared_data;
+ struct task_struct *perf_mon_task;
+ struct cpu_perf_info *cpu_data_arr;
+};
+
+/**
+ * struct gs_perf_mon_config - Container for Monitor Configuration Data.
+ *
+ * This struct contains configuration data for the monitor.
+ *
+ * @client_update_backup_us: Backup client-update interval enforced from the tick.
+ * @client_update_interval_us: Opportunistic update interval for clients.
+ * @param_ticks_per_counter_update: Module parameter for how many ticks per monitor
+ * data update.
+ */
+struct gs_perf_mon_config {
+ unsigned int client_update_backup_us;
+ unsigned int client_update_interval_us;
+ unsigned int param_ticks_per_counter_update;
+};
+
+#endif // _GS_PERF_MON_PRIVATE_H_
\ No newline at end of file
diff --git a/include/performance/gs_perf_mon/gs_perf_mon.h b/include/performance/gs_perf_mon/gs_perf_mon.h
new file mode 100644
index 000000000..bd91fbfb9
--- /dev/null
+++ b/include/performance/gs_perf_mon/gs_perf_mon.h
@@ -0,0 +1,222 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2024 Google, Inc.
+ *
+ * Performance Monitor for Google Pixel.
+ */
+
+#ifndef _GS_PERF_MON_H_
+#define _GS_PERF_MON_H_
+
+/**
+ * gs_perf_event_idx - A type for performance events.
+ *
+ * We expect a particular event to have
+ * the same index across all ids and data arrays.
+*/
+enum gs_perf_event_idx {
+ PERF_L2D_CACHE_REFILL_IDX,
+ PERF_STALL_BACKEND_MEM_IDX,
+ PERF_L3_CACHE_MISS_IDX,
+ PERF_INST_IDX,
+ PERF_CYCLE_IDX,
+ PERF_NUM_COMMON_EVS
+};
+
+/**
+ * gs_perf_cpu_idle_state - Enum for idle states.
+ *
+ * Used to supply clients cpu idle information.
+*/
+enum gs_perf_cpu_idle_state {
+ PERF_CPU_ACTIVE,
+ PERF_CPU_IDLE_C1,
+ PERF_CPU_IDLE_C2
+};
+
+/**
+ * gs_counter_type - AMU or PMU
+ *
+ * Different CPUs may choose between AMU or PMU
+ * for the same event. So, for each event, we store
+ * the source monitoring unit.
+*/
+enum gs_counter_type {
+ PMU, /* Performance Monitoring Unit. */
+ AMU /* Activity Monitoring Unit. */
+};
+
+/**
+ * struct gs_event_data - The representation of a perf event.
+ *
+ * Each performance counter on each CPU will have an associated
+ * gs_event_data with it. This struct provides all the information
+ * for the perf event including what the last values are and how
+ * to read the event.
+ *
+ * @pevent: The underlying event structure.
+ * @element_idx: The index of this event. Indicator for what this
+ * event *should* be. Useful for AMU reading.
+ * @counter_type: Is this event from the PMU or AMU?
+ * @raw_event_id: The hardware event id associated with this event.
+ * @curr_count: Total event count.
+ * @prev_count: The last total event count.
+ * @last_delta: The difference between curr_count and prev_count.
+ */
+struct gs_event_data {
+ struct perf_event *pevent;
+ enum gs_perf_event_idx element_idx;
+ enum gs_counter_type counter_type;
+ unsigned int raw_event_id;
+ unsigned long prev_count;
+ unsigned long curr_count;
+ unsigned long last_delta;
+};
+
+/**
+ * struct gs_cpu_perf_data - Container for per-cpu counter data.
+ *
+ * This struct provides the interface for clients desiring CPU profiling
+ * data.
+ *
+ * @cpu_mon_on: Is this cpu being monitored?
+ * @last_update_ts: The last time the counters were updated.
+ * @cpu_freq: The CPU frequency for this sample (if populated).
+ * @time_delta_us: Duration of the last counter-update window.
+ * @perf_ev_last_delta: Per-event count deltas over the last window.
+ * @cpu_idle_state: Current cpu_idle state.
+ */
+struct gs_cpu_perf_data {
+ bool cpu_mon_on;
+ ktime_t last_update_ts;
+ unsigned long cpu_freq;
+ unsigned long time_delta_us;
+ unsigned long perf_ev_last_delta[PERF_NUM_COMMON_EVS];
+ enum gs_perf_cpu_idle_state cpu_idle_state;
+};
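+
+/*
+ * For example, a client can estimate instructions-per-cycle over the
+ * last window from the event deltas (assuming both deltas are valid
+ * and the cycle delta is non-zero):
+ *
+ *	ipc = data->perf_ev_last_delta[PERF_INST_IDX] /
+ *	      data->perf_ev_last_delta[PERF_CYCLE_IDX];
+ */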
+
+/**
+ * gs_perf_mon_callback_func_t - A type for client callbacks.
+ *
+ * @gs_cpu_perf_data_arr: A snapshot of perf data for all CPU cores.
+ * @private_data: The client's private data.
+ */
+typedef void (*gs_perf_mon_callback_func_t)(struct gs_cpu_perf_data *gs_cpu_perf_data_arr,
+ void *private_data);
+
+/**
+ * struct gs_perf_mon_client - Callback data for a client.
+ *
+ * The client's representation in the monitor. Used to invoke callbacks
+ * to service clients.
+ *
+ * @node: The list entry for this client.
+ * @name: Identifier for the client.
+ * @private_data: Client-private data handed back to the callback.
+ * @client_callback: The client's callback function to be called.
+ */
+struct gs_perf_mon_client {
+ struct list_head node;
+ const char *name;
+ void *private_data;
+ gs_perf_mon_callback_func_t client_callback;
+};
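+
+/*
+ * A minimal registration sketch (hypothetical client; the names below
+ * are illustrative and not part of this API):
+ *
+ *	static void my_gov_callback(struct gs_cpu_perf_data *data, void *priv)
+ *	{
+ *		if (data[0].cpu_mon_on)
+ *			pr_info("cpu0 cycle delta: %lu\n",
+ *				data[0].perf_ev_last_delta[PERF_CYCLE_IDX]);
+ *	}
+ *
+ *	static struct gs_perf_mon_client my_gov_client = {
+ *		.name = "my_governor",
+ *		.client_callback = my_gov_callback,
+ *	};
+ *
+ *	gs_perf_mon_add_client(&my_gov_client);
+ */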
+
+#if IS_ENABLED(CONFIG_GS_PERF_MON)
+
+/**
+ * gs_perf_mon_get_data - Primary function for retrieving cpu performance data.
+ *
+ * Inputs:
+ * @cpu: Which CPU's data should we supply?
+ * @data_dest: A location to store the perf data.
+ *
+ * Returns: 0 on success and populates data_dest.
+ *          -EINVAL if the monitor has not been initialized.
+ *          -ENODATA if the monitor is not active on this CPU.
+*/
+int gs_perf_mon_get_data(unsigned int cpu, struct gs_cpu_perf_data *data_dest);
+
+/**
+ * read_perf_event_local - Reading function for total count of a single event.
+ *
+ * REQUIREMENT: IRQs must be disabled.
+ *
+ * This function should be called with IRQs disabled since the calling
+ * task could migrate to another CPU and return an unexpected result.
+ *
+ * Inputs:
+ * @cpu: Ignored; the read always targets the CPU the caller is
+ *       running on.
+ * @event_id: Which event to read?
+ * @count: Place to store the total event count.
+ *
+ * Returns: 0 if the read is successful; stores the result in count.
+ *          -EINVAL if the monitor has not been initialized.
+ *          -ENODATA if the monitor is not active on this CPU.
+ *          Other non-zero values if the underlying counter read fails.
+*/
+int read_perf_event_local(int cpu, unsigned int event_id, u64 *count);
+
+/**
+ * gs_perf_mon_add_client - Registers a client for the perf monitor to service.
+ *
+ * Input:
+ * @client: Data to be added to a serviced list.
+ *
+ * Returns: 0 on success
+ *
+ * Side-Effects: May call gs_perf_mon_start on first client.
+*/
+int gs_perf_mon_add_client(struct gs_perf_mon_client *client);
+
+/**
+ * gs_perf_mon_remove_client - Unregisters a client for the perf monitor to service.
+ *
+ * Input:
+ * @client: Client to be removed from the list.
+ *
+ * Side-Effects: Stops counter collection when the last client gets deregistered.
+*/
+void gs_perf_mon_remove_client(struct gs_perf_mon_client *client);
+
+/**
+ * gs_perf_mon_tick_update_counters - Updates performance counters and triggers
+ * latency governor servicing.
+ *
+ * Requirements: IRQs must be off on local cpu.
+*/
+void gs_perf_mon_tick_update_counters(void);
+
+/**
+ * gs_perf_mon_update_clients - Checks and updates monitor clients.
+ *
+ * Side-Effects: Could call client servicing function.
+*/
+void gs_perf_mon_update_clients(void);
+
+#else
+
+/* No-op stubs used when the monitor is not enabled. */
+static inline int gs_perf_mon_get_data(unsigned int cpu, struct gs_cpu_perf_data *data_dest)
+{
+ return 0;
+}
+static inline int read_perf_event_local(int cpu, unsigned int event_id, u64 *count)
+{
+ return 0;
+}
+static inline int gs_perf_mon_add_client(struct gs_perf_mon_client *client)
+{
+ return 0;
+}
+static inline void gs_perf_mon_remove_client(struct gs_perf_mon_client *client)
+{
+ return;
+}
+static inline void gs_perf_mon_tick_update_counters(void)
+{
+ return;
+}
+static inline void gs_perf_mon_update_clients(void)
+{
+ return;
+}
+
+#endif
+
+#endif // _GS_PERF_MON_H_
\ No newline at end of file