diff options
author | Siddharth Kapoor <ksiddharth@google.com> | 2022-01-07 19:09:01 +0800 |
---|---|---|
committer | Siddharth Kapoor <ksiddharth@google.com> | 2022-01-07 19:09:01 +0800 |
commit | 0207d6c3b7a2002f15c60d08617e956faf5ba90c (patch) | |
tree | eae0afe608a70b25f64e959db2b782fb33f89160 /mali_kbase/mali_kbase_kinstr_prfcnt.c | |
parent | 0c596dc70431fa2c70021fa1685e3efc969a852d (diff) | |
download | gpu-0207d6c3b7a2002f15c60d08617e956faf5ba90c.tar.gz |
Mali Valhall Android DDK r35p0 KMD
Provenance: 3e260085ac (collaborate/EAC/v_r35p0)
VX504X08X-BU-00000-r35p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r35p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r35p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r35p0-01eac0 - Valhall Android Renderscript AOSP parts
Signed-off-by: Siddharth Kapoor <ksiddharth@google.com>
Change-Id: Id9ef73da49680e2935a827c40d54169545f7162e
Diffstat (limited to 'mali_kbase/mali_kbase_kinstr_prfcnt.c')
-rw-r--r-- | mali_kbase/mali_kbase_kinstr_prfcnt.c | 1254 |
1 files changed, 1095 insertions, 159 deletions
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c index ce996ca..27ff3bb 100644 --- a/mali_kbase/mali_kbase_kinstr_prfcnt.c +++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c @@ -19,10 +19,10 @@ * */ +#include "mali_kbase.h" #include "mali_kbase_kinstr_prfcnt.h" #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_types.h" -#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h> #include "mali_kbase_hwcnt_gpu.h" #include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #include "mali_malisw.h" @@ -44,14 +44,12 @@ */ #define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC) -/* The minimum allowed interval between dumps, in microseconds - * (equivalent to 10KHz) - */ -#define DUMP_INTERVAL_MIN_US (DUMP_INTERVAL_MIN_NS / 1000) - /* The maximum allowed buffers per client */ #define MAX_BUFFER_COUNT 32 +/* The module printing prefix */ +#define KINSTR_PRFCNT_PREFIX "mali_kbase_kinstr_prfcnt: " + /** * struct kbase_kinstr_prfcnt_context - IOCTL interface for userspace hardware * counters. @@ -80,11 +78,11 @@ struct kbase_kinstr_prfcnt_context { /** * struct kbase_kinstr_prfcnt_sample - Buffer and descriptor for sample data. - * @sample_meta: Pointer to samle metadata. + * @sample_meta: Pointer to sample metadata. * @dump_buf: Dump buffer containing sample data. */ struct kbase_kinstr_prfcnt_sample { - u64 *sample_meta; + struct prfcnt_metadata *sample_meta; struct kbase_hwcnt_dump_buffer dump_buf; }; @@ -92,7 +90,8 @@ struct kbase_kinstr_prfcnt_sample { * struct kbase_kinstr_prfcnt_sample_array - Array of sample data. * @page_addr: Address of allocated pages. A single allocation is used * for all Dump Buffers in the array. - * @page_order: The allocation order of the pages. + * @page_order: The allocation order of the pages, the order is on a + * logarithmic scale. * @sample_count: Number of allocated samples. * @samples: Non-NULL pointer to the array of Dump Buffers. */ @@ -107,59 +106,91 @@ struct kbase_kinstr_prfcnt_sample_array { * struct kbase_kinstr_prfcnt_client_config - Client session configuration. * @prfcnt_mode: Sampling mode: either manual or periodic. * @counter_set: Set of performance counter blocks. + * @scope: Scope of performance counters to capture. * @buffer_count: Number of buffers used to store samples. - * @period_us: Sampling period, in microseconds, or 0 if manual mode. + * @period_ns: Sampling period, in nanoseconds, or 0 if manual mode. * @phys_em: Enable map used by the GPU. */ struct kbase_kinstr_prfcnt_client_config { u8 prfcnt_mode; u8 counter_set; + u8 scope; u16 buffer_count; - u64 period_us; + u64 period_ns; struct kbase_hwcnt_physical_enable_map phys_em; }; /** + * struct kbase_kinstr_prfcnt_async - Asynchronous sampling operation to + * carry out for a kinstr_prfcnt_client. + * @dump_work: Worker for performing asynchronous counter dumps. + * @user_data: User data for asynchronous dump in progress. + * @ts_end_ns: End timestamp of most recent async dump. + */ +struct kbase_kinstr_prfcnt_async { + struct work_struct dump_work; + u64 user_data; + u64 ts_end_ns; +}; + +/** * struct kbase_kinstr_prfcnt_client - A kinstr_prfcnt client attached * to a kinstr_prfcnt context. - * @kinstr_ctx: kinstr_prfcnt context client is attached to. - * @hvcli: Hardware counter virtualizer client. - * @node: Node used to attach this client to list in kinstr_prfcnt - * context. - * @next_dump_time_ns: Time in ns when this client's next periodic dump must - * occur. If 0, not a periodic client. - * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic - * client. - * @config: Configuration of the client session. - * @enable_map: Counters enable map. - * @tmp_buf: Temporary buffer to use before handing over dump to - * client. - * @sample_arr: Array of dump buffers allocated by this client. - * @dump_bufs_meta: Metadata of dump buffers. - * @meta_idx: Index of metadata being accessed by userspace. - * @read_idx: Index of buffer read by userspace. - * @write_idx: Index of buffer being written by dump worker. - * @waitq: Client's notification queue. - * @sample_size: Size of the data required for one sample, in bytes. - * @sample_count: Number of samples the client is able to capture. + * @kinstr_ctx: kinstr_prfcnt context client is attached to. + * @hvcli: Hardware counter virtualizer client. + * @node: Node used to attach this client to list in + * kinstr_prfcnt context. + * @cmd_sync_lock: Lock coordinating the reader interface for commands + * that need interacting with the async sample dump + * worker thread. + * @next_dump_time_ns: Time in ns when this client's next periodic dump must + * occur. If 0, not a periodic client. + * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic + * client. + * @sample_flags: Flags for the current active dumping sample, marking + * the conditions/events during the dump duration. + * @active: True if the client has been started. + * @config: Configuration of the client session. + * @enable_map: Counters enable map. + * @tmp_buf: Temporary buffer to use before handing over dump to + * client. + * @sample_arr: Array of dump buffers allocated by this client. + * @read_idx: Index of buffer read by userspace. + * @write_idx: Index of buffer being written by dump worker. + * @waitq: Client's notification queue. + * @sample_size: Size of the data required for one sample, in bytes. + * @sample_count: Number of samples the client is able to capture. + * @sync_sample_count: Number of available spaces for synchronous samples. + * It can differ from sample_count if asynchronous + * sample requests are reserving space in the buffer. + * @user_data: User data associated with the session. + * This is set when the session is started and stopped. + * This value is ignored for control commands that + * provide another value. + * @async: Asynchronous sampling operations to carry out in this + * client's session. */ struct kbase_kinstr_prfcnt_client { struct kbase_kinstr_prfcnt_context *kinstr_ctx; struct kbase_hwcnt_virtualizer_client *hvcli; struct list_head node; + struct mutex cmd_sync_lock; u64 next_dump_time_ns; u32 dump_interval_ns; + u32 sample_flags; + bool active; struct kbase_kinstr_prfcnt_client_config config; struct kbase_hwcnt_enable_map enable_map; struct kbase_hwcnt_dump_buffer tmp_buf; struct kbase_kinstr_prfcnt_sample_array sample_arr; - struct kbase_hwcnt_reader_metadata *dump_bufs_meta; - atomic_t meta_idx; atomic_t read_idx; atomic_t write_idx; wait_queue_head_t waitq; size_t sample_size; size_t sample_count; + atomic_t sync_sample_count; + u64 user_data; + struct kbase_kinstr_prfcnt_async async; }; static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = { @@ -188,21 +219,6 @@ static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = { }; /** - * kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready() - Check if client has ready - * buffers. - * @cli: Non-NULL pointer to kinstr_prfcnt client. - * - * Return: Non-zero if client has at least one dumping buffer filled that was - * not notified to user yet. - */ -static int kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready( - struct kbase_kinstr_prfcnt_client *cli) -{ - WARN_ON(!cli); - return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); -} - -/** * kbasep_kinstr_prfcnt_hwcnt_reader_poll() - hwcnt reader's poll. * @filp: Non-NULL pointer to file structure. * @wait: Non-NULL pointer to poll table. @@ -210,8 +226,15 @@ static int kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready( * Return: POLLIN if data can be read without blocking, 0 if data can not be * read without blocking, else error code. */ -static unsigned int kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, - poll_table *wait) +#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +static unsigned int +kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, + struct poll_table_struct *wait) +#else +static __poll_t +kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, + struct poll_table_struct *wait) +#endif { struct kbase_kinstr_prfcnt_client *cli; @@ -225,13 +248,776 @@ static unsigned int kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, poll_wait(filp, &cli->waitq, wait); - if (kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready(cli)) + if (atomic_read(&cli->write_idx) != atomic_read(&cli->read_idx)) return POLLIN; return 0; } /** + * kbasep_kinstr_prfcnt_next_dump_time_ns() - Calculate the next periodic + * dump time. + * @cur_ts_ns: Current time in nanoseconds. + * @interval: Interval between dumps in nanoseconds. + * + * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump + * time that occurs after cur_ts_ns. + */ +static u64 kbasep_kinstr_prfcnt_next_dump_time_ns(u64 cur_ts_ns, u32 interval) +{ + /* Non-periodic client */ + if (interval == 0) + return 0; + + /* + * Return the next interval after the current time relative to t=0. + * This means multiple clients with the same period will synchronize, + * regardless of when they were started, allowing the worker to be + * scheduled less frequently. + */ + do_div(cur_ts_ns, interval); + + return (cur_ts_ns + 1) * interval; +} + +/** + * kbasep_kinstr_prfcnt_timestamp_ns() - Get the current time in nanoseconds. + * + * Return: Current time in nanoseconds. + */ +static u64 kbasep_kinstr_prfcnt_timestamp_ns(void) +{ + return ktime_get_raw_ns(); +} + +/** + * kbasep_kinstr_prfcnt_reschedule_worker() - Update next dump times for all + * periodic kinstr_prfcnt clients, + * then reschedule the dump worker + * appropriately. + * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context. + * + * If there are no periodic clients, then the dump worker will not be + * rescheduled. Else, the dump worker will be rescheduled for the next + * periodic client dump. + */ +static void kbasep_kinstr_prfcnt_reschedule_worker( + struct kbase_kinstr_prfcnt_context *kinstr_ctx) +{ + u64 cur_ts_ns; + u64 shortest_period_ns = U64_MAX; + struct kbase_kinstr_prfcnt_client *pos; + + WARN_ON(!kinstr_ctx); + lockdep_assert_held(&kinstr_ctx->lock); + cur_ts_ns = kbasep_kinstr_prfcnt_timestamp_ns(); + + /* + * This loop fulfills 2 separate tasks that don't affect each other: + * + * 1) Determine the shortest period. + * 2) Update the next dump time of clients that have already been + * dumped. It's important not to alter the next dump time of clients + * that haven't been dumped yet. + * + * For the sake of efficiency, the rescheduling decision ignores the time + * of the next dump and just uses the shortest period among all periodic + * clients. It is more efficient to serve multiple dump requests at once, + * rather than trying to reschedule the worker to serve each request + * individually. + */ + list_for_each_entry(pos, &kinstr_ctx->clients, node) { + /* Ignore clients that are not periodic or not active. */ + if (pos->active && pos->dump_interval_ns > 0) { + shortest_period_ns = + MIN(shortest_period_ns, pos->dump_interval_ns); + + /* Next dump should happen exactly one period after the last dump. + * If last dump was overdue and scheduled to happen more than one + * period ago, compensate for that by scheduling next dump in the + * immediate future. + */ + if (pos->next_dump_time_ns < cur_ts_ns) + pos->next_dump_time_ns = + MAX(cur_ts_ns + 1, + pos->next_dump_time_ns + + pos->dump_interval_ns); + } + } + + /* Cancel the timer if it is already pending */ + hrtimer_cancel(&kinstr_ctx->dump_timer); + + /* Start the timer if there are periodic clients and kinstr_prfcnt is not + * suspended. + */ + if ((shortest_period_ns != U64_MAX) && + (kinstr_ctx->suspend_count == 0)) { + u64 next_schedule_time_ns = + kbasep_kinstr_prfcnt_next_dump_time_ns( + cur_ts_ns, shortest_period_ns); + hrtimer_start(&kinstr_ctx->dump_timer, + ns_to_ktime(next_schedule_time_ns - cur_ts_ns), + HRTIMER_MODE_REL); + } +} + +static enum prfcnt_block_type +kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type) +{ + enum prfcnt_block_type block_type; + + switch (type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: + block_type = PRFCNT_BLOCK_TYPE_FE; + break; + + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + block_type = PRFCNT_BLOCK_TYPE_TILER; + break; + + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: + block_type = PRFCNT_BLOCK_TYPE_SHADER_CORE; + break; + + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + block_type = PRFCNT_BLOCK_TYPE_MEMORY; + break; + + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: + default: + block_type = PRFCNT_BLOCK_TYPE_RESERVED; + break; + } + + return block_type; +} + +/** + * kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta + * item array. + * @dst: Non-NULL pointer to the sample's dump buffer object. + * @block_meta_base: Non-NULL double pointer to the start of the block meta + * data items. + * @base_addr: Address of allocated pages for array of samples. Used + * to calculate offset of block values. + * @counter_set: The SET which blocks represent. + */ +int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_dump_buffer *dst, + struct prfcnt_metadata **block_meta_base, + u64 base_addr, u8 counter_set) +{ + size_t grp, blk, blk_inst; + struct prfcnt_metadata **ptr_md = block_meta_base; + const struct kbase_hwcnt_metadata *metadata; + + if (!dst || !*block_meta_base) + return -EINVAL; + + metadata = dst->metadata; + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u64 *dst_blk; + + /* Skip unused blocks */ + if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + (*ptr_md)->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_BLOCK; + (*ptr_md)->hdr.item_version = PRFCNT_READER_API_VERSION; + (*ptr_md)->u.block_md.block_type = + kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( + kbase_hwcnt_metadata_block_type(metadata, grp, + blk)); + (*ptr_md)->u.block_md.block_idx = (u8)blk_inst; + (*ptr_md)->u.block_md.set = counter_set; + (*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN; + (*ptr_md)->u.block_md.values_offset = (u32)((u64)(uintptr_t)dst_blk - base_addr); + + /* update the buf meta data block pointer to next item */ + (*ptr_md)++; + } + + return 0; +} + +/** + * kbasep_kinstr_prfcnt_set_sample_metadata() - Set sample metadata for sample + * output. + * @cli: Non-NULL pointer to a kinstr_prfcnt client. + * @dump_buf: Non-NULL pointer to dump buffer where sample is stored. + * @ptr_md: Non-NULL pointer to sample metadata. + */ +static void kbasep_kinstr_prfcnt_set_sample_metadata( + struct kbase_kinstr_prfcnt_client *cli, + struct kbase_hwcnt_dump_buffer *dump_buf, + struct prfcnt_metadata *ptr_md) +{ + u8 clk_cnt, i; + + clk_cnt = cli->kinstr_ctx->metadata->clk_cnt; + + /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item */ + ptr_md->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_SAMPLE; + ptr_md->hdr.item_version = PRFCNT_READER_API_VERSION; + ptr_md->u.sample_md.seq = atomic_read(&cli->write_idx); + ptr_md->u.sample_md.flags = cli->sample_flags; + + /* Place the PRFCNT_SAMPLE_META_TYPE_CLOCK optionally as the 2nd */ + ptr_md++; + if (clk_cnt > MAX_REPORTED_DOMAINS) + clk_cnt = MAX_REPORTED_DOMAINS; + + /* Handle the prfcnt_clock_metadata meta item */ + ptr_md->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_CLOCK; + ptr_md->hdr.item_version = PRFCNT_READER_API_VERSION; + ptr_md->u.clock_md.num_domains = clk_cnt; + for (i = 0; i < clk_cnt; i++) + ptr_md->u.clock_md.cycles[i] = dump_buf->clk_cnt_buf[i]; + + /* Dealing with counter blocks */ + ptr_md++; + if (WARN_ON(kbasep_kinstr_prfcnt_set_block_meta_items( + dump_buf, &ptr_md, cli->sample_arr.page_addr, cli->config.counter_set))) + return; + + /* Handle the last sentinel item */ + ptr_md->hdr.item_type = FLEX_LIST_TYPE_NONE; + ptr_md->hdr.item_version = 0; +} + +/** + * kbasep_kinstr_prfcnt_client_output_empty_sample() - Assemble an empty sample + * for output. + * @cli: Non-NULL pointer to a kinstr_prfcnt client. + * @buf_idx: The index to the sample array for saving the sample. + */ +static void kbasep_kinstr_prfcnt_client_output_empty_sample( + struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx) +{ + struct kbase_hwcnt_dump_buffer *dump_buf; + struct prfcnt_metadata *ptr_md; + + if (WARN_ON(buf_idx >= cli->sample_arr.sample_count)) + return; + + dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf; + ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; + + kbase_hwcnt_dump_buffer_zero(dump_buf, &cli->enable_map); + + /* Use end timestamp from most recent async dump */ + ptr_md->u.sample_md.timestamp_start = cli->async.ts_end_ns; + ptr_md->u.sample_md.timestamp_end = cli->async.ts_end_ns; + + kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md); +} + +/** + * kbasep_kinstr_prfcnt_client_output_sample() - Assemble a sample for output. + * @cli: Non-NULL pointer to a kinstr_prfcnt client. + * @buf_idx: The index to the sample array for saving the sample. + * @user_data: User data to return to the user. + * @ts_start_ns: Time stamp for the start point of the sample dump. + * @ts_end_ns: Time stamp for the end point of the sample dump. + */ +static void kbasep_kinstr_prfcnt_client_output_sample( + struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx, + u64 user_data, u64 ts_start_ns, u64 ts_end_ns) +{ + struct kbase_hwcnt_dump_buffer *dump_buf; + struct kbase_hwcnt_dump_buffer *tmp_buf = &cli->tmp_buf; + struct prfcnt_metadata *ptr_md; + + if (WARN_ON(buf_idx >= cli->sample_arr.sample_count)) + return; + + dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf; + ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; + + /* Patch the dump buf headers, to hide the counters that other hwcnt + * clients are using. + */ + kbase_hwcnt_gpu_patch_dump_headers(tmp_buf, &cli->enable_map); + + /* Copy the temp buffer to the userspace visible buffer. The strict + * variant will explicitly zero any non-enabled counters to ensure + * nothing except exactly what the user asked for is made visible. + */ + kbase_hwcnt_dump_buffer_copy_strict(dump_buf, tmp_buf, + &cli->enable_map); + + /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item. + * Set timestamp and user data for real dump. + */ + ptr_md->u.sample_md.timestamp_start = ts_start_ns; + ptr_md->u.sample_md.timestamp_end = ts_end_ns; + ptr_md->u.sample_md.user_data = user_data; + + kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md); +} + +/** + * kbasep_kinstr_prfcnt_client_dump() - Perform a dump for a client. + * @cli: Non-NULL pointer to a kinstr_prfcnt client. + * @event_id: Event type that triggered the dump. + * @user_data: User data to return to the user. + * @async_dump: Whether this is an asynchronous dump or not. + * @empty_sample: Sample block data will be 0 if this is true. + * + * Return: 0 on success, else error code. + */ +static int +kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, + enum base_hwcnt_reader_event event_id, + u64 user_data, bool async_dump, + bool empty_sample) +{ + int ret; + u64 ts_start_ns = 0; + u64 ts_end_ns = 0; + unsigned int write_idx; + unsigned int read_idx; + size_t available_samples_count; + + WARN_ON(!cli); + lockdep_assert_held(&cli->kinstr_ctx->lock); + + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + + /* Check if there is a place to copy HWC block into. Calculate the + * number of available samples count, by taking into account the type + * of dump. + * Asynchronous dumps have the ability to reserve space in the samples + * array for future dumps, unlike synchronous dumps. Because of that, + * the samples count for synchronous dumps is managed by a variable + * called sync_sample_count, that originally is defined as equal to the + * size of the whole array but later decreases every time an + * asynchronous dump request is pending and then re-increased every + * time an asynchronous dump request is completed. + */ + available_samples_count = async_dump ? + cli->sample_arr.sample_count : + atomic_read(&cli->sync_sample_count); + if (write_idx - read_idx == available_samples_count) { + /* For periodic sampling, the current active dump + * will be accumulated in the next sample, when + * a buffer becomes available. + */ + if (event_id == BASE_HWCNT_READER_EVENT_PERIODIC) + cli->sample_flags |= SAMPLE_FLAG_OVERFLOW; + return -EBUSY; + } + + /* For the rest of the function, use the actual sample_count + * that represents the real size of the array. + */ + write_idx %= cli->sample_arr.sample_count; + + if (!empty_sample) { + ret = kbase_hwcnt_virtualizer_client_dump( + cli->hvcli, &ts_start_ns, &ts_end_ns, &cli->tmp_buf); + /* HWC dump error, set the sample with error flag */ + if (ret) + cli->sample_flags |= SAMPLE_FLAG_ERROR; + + /* Make the sample ready and copy it to the userspace mapped buffer */ + kbasep_kinstr_prfcnt_client_output_sample( + cli, write_idx, user_data, ts_start_ns, ts_end_ns); + } else { + if (!async_dump) { + struct prfcnt_metadata *ptr_md; + /* User data will not be updated for empty samples. */ + ptr_md = cli->sample_arr.samples[write_idx].sample_meta; + ptr_md->u.sample_md.user_data = user_data; + } + + /* Make the sample ready and copy it to the userspace mapped buffer */ + kbasep_kinstr_prfcnt_client_output_empty_sample(cli, write_idx); + } + + /* Notify client. Make sure all changes to memory are visible. */ + wmb(); + atomic_inc(&cli->write_idx); + if (async_dump) { + /* Remember the end timestamp of async dump for empty samples */ + if (!empty_sample) + cli->async.ts_end_ns = ts_end_ns; + + atomic_inc(&cli->sync_sample_count); + } + wake_up_interruptible(&cli->waitq); + /* Reset the flags for the next sample dump */ + cli->sample_flags = 0; + + return 0; +} + +static int +kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, + u64 user_data) +{ + int ret; + u64 tm_start, tm_end; + + WARN_ON(!cli); + lockdep_assert_held(&cli->cmd_sync_lock); + + /* If the client is already started, the command is a no-op */ + if (cli->active) + return 0; + + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, + &cli->config.phys_em); + + mutex_lock(&cli->kinstr_ctx->lock); + /* Enable HWC from the configuration of the client creation */ + ret = kbase_hwcnt_virtualizer_client_set_counters( + cli->hvcli, &cli->enable_map, &tm_start, &tm_end, NULL); + + if (!ret) { + atomic_set(&cli->sync_sample_count, cli->sample_count); + cli->active = true; + cli->user_data = user_data; + cli->sample_flags = 0; + + if (cli->dump_interval_ns) + kbasep_kinstr_prfcnt_reschedule_worker(cli->kinstr_ctx); + } + + mutex_unlock(&cli->kinstr_ctx->lock); + + return ret; +} + +static int kbasep_kinstr_prfcnt_client_wait_async_done( + struct kbase_kinstr_prfcnt_client *cli) +{ + lockdep_assert_held(&cli->cmd_sync_lock); + + return wait_event_interruptible(cli->waitq, + atomic_read(&cli->sync_sample_count) == + cli->sample_count); +} + +static int +kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, + u64 user_data) +{ + int ret; + u64 tm_start = 0; + u64 tm_end = 0; + struct kbase_hwcnt_physical_enable_map phys_em; + struct kbase_hwcnt_dump_buffer *tmp_buf = NULL; + unsigned int write_idx; + unsigned int read_idx; + + WARN_ON(!cli); + lockdep_assert_held(&cli->cmd_sync_lock); + + /* If the client is not started, the command is invalid */ + if (!cli->active) + return -EINVAL; + + /* Wait until pending async sample operation done */ + ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli); + + if (ret < 0) + return -ERESTARTSYS; + + phys_em.fe_bm = 0; + phys_em.tiler_bm = 0; + phys_em.mmu_l2_bm = 0; + phys_em.shader_bm = 0; + + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em); + + mutex_lock(&cli->kinstr_ctx->lock); + + /* Check whether one has the buffer to hold the last sample */ + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + + /* Check if there is a place to save the last stop produced sample */ + if (write_idx - read_idx < cli->sample_arr.sample_count) + tmp_buf = &cli->tmp_buf; + + ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli, + &cli->enable_map, + &tm_start, &tm_end, + &cli->tmp_buf); + /* If the last stop sample is in error, set the sample flag */ + if (ret) + cli->sample_flags |= SAMPLE_FLAG_ERROR; + + if (tmp_buf) { + write_idx %= cli->sample_arr.sample_count; + /* Handle the last stop sample */ + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, + &cli->config.phys_em); + /* As this is a stop sample, mark it as MANUAL */ + kbasep_kinstr_prfcnt_client_output_sample( + cli, write_idx, user_data, tm_start, tm_end); + /* Notify client. Make sure all changes to memory are visible. */ + wmb(); + atomic_inc(&cli->write_idx); + wake_up_interruptible(&cli->waitq); + } + + cli->active = false; + cli->user_data = user_data; + + if (cli->dump_interval_ns) + kbasep_kinstr_prfcnt_reschedule_worker(cli->kinstr_ctx); + + mutex_unlock(&cli->kinstr_ctx->lock); + + return ret; +} + +static int +kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli, + u64 user_data) +{ + int ret; + bool empty_sample = false; + + lockdep_assert_held(&cli->cmd_sync_lock); + + /* If the client is not started, or not manual, the command invalid */ + if (!cli->active || cli->dump_interval_ns) + return -EINVAL; + + /* Wait until pending async sample operation done, this is required to + * satisfy the stated sample sequence following their issuing order, + * reflected by the sample start timestamp. + */ + if (atomic_read(&cli->sync_sample_count) != cli->sample_count) { + /* Return empty sample instead of performing real dump. + * As there is an async dump currently in-flight which will + * have the desired information. + */ + empty_sample = true; + ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli); + + if (ret < 0) + return -ERESTARTSYS; + } + + mutex_lock(&cli->kinstr_ctx->lock); + + ret = kbasep_kinstr_prfcnt_client_dump(cli, + BASE_HWCNT_READER_EVENT_MANUAL, + user_data, false, empty_sample); + + mutex_unlock(&cli->kinstr_ctx->lock); + + return ret; +} + +static int +kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli, + u64 user_data) +{ + unsigned int write_idx; + unsigned int read_idx; + unsigned int active_async_dumps; + unsigned int new_async_buf_idx; + int ret; + + lockdep_assert_held(&cli->cmd_sync_lock); + + /* If the client is not started, or not manual, the command invalid */ + if (!cli->active || cli->dump_interval_ns) + return -EINVAL; + + mutex_lock(&cli->kinstr_ctx->lock); + + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + active_async_dumps = + cli->sample_count - atomic_read(&cli->sync_sample_count); + new_async_buf_idx = write_idx + active_async_dumps; + + /* Check if there is a place to copy HWC block into. + * If successful, reserve space in the buffer for the asynchronous + * operation to make sure that it can actually take place. + * Because we reserve space for asynchronous dumps we need to take that + * in consideration here. + */ + ret = (new_async_buf_idx - read_idx == cli->sample_arr.sample_count) ? + -EBUSY : + 0; + + if (ret == -EBUSY) { + mutex_unlock(&cli->kinstr_ctx->lock); + return ret; + } + + if (active_async_dumps > 0) { + struct prfcnt_metadata *ptr_md; + unsigned int buf_idx = + new_async_buf_idx % cli->sample_arr.sample_count; + /* Instead of storing user_data, write it directly to future + * empty sample. + */ + ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; + ptr_md->u.sample_md.user_data = user_data; + + atomic_dec(&cli->sync_sample_count); + } else { + cli->async.user_data = user_data; + atomic_dec(&cli->sync_sample_count); + + kbase_hwcnt_virtualizer_queue_work(cli->kinstr_ctx->hvirt, + &cli->async.dump_work); + } + + mutex_unlock(&cli->kinstr_ctx->lock); + + return ret; +} + +static int +kbasep_kinstr_prfcnt_client_discard(struct kbase_kinstr_prfcnt_client *cli) +{ + WARN_ON(!cli); + lockdep_assert_held(&cli->cmd_sync_lock); + + mutex_lock(&cli->kinstr_ctx->lock); + + /* Discard (Clear) all internally buffered samples */ + atomic_set(&cli->read_idx, atomic_read(&cli->write_idx)); + + mutex_unlock(&cli->kinstr_ctx->lock); + + return 0; +} + +/** + * kbasep_kinstr_prfcnt_cmd() - Execute command for a client session. + * @cli: Non-NULL pointer to kinstr_prfcnt client. + * @control_cmd: Control command to execute. + * + * Return: 0 on success, else error code. + */ +static int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli, + struct prfcnt_control_cmd *control_cmd) +{ + int ret = 0; + + mutex_lock(&cli->cmd_sync_lock); + + switch (control_cmd->cmd) { + case PRFCNT_CONTROL_CMD_START: + ret = kbasep_kinstr_prfcnt_client_start(cli, + control_cmd->user_data); + break; + case PRFCNT_CONTROL_CMD_STOP: + ret = kbasep_kinstr_prfcnt_client_stop(cli, + control_cmd->user_data); + break; + case PRFCNT_CONTROL_CMD_SAMPLE_SYNC: + ret = kbasep_kinstr_prfcnt_client_sync_dump( + cli, control_cmd->user_data); + break; + case PRFCNT_CONTROL_CMD_SAMPLE_ASYNC: + ret = kbasep_kinstr_prfcnt_client_async_dump( + cli, control_cmd->user_data); + break; + case PRFCNT_CONTROL_CMD_DISCARD: + ret = kbasep_kinstr_prfcnt_client_discard(cli); + break; + default: + ret = -EINVAL; + break; + } + + mutex_unlock(&cli->cmd_sync_lock); + + return ret; +} + +static int +kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli, + struct prfcnt_sample_access *sample_access) +{ + unsigned int write_idx; + unsigned int read_idx; + u64 sample_offset_bytes; + struct prfcnt_metadata *sample_meta; + + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + + if (write_idx == read_idx) + return -EINVAL; + + read_idx %= cli->sample_arr.sample_count; + sample_offset_bytes = + (u64)(uintptr_t)cli->sample_arr.samples[read_idx].sample_meta - + (u64)(uintptr_t)cli->sample_arr.page_addr; + sample_meta = + (struct prfcnt_metadata *)cli->sample_arr.samples[read_idx] + .sample_meta; + + /* Verify that a valid sample has been dumped in the read_idx. + * There are situations where this may not be the case, + * for instance if the client is trying to get an asynchronous + * sample which has not been dumped yet. + */ + if (sample_meta->hdr.item_type != PRFCNT_SAMPLE_META_TYPE_SAMPLE) + return -EINVAL; + if (sample_meta->hdr.item_version != PRFCNT_READER_API_VERSION) + return -EINVAL; + + sample_access->sequence = sample_meta->u.sample_md.seq; + sample_access->sample_offset_bytes = sample_offset_bytes; + + /* read_idx is not incremented here, because the interface allows + * only one sample to be "in flight" between kernel space and user space. + */ + + return 0; +} + +static int +kbasep_kinstr_prfcnt_put_sample(struct kbase_kinstr_prfcnt_client *cli, + struct prfcnt_sample_access *sample_access) +{ + unsigned int write_idx; + unsigned int read_idx; + u64 sample_offset_bytes; + + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + + if (write_idx == read_idx) + return -EINVAL; + + if (sample_access->sequence != read_idx) + return -EINVAL; + + read_idx %= cli->sample_arr.sample_count; + sample_offset_bytes = + (u64)(uintptr_t)cli->sample_arr.samples[read_idx].sample_meta - + (u64)(uintptr_t)cli->sample_arr.page_addr; + + if (sample_access->sample_offset_bytes != sample_offset_bytes) + return -EINVAL; + + atomic_inc(&cli->read_idx); + + return 0; +} + +/** * kbasep_kinstr_prfcnt_hwcnt_reader_ioctl() - hwcnt reader's ioctl. * @filp: Non-NULL pointer to file structure. * @cmd: User command. @@ -243,10 +1029,11 @@ static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - long rcode; + long rcode = 0; struct kbase_kinstr_prfcnt_client *cli; + void __user *uarg = (void __user *)arg; - if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER)) + if (!filp) return -EINVAL; cli = filp->private_data; @@ -255,8 +1042,36 @@ static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp, return -EINVAL; switch (_IOC_NR(cmd)) { + case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_CMD): { + struct prfcnt_control_cmd control_cmd; + int err; + + err = copy_from_user(&control_cmd, uarg, sizeof(control_cmd)); + if (err) + return -EFAULT; + rcode = kbasep_kinstr_prfcnt_cmd(cli, &control_cmd); + } break; + case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE): { + struct prfcnt_sample_access sample_access; + int err; + + memset(&sample_access, 0, sizeof(sample_access)); + rcode = kbasep_kinstr_prfcnt_get_sample(cli, &sample_access); + err = copy_to_user(uarg, &sample_access, sizeof(sample_access)); + if (err) + return -EFAULT; + } break; + case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE): { + struct prfcnt_sample_access sample_access; + int err; + + err = copy_from_user(&sample_access, uarg, + sizeof(sample_access)); + if (err) + return -EFAULT; + rcode = kbasep_kinstr_prfcnt_put_sample(cli, &sample_access); + } break; default: - pr_warn("Unknown HWCNT ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); rcode = -EINVAL; break; } @@ -279,7 +1094,6 @@ static int kbasep_kinstr_prfcnt_hwcnt_reader_mmap(struct file *filp, if (!filp || !vma) return -EINVAL; - cli = filp->private_data; if (!cli) @@ -334,10 +1148,10 @@ kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) return; kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); - kfree(cli->dump_bufs_meta); kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); kbase_hwcnt_enable_map_free(&cli->enable_map); + mutex_destroy(&cli->cmd_sync_lock); kfree(cli); } @@ -377,6 +1191,31 @@ static const struct file_operations kinstr_prfcnt_client_fops = { .release = kbasep_kinstr_prfcnt_hwcnt_reader_release, }; +size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata) +{ + size_t grp, blk, blk_inst; + size_t md_count = 0; + + if (!metadata) + return 0; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + /* Skip unused blocks */ + if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) + continue; + + md_count++; + } + + /* add counts for clock_meta and sample meta, respectively */ + md_count += 2; + + /* Reserve one for last sentinel item. */ + md_count++; + + return md_count; +} + static size_t kbasep_kinstr_prfcnt_get_sample_size( const struct kbase_hwcnt_metadata *metadata, struct kbase_hwcnt_dump_buffer *dump_buf) @@ -384,19 +1223,12 @@ static size_t kbasep_kinstr_prfcnt_get_sample_size( size_t dump_buf_bytes; size_t clk_cnt_buf_bytes; size_t sample_meta_bytes; - size_t block_count = 0; - size_t grp, blk, blk_inst; + size_t md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata); if (!metadata) return 0; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - block_count++; - - /* Reserve one for last sentinel item. */ - block_count++; - - sample_meta_bytes = sizeof(struct prfcnt_metadata) * block_count; + sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count; dump_buf_bytes = metadata->dump_buf_bytes; clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; @@ -411,7 +1243,68 @@ static size_t kbasep_kinstr_prfcnt_get_sample_size( */ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) { - /* Do nothing. */ + struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of( + work, struct kbase_kinstr_prfcnt_context, dump_work); + struct kbase_kinstr_prfcnt_client *pos; + u64 cur_time_ns; + + mutex_lock(&kinstr_ctx->lock); + + cur_time_ns = kbasep_kinstr_prfcnt_timestamp_ns(); + + list_for_each_entry(pos, &kinstr_ctx->clients, node) { + if (pos->active && (pos->next_dump_time_ns != 0) && + (pos->next_dump_time_ns < cur_time_ns)) + kbasep_kinstr_prfcnt_client_dump( + pos, BASE_HWCNT_READER_EVENT_PERIODIC, + pos->user_data, false, false); + } + + kbasep_kinstr_prfcnt_reschedule_worker(kinstr_ctx); + + mutex_unlock(&kinstr_ctx->lock); +} + +/** + * kbasep_kinstr_prfcnt_async_dump_worker()- Dump worker for a manual client + * to take a single asynchronous + * sample. + * @work: Work structure. + */ +static void kbasep_kinstr_prfcnt_async_dump_worker(struct work_struct *work) +{ + struct kbase_kinstr_prfcnt_async *cli_async = + container_of(work, struct kbase_kinstr_prfcnt_async, dump_work); + struct kbase_kinstr_prfcnt_client *cli = container_of( + cli_async, struct kbase_kinstr_prfcnt_client, async); + + mutex_lock(&cli->kinstr_ctx->lock); + /* While the async operation is in flight, a sync stop might have been + * executed, for which the dump should be skipped. Further as we are + * doing an async dump, we expect that there is reserved buffer for + * this to happen. This is to avoid the rare corner case where the + * user side has issued a stop/start pair before the async work item + * get the chance to execute. + */ + if (cli->active && + (atomic_read(&cli->sync_sample_count) < cli->sample_count)) + kbasep_kinstr_prfcnt_client_dump(cli, + BASE_HWCNT_READER_EVENT_MANUAL, + cli->async.user_data, true, + false); + + /* While the async operation is in flight, more async dump requests + * may have been submitted. In this case, no more async dumps work + * will be queued. Instead space will be reserved for that dump and + * an empty sample will be return after handling the current async + * dump. + */ + while (cli->active && + (atomic_read(&cli->sync_sample_count) < cli->sample_count)) { + kbasep_kinstr_prfcnt_client_dump( + cli, BASE_HWCNT_READER_EVENT_MANUAL, 0, true, true); + } + mutex_unlock(&cli->kinstr_ctx->lock); } /** @@ -422,6 +1315,17 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) static enum hrtimer_restart kbasep_kinstr_prfcnt_dump_timer(struct hrtimer *timer) { + struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of( + timer, struct kbase_kinstr_prfcnt_context, dump_timer); + + /* We don't need to check kinstr_ctx->suspend_count here. + * Suspend and resume functions already ensure that the worker + * is cancelled when the driver is suspended, and resumed when + * the suspend_count reaches 0. + */ + kbase_hwcnt_virtualizer_queue_work(kinstr_ctx->hvirt, + &kinstr_ctx->dump_work); + return HRTIMER_NORESTART; } @@ -555,20 +1459,14 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc( size_t dump_buf_bytes; size_t clk_cnt_buf_bytes; size_t sample_meta_bytes; - size_t block_count = 0; + size_t md_count; size_t sample_size; - size_t grp, blk, blk_inst; if (!metadata || !sample_arr) return -EINVAL; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - block_count++; - - /* Reserve one for last sentinel item. */ - block_count++; - - sample_meta_bytes = sizeof(struct prfcnt_metadata) * block_count; + md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata); + sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count; dump_buf_bytes = metadata->dump_buf_bytes; clk_cnt_buf_bytes = sizeof(*samples->dump_buf.clk_cnt_buf) * metadata->clk_cnt; @@ -602,7 +1500,8 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc( /* Internal layout in a sample buffer: [sample metadata, dump_buf, clk_cnt_buf]. */ samples[sample_idx].dump_buf.metadata = metadata; samples[sample_idx].sample_meta = - (u64 *)(uintptr_t)(addr + sample_meta_offset); + (struct prfcnt_metadata *)(uintptr_t)( + addr + sample_meta_offset); samples[sample_idx].dump_buf.dump_buf = (u64 *)(uintptr_t)(addr + dump_buf_offset); samples[sample_idx].dump_buf.clk_cnt_buf = @@ -724,6 +1623,31 @@ static int kbasep_kinstr_prfcnt_parse_request_enable( } /** + * kbasep_kinstr_prfcnt_parse_request_scope - Parse a scope request + * @req_scope: Performance counters scope request to parse. + * @config: Client object the session configuration should be written to. + * + * This function parses a performance counters scope request. + * There are only 2 acceptable outcomes: either the client leaves the scope + * as undefined, or all the scope requests are set to the same value. + * + * Return: 0 on success, else error code. + */ +static int kbasep_kinstr_prfcnt_parse_request_scope( + const struct prfcnt_request_scope *req_scope, + struct kbase_kinstr_prfcnt_client_config *config) +{ + int err = 0; + + if (config->scope == PRFCNT_SCOPE_RESERVED) + config->scope = req_scope->scope; + else if (config->scope != req_scope->scope) + err = -EINVAL; + + return err; +} + +/** * kbasep_kinstr_prfcnt_parse_setup - Parse session setup * @kinstr_ctx: Pointer to the kinstr_prfcnt context. * @setup: Session setup information to parse. @@ -742,35 +1666,48 @@ static int kbasep_kinstr_prfcnt_parse_setup( { uint32_t i; struct prfcnt_request_item *req_arr; + unsigned int item_count = setup->in.request_item_count; + unsigned long bytes; int err = 0; - if (!setup->in.requests_ptr || (setup->in.request_item_count == 0) || - (setup->in.request_item_size == 0)) { + /* Limiting the request items to 2x of the expected: acommodating + * moderate duplications but rejecting excessive abuses. + */ + if (!setup->in.requests_ptr || (item_count < 2) || + (setup->in.request_item_size == 0) || + item_count > 2 * kinstr_ctx->info_item_count) { return -EINVAL; } - req_arr = - (struct prfcnt_request_item *)(uintptr_t)setup->in.requests_ptr; + bytes = item_count * sizeof(*req_arr); + req_arr = kmalloc(bytes, GFP_KERNEL); + if (!req_arr) + return -ENOMEM; - if (req_arr[setup->in.request_item_count - 1].hdr.item_type != - FLEX_LIST_TYPE_NONE) { - return -EINVAL; + if (copy_from_user(req_arr, u64_to_user_ptr(setup->in.requests_ptr), + bytes)) { + err = -EFAULT; + goto free_buf; } - if (req_arr[setup->in.request_item_count - 1].hdr.item_version != 0) - return -EINVAL; + if (req_arr[item_count - 1].hdr.item_type != FLEX_LIST_TYPE_NONE || + req_arr[item_count - 1].hdr.item_version != 0) { + err = -EINVAL; + goto free_buf; + } /* The session configuration can only feature one value for some - * properties (like capture mode and block counter set), but the client - * may potential issue multiple requests and try to set more than one - * value for those properties. While issuing multiple requests for the + * properties (like capture mode, block counter set and scope), but the + * client may potential issue multiple requests and try to set more than + * one value for those properties. While issuing multiple requests for the * same property is allowed by the protocol, asking for different values * is illegal. Leaving these properties as undefined is illegal, too. */ config->prfcnt_mode = PRFCNT_MODE_RESERVED; config->counter_set = KBASE_HWCNT_SET_UNDEFINED; + config->scope = PRFCNT_SCOPE_RESERVED; - for (i = 0; i < setup->in.request_item_count - 1; i++) { + for (i = 0; i < item_count - 1; i++) { if (req_arr[i].hdr.item_version > PRFCNT_READER_API_VERSION) { err = -EINVAL; break; @@ -797,17 +1734,20 @@ static int kbasep_kinstr_prfcnt_parse_setup( break; if (config->prfcnt_mode == PRFCNT_MODE_PERIODIC) { - config->period_us = + config->period_ns = req_arr[i] .u.req_mode.mode_config.periodic - .period_us; + .period_ns; - if ((config->period_us != 0) && - (config->period_us < - DUMP_INTERVAL_MIN_US)) { - config->period_us = - DUMP_INTERVAL_MIN_US; + if ((config->period_ns != 0) && + (config->period_ns < + DUMP_INTERVAL_MIN_NS)) { + config->period_ns = + DUMP_INTERVAL_MIN_NS; } + + if (config->period_ns == 0) + err = -EINVAL; } break; @@ -816,6 +1756,11 @@ static int kbasep_kinstr_prfcnt_parse_setup( &req_arr[i].u.req_enable, config); break; + case PRFCNT_REQUEST_TYPE_SCOPE: + err = kbasep_kinstr_prfcnt_parse_request_scope( + &req_arr[i].u.req_scope, config); + break; + default: err = -EINVAL; break; @@ -825,14 +1770,19 @@ static int kbasep_kinstr_prfcnt_parse_setup( break; } - /* Verify that properties (like capture mode and block counter set) - * have been defined by the user space client. - */ - if (config->prfcnt_mode == PRFCNT_MODE_RESERVED) - err = -EINVAL; +free_buf: + kfree(req_arr); - if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED) - err = -EINVAL; + if (!err) { + /* Verify that properties (like capture mode and block counter + * set) have been defined by the user space client. + */ + if (config->prfcnt_mode == PRFCNT_MODE_RESERVED) + err = -EINVAL; + + if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED) + err = -EINVAL; + } return err; } @@ -872,8 +1822,12 @@ static int kbasep_kinstr_prfcnt_client_create( goto error; cli->config.buffer_count = MAX_BUFFER_COUNT; - cli->dump_interval_ns = cli->config.period_us * NSEC_PER_USEC; + cli->dump_interval_ns = cli->config.period_ns; cli->next_dump_time_ns = 0; + cli->active = false; + atomic_set(&cli->write_idx, 0); + atomic_set(&cli->read_idx, 0); + err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, &cli->enable_map); @@ -888,6 +1842,7 @@ static int kbasep_kinstr_prfcnt_client_create( kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em); cli->sample_count = cli->config.buffer_count; + atomic_set(&cli->sync_sample_count, cli->sample_count); cli->sample_size = kbasep_kinstr_prfcnt_get_sample_size( kinstr_ctx->metadata, &cli->tmp_buf); @@ -914,15 +1869,6 @@ static int kbasep_kinstr_prfcnt_client_create( if (err < 0) goto error; - err = -ENOMEM; - - cli->dump_bufs_meta = - kmalloc_array(cli->config.buffer_count, - sizeof(*cli->dump_bufs_meta), GFP_KERNEL); - - if (!cli->dump_bufs_meta) - goto error; - err = kbase_hwcnt_virtualizer_client_create( kinstr_ctx->hvirt, &cli->enable_map, &cli->hvcli); @@ -930,6 +1876,9 @@ static int kbasep_kinstr_prfcnt_client_create( goto error; init_waitqueue_head(&cli->waitq); + INIT_WORK(&cli->async.dump_work, + kbasep_kinstr_prfcnt_async_dump_worker); + mutex_init(&cli->cmd_sync_lock); *out_vcli = cli; return 0; @@ -965,48 +1914,11 @@ static void kbasep_kinstr_prfcnt_get_request_info_list( *arr_idx += ARRAY_SIZE(kinstr_prfcnt_supported_requests); } -static enum prfcnt_block_type -kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type) -{ - enum prfcnt_block_type block_type; - - switch (type) { - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: - block_type = PRFCNT_BLOCK_TYPE_FE; - break; - - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: - block_type = PRFCNT_BLOCK_TYPE_TILER; - break; - - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: - block_type = PRFCNT_BLOCK_TYPE_SHADER_CORE; - break; - - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: - block_type = PRFCNT_BLOCK_TYPE_MEMORY; - break; - - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: - default: - block_type = PRFCNT_BLOCK_TYPE_RESERVED; - break; - } - - return block_type; -} - -static int kbasep_kinstr_prfcnt_get_block_info_list( - const struct kbase_hwcnt_metadata *metadata, size_t block_set, - struct prfcnt_enum_item *item_arr, size_t *arr_idx) +int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *metadata, + size_t block_set, struct prfcnt_enum_item *item_arr, + size_t *arr_idx) { - size_t grp; - size_t blk; + size_t grp, blk; if (!metadata || !item_arr || !arr_idx) return -EINVAL; @@ -1015,19 +1927,30 @@ static int kbasep_kinstr_prfcnt_get_block_info_list( for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++, (*arr_idx)++) { + size_t blk_inst; + size_t unused_blk_inst_count = 0; + size_t blk_inst_count = + kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk); + item_arr[*arr_idx].hdr.item_type = PRFCNT_ENUM_TYPE_BLOCK; item_arr[*arr_idx].hdr.item_version = PRFCNT_READER_API_VERSION; item_arr[*arr_idx].u.block_counter.set = block_set; - item_arr[*arr_idx].u.block_counter.block_type = kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( kbase_hwcnt_metadata_block_type( metadata, grp, blk)); + + /* Count number of unused blocks to updated number of instances */ + for (blk_inst = 0; blk_inst < blk_inst_count; blk_inst++) { + if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, + blk_inst)) + unused_blk_inst_count++; + } + item_arr[*arr_idx].u.block_counter.num_instances = - kbase_hwcnt_metadata_block_instance_count( - metadata, grp, blk); + blk_inst_count - unused_blk_inst_count; item_arr[*arr_idx].u.block_counter.num_values = kbase_hwcnt_metadata_block_values_count( metadata, grp, blk); @@ -1086,8 +2009,11 @@ static int kbasep_kinstr_prfcnt_enum_info_list( if (enum_info->info_item_count != kinstr_ctx->info_item_count) return -EINVAL; - prfcnt_item_arr = - (struct prfcnt_enum_item *)(uintptr_t)enum_info->info_list_ptr; + prfcnt_item_arr = kcalloc(enum_info->info_item_count, + sizeof(*prfcnt_item_arr), GFP_KERNEL); + if (!prfcnt_item_arr) + return -ENOMEM; + kbasep_kinstr_prfcnt_get_request_info_list(kinstr_ctx, prfcnt_item_arr, &arr_idx); metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt); @@ -1118,6 +2044,16 @@ static int kbasep_kinstr_prfcnt_enum_info_list( FLEX_LIST_TYPE_NONE; prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_version = 0; + if (!err) { + unsigned long bytes = + enum_info->info_item_count * sizeof(*prfcnt_item_arr); + + if (copy_to_user(u64_to_user_ptr(enum_info->info_list_ptr), + prfcnt_item_arr, bytes)) + err = -EFAULT; + } + + kfree(prfcnt_item_arr); return err; } |