summaryrefslogtreecommitdiff
path: root/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c
diff options
context:
space:
mode:
Diffstat (limited to 'mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c')
-rw-r--r--mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c298
1 files changed, 298 insertions, 0 deletions
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c
new file mode 100644
index 0000000..0cf2f94
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h"
+
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
+ const struct kbase_hwcnt_metadata *src_md)
+{
+ struct kbase_hwcnt_description desc;
+ struct kbase_hwcnt_group_description group;
+ struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
+ size_t prfcnt_values_per_block;
+ size_t blk;
+ int err;
+ struct kbase_hwcnt_metadata_narrow *metadata_narrow;
+
+ if (!dst_md_narrow || !src_md || !src_md->grp_metadata ||
+ !src_md->grp_metadata[0].blk_metadata)
+ return -EINVAL;
+
+ /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block
+ * count in the metadata.
+ */
+ if ((kbase_hwcnt_metadata_group_count(src_md) != 1) ||
+ (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
+ return -EINVAL;
+
+ /* Get the values count in the first block. */
+ prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);
+
+ /* check all blocks should have same values count. */
+ for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
+ if (val_cnt != prfcnt_values_per_block)
+ return -EINVAL;
+ }
+
+ /* Only support 64 and 128 entries per block. */
+ if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128))
+ return -EINVAL;
+
+ metadata_narrow = kmalloc(sizeof(*metadata_narrow), GFP_KERNEL);
+ if (!metadata_narrow)
+ return -ENOMEM;
+
+ /* Narrow to 64 entries per block to keep API backward compatibility. */
+ prfcnt_values_per_block = 64;
+
+ for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
+ size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk);
+ blks[blk] = (struct kbase_hwcnt_block_description){
+ .type = kbase_hwcnt_metadata_block_type(src_md, 0, blk),
+ .inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk),
+ .hdr_cnt = blk_hdr_cnt,
+ .ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt,
+ };
+ }
+
+ group = (struct kbase_hwcnt_group_description){
+ .type = kbase_hwcnt_metadata_group_type(src_md, 0),
+ .blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT,
+ .blks = blks,
+ };
+
+ desc = (struct kbase_hwcnt_description){
+ .grp_cnt = kbase_hwcnt_metadata_group_count(src_md),
+ .avail_mask = src_md->avail_mask,
+ .clk_cnt = src_md->clk_cnt,
+ .grps = &group,
+ };
+
+ err = kbase_hwcnt_metadata_create(&desc, &metadata_narrow->metadata);
+ if (!err) {
+ /* Narrow down the buffer size to half as the narrowed metadata
+ * only supports 32-bit but the created metadata uses 64-bit for
+ * block entry.
+ */
+ metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1;
+ *dst_md_narrow = metadata_narrow;
+ } else {
+ kfree(metadata_narrow);
+ }
+
+ return err;
+}
+
+void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow)
+{
+ if (!md_narrow)
+ return;
+
+ kbase_hwcnt_metadata_destroy(md_narrow->metadata);
+ kfree(md_narrow);
+}
+
+int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf)
+{
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ u8 *buf;
+
+ if (!md_narrow || !dump_buf)
+ return -EINVAL;
+
+ dump_buf_bytes = md_narrow->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
+
+ /* Make a single allocation for both dump_buf and clk_cnt_buf. */
+ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ *dump_buf = (struct kbase_hwcnt_dump_buffer_narrow){
+ .md_narrow = md_narrow,
+ .dump_buf = (u32 *)buf,
+ .clk_cnt_buf = (u64 *)(buf + dump_buf_bytes),
+ };
+
+ return 0;
+}
+
+void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow)
+{
+ if (!dump_buf_narrow)
+ return;
+
+ kfree(dump_buf_narrow->dump_buf);
+ *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL,
+ .dump_buf = NULL,
+ .clk_cnt_buf = NULL };
+}
+
+int kbase_hwcnt_dump_buffer_narrow_array_alloc(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n,
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs)
+{
+ struct kbase_hwcnt_dump_buffer_narrow *buffers;
+ size_t buf_idx;
+ unsigned int order;
+ unsigned long addr;
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ size_t total_dump_buf_size;
+
+ if (!md_narrow || !dump_bufs)
+ return -EINVAL;
+
+ dump_buf_bytes = md_narrow->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
+
+ /* Allocate memory for the dump buffer struct array */
+ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
+ if (!buffers)
+ return -ENOMEM;
+
+ /* Allocate pages for the actual dump buffers, as they tend to be fairly
+ * large.
+ */
+ order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n);
+ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+
+ if (!addr) {
+ kfree(buffers);
+ return -ENOMEM;
+ }
+
+ *dump_bufs = (struct kbase_hwcnt_dump_buffer_narrow_array){
+ .page_addr = addr,
+ .page_order = order,
+ .buf_cnt = n,
+ .bufs = buffers,
+ };
+
+ total_dump_buf_size = dump_buf_bytes * n;
+ /* Set the buffer of each dump buf */
+ for (buf_idx = 0; buf_idx < n; buf_idx++) {
+ const size_t dump_buf_offset = dump_buf_bytes * buf_idx;
+ const size_t clk_cnt_buf_offset =
+ total_dump_buf_size + (clk_cnt_buf_bytes * buf_idx);
+
+ buffers[buf_idx] = (struct kbase_hwcnt_dump_buffer_narrow){
+ .md_narrow = md_narrow,
+ .dump_buf = (u32 *)(addr + dump_buf_offset),
+ .clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset),
+ };
+ }
+
+ return 0;
+}
+
+void kbase_hwcnt_dump_buffer_narrow_array_free(
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs)
+{
+ if (!dump_bufs)
+ return;
+
+ kfree(dump_bufs->bufs);
+ free_pages(dump_bufs->page_addr, dump_bufs->page_order);
+ memset(dump_bufs, 0, sizeof(*dump_bufs));
+}
+
+void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em, size_t val_cnt)
+{
+ size_t val;
+
+ for (val = 0; val < val_cnt; val++) {
+ bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
+ u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val];
+
+ dst_blk[val] = val_enabled ? src_val : 0;
+ }
+}
+
+void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata_narrow *metadata_narrow;
+ size_t grp;
+ size_t clk;
+
+ if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) ||
+ WARN_ON(src->metadata->grp_cnt != 1) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
+ src->metadata->grp_metadata[0].blk_cnt) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
+ KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt >
+ src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt))
+ return;
+
+ /* Don't use src metadata since src buffer is bigger than dst buffer. */
+ metadata_narrow = dst_narrow->md_narrow;
+
+ for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) {
+ size_t blk;
+ size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp);
+
+ for (blk = 0; blk < blk_cnt; blk++) {
+ size_t blk_inst;
+ size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count(
+ metadata_narrow, grp, blk);
+
+ for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) {
+ /* The narrowed down buffer is only 32-bit. */
+ u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance(
+ dst_narrow, grp, blk, blk_inst);
+ const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
+ src, grp, blk, blk_inst);
+ const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+ dst_enable_map, grp, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count(
+ metadata_narrow, grp, blk);
+ /* Align upwards to include padding bytes */
+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+ KBASE_HWCNT_VALUE_BYTES));
+
+ kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk,
+ blk_em, val_cnt);
+ }
+ }
+ }
+
+ for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) {
+ bool clk_enabled =
+ kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
+
+ dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
+ }
+}