diff options
author | Caio Oliveira <caio.oliveira@intel.com> | 2024-02-26 07:46:58 -0800 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2024-02-28 05:45:39 +0000 |
commit | 35b07ab035f8427282ffb6c2985dfcff1893668f (patch) | |
tree | 1b15c1300cf859907f8b6d4dea3518484eeb592f | |
parent | 3ef6fa3767ad6b90ad5bc5dd35993891f9a0a7aa (diff) | |
download | mesa3d-35b07ab035f8427282ffb6c2985dfcff1893668f.tar.gz |
intel/brw: Use a single register set
Different sets were needed for SIMD8/SIMD16 in old Gfx versions, but now
we can use a single one regardless of the SIMD size.
Suggested by Ken.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
-rw-r--r-- | src/intel/compiler/brw_compiler.h | 8 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_reg_allocate.cpp | 70 |
2 files changed, 12 insertions, 66 deletions
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index b748c6838e9..046a1b6d359 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -65,13 +65,7 @@ struct brw_compiler { * block sizes used, indexed by register size. */ struct ra_class *classes[16]; - - /** - * ra class for the aligned barycentrics we use for PLN, which doesn't - * appear in *classes. - */ - struct ra_class *aligned_bary_class; - } fs_reg_sets[3]; + } fs_reg_set; void (*shader_debug_log)(void *, unsigned *id, const char *str, ...) PRINTFLIKE(3, 4); void (*shader_perf_log)(void *, unsigned *id, const char *str, ...) PRINTFLIKE(3, 4); diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index 35aa48fd9ef..1cbeaa11b66 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -78,30 +78,11 @@ fs_visitor::assign_regs_trivial() } -/** - * Size of a register from the aligned_bary_class register class. - */ -static unsigned -aligned_bary_size(unsigned dispatch_width) -{ - return (dispatch_width == 8 ? 2 : 4); -} - -static void -brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) +void +brw_fs_alloc_reg_sets(struct brw_compiler *compiler) { const struct intel_device_info *devinfo = compiler->devinfo; int base_reg_count = BRW_MAX_GRF; - const int index = util_logbase2(dispatch_width / 8); - - if (dispatch_width > 8) { - /* For IVB+, we don't need the PLN hacks or the even-reg alignment in - * SIMD16. Therefore, we can use the exact same register sets for - * SIMD16 as we do for SIMD8 and we don't need to recalculate them. - */ - compiler->fs_reg_sets[index] = compiler->fs_reg_sets[0]; - return; - } /* The registers used to make up almost all values handled in the compiler * are a scalar value occupying a single register (or 2 registers in the @@ -127,7 +108,6 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) ra_set_allocate_round_robin(regs); struct ra_class **classes = ralloc_array(compiler, struct ra_class *, REG_CLASS_COUNT); - struct ra_class *aligned_bary_class = NULL; /* Now, make the register classes for each size of contiguous register * allocation we might need to make. @@ -141,20 +121,11 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) ra_set_finalize(regs, NULL); - compiler->fs_reg_sets[index].regs = regs; - for (unsigned i = 0; i < ARRAY_SIZE(compiler->fs_reg_sets[index].classes); i++) - compiler->fs_reg_sets[index].classes[i] = NULL; + compiler->fs_reg_set.regs = regs; + for (unsigned i = 0; i < ARRAY_SIZE(compiler->fs_reg_set.classes); i++) + compiler->fs_reg_set.classes[i] = NULL; for (int i = 0; i < REG_CLASS_COUNT; i++) - compiler->fs_reg_sets[index].classes[class_sizes[i] - 1] = classes[i]; - compiler->fs_reg_sets[index].aligned_bary_class = aligned_bary_class; -} - -void -brw_fs_alloc_reg_sets(struct brw_compiler *compiler) -{ - brw_alloc_reg_set(compiler, 8); - brw_alloc_reg_set(compiler, 16); - brw_alloc_reg_set(compiler, 32); + compiler->fs_reg_set.classes[class_sizes[i] - 1] = classes[i]; } static int @@ -297,7 +268,6 @@ public: * for reg_width == 2. */ int reg_width = fs->dispatch_width / 8; - rsi = util_logbase2(reg_width); payload_node_count = ALIGN(fs->first_non_payload_grf, reg_width); /* Get payload IP information */ @@ -356,9 +326,6 @@ private: set *spill_insts; - /* Which compiler->fs_reg_sets[] to use */ - int rsi; - ra_graph *g; bool have_spill_costs; @@ -581,7 +548,7 @@ fs_reg_alloc::build_interference_graph(bool allow_spilling) payload_last_use_ip); assert(g == NULL); - g = ra_alloc_interference_graph(compiler->fs_reg_sets[rsi].regs, node_count); + g = ra_alloc_interference_graph(compiler->fs_reg_set.regs, node_count); ralloc_steal(mem_ctx, g); /* Set up the payload nodes */ @@ -595,26 +562,11 @@ fs_reg_alloc::build_interference_graph(bool allow_spilling) for (unsigned i = 0; i < fs->alloc.count; i++) { unsigned size = DIV_ROUND_UP(fs->alloc.sizes[i], reg_unit(devinfo)); - assert(size <= ARRAY_SIZE(compiler->fs_reg_sets[rsi].classes) && + assert(size <= ARRAY_SIZE(compiler->fs_reg_set.classes) && "Register allocation relies on split_virtual_grfs()"); ra_set_node_class(g, first_vgrf_node + i, - compiler->fs_reg_sets[rsi].classes[size - 1]); - } - - /* Special case: on pre-Gfx7 hardware that supports PLN, the second operand - * of a PLN instruction needs to be an even-numbered register, so we have a - * special register class aligned_bary_class to handle this case. - */ - if (compiler->fs_reg_sets[rsi].aligned_bary_class) { - foreach_block_and_inst(block, fs_inst, inst, fs->cfg) { - if (inst->opcode == FS_OPCODE_LINTERP && inst->src[0].file == VGRF && - fs->alloc.sizes[inst->src[0].nr] == - aligned_bary_size(fs->dispatch_width)) { - ra_set_node_class(g, first_vgrf_node + inst->src[0].nr, - compiler->fs_reg_sets[rsi].aligned_bary_class); - } - } + compiler->fs_reg_set.classes[size - 1]); } /* Add interference based on the live range of the register */ @@ -966,7 +918,7 @@ fs_reg_alloc::alloc_scratch_header() int vgrf = fs->alloc.allocate(1); assert(first_vgrf_node + vgrf == scratch_header_node); ra_set_node_class(g, scratch_header_node, - compiler->fs_reg_sets[rsi].classes[0]); + compiler->fs_reg_set.classes[0]); setup_live_interference(scratch_header_node, 0, INT_MAX); @@ -978,7 +930,7 @@ fs_reg_alloc::alloc_spill_reg(unsigned size, int ip) { int vgrf = fs->alloc.allocate(ALIGN(size, reg_unit(devinfo))); int class_idx = DIV_ROUND_UP(size, reg_unit(devinfo)) - 1; - int n = ra_add_node(g, compiler->fs_reg_sets[rsi].classes[class_idx]); + int n = ra_add_node(g, compiler->fs_reg_set.classes[class_idx]); assert(n == first_vgrf_node + vgrf); assert(n == first_spill_node + spill_node_count); |