diff options
field | value | date |
---|---|---|
author | Caio Oliveira <caio.oliveira@intel.com> | 2024-02-17 22:43:47 -0800 |
committer | Marge Bot <emma+marge@anholt.net> | 2024-02-28 05:45:39 +0000 |
commit | 3ef1ed73d3313811d9f8941f251376a99382131e (patch) | |
tree | ffa4f190743b59ed139e0a543d295f99c3e8b705 | |
parent | c793644ce9f332efa6dade415d56d3c876bcb11b (diff) | |
download | mesa3d-3ef1ed73d3313811d9f8941f251376a99382131e.tar.gz |
intel/brw: Remove Gfx8- code from visitor
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
mode | file | lines changed |
---|---|---|
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 162 |
-rw-r--r-- | src/intel/compiler/brw_fs.h | 9 |
-rw-r--r-- | src/intel/compiler/brw_fs_visitor.cpp | 119 |
-rw-r--r-- | src/intel/compiler/brw_shader.cpp | 4 |
-rw-r--r-- | src/intel/compiler/brw_shader.h | 14 |
5 files changed, 48 insertions, 260 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 667e7f2bebf..27716bb032c 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -202,22 +202,6 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, shuffle_from_32bit_read(bld, dst, vec4_result, 0, components); } -/** - * A helper for MOV generation for fixing up broken hardware SEND dependency - * handling. - */ -void -fs_visitor::DEP_RESOLVE_MOV(const fs_builder &bld, int grf) -{ - /* The caller always wants uncompressed to emit the minimal extra - * dependencies, and to avoid having to deal with aligning its regs to 2. - */ - const fs_builder ubld = bld.annotate("send dependency resolve") - .quarter(0); - - ubld.MOV(ubld.null_reg_f(), fs_reg(VGRF, grf, BRW_REGISTER_TYPE_F)); -} - bool fs_inst::is_send_from_grf() const { @@ -1636,7 +1620,7 @@ calculate_urb_setup(const struct intel_device_info *devinfo, prog_data->urb_setup[i] = urb_next++; } } - } else if (devinfo->ver >= 6) { + } else { assert(!nir->info.per_primitive_inputs); uint64_t vue_header_bits = @@ -1713,34 +1697,6 @@ calculate_urb_setup(const struct intel_device_info *devinfo, } urb_next = prev_stage_vue_map.num_slots - first_slot; } - } else { - /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */ - for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) { - /* Point size is packed into the header, not as a general attribute */ - if (i == VARYING_SLOT_PSIZ) - continue; - - if (key->input_slots_valid & BITFIELD64_BIT(i)) { - /* The back color slot is skipped when the front color is - * also written to. In addition, some slots can be - * written in the vertex shader and not read in the - * fragment shader. So the register number must always be - * incremented, mapped or not. 
- */ - if (_mesa_varying_slot_in_fs((gl_varying_slot) i)) - prog_data->urb_setup[i] = urb_next; - urb_next++; - } - } - - /* - * It's a FS only attribute, and we did interpolation for this attribute - * in SF thread. So, count it here, too. - * - * See compile_sf_prog() for more info. - */ - if (inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)) - prog_data->urb_setup[VARYING_SLOT_PNTC] = urb_next++; } prog_data->num_varying_inputs = urb_next - prog_data->num_per_primitive_inputs; @@ -2071,14 +2027,11 @@ fs_visitor::assign_constant_locations() /* Now that we know how many regular uniforms we'll push, reduce the * UBO push ranges so we don't exceed the 3DSTATE_CONSTANT limits. - */ - /* For gen4/5: - * Only allow 16 registers (128 uniform components) as push constants. * * If changing this value, note the limitation about total_regs in * brw_curbe.c/crocus_state.c */ - const unsigned max_push_length = compiler->devinfo->ver < 6 ? 16 : 64; + const unsigned max_push_length = 64; unsigned push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8); for (int i = 0; i < 4; i++) { struct brw_ubo_range *range = &prog_data->ubo_ranges[i]; @@ -2129,14 +2082,8 @@ fs_visitor::emit_repclear_shader() assert(uniforms == 0); assume(key->nr_color_regions > 0); - fs_reg color_output, header; - if (devinfo->ver >= 7) { - color_output = retype(brw_vec4_grf(127, 0), BRW_REGISTER_TYPE_UD); - header = retype(brw_vec8_grf(125, 0), BRW_REGISTER_TYPE_UD); - } else { - color_output = retype(brw_vec4_reg(MRF, 2, 0), BRW_REGISTER_TYPE_UD); - header = retype(brw_vec8_reg(MRF, 0, 0), BRW_REGISTER_TYPE_UD); - } + fs_reg color_output = retype(brw_vec4_grf(127, 0), BRW_REGISTER_TYPE_UD); + fs_reg header = retype(brw_vec8_grf(125, 0), BRW_REGISTER_TYPE_UD); /* We pass the clear color as a flat input. Copy it to the output. 
*/ fs_reg color_input = @@ -2157,23 +2104,17 @@ fs_visitor::emit_repclear_shader() if (i > 0) bld.exec_all().group(1, 0).MOV(component(header, 2), brw_imm_ud(i)); - if (devinfo->ver >= 7) { - write = bld.emit(SHADER_OPCODE_SEND); - write->resize_sources(3); - write->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE; - write->src[0] = brw_imm_ud(0); - write->src[1] = brw_imm_ud(0); - write->src[2] = i == 0 ? color_output : header; - write->check_tdr = true; - write->send_has_side_effects = true; - write->desc = brw_fb_write_desc(devinfo, i, - BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED, - i == key->nr_color_regions - 1, false); - } else { - write = bld.emit(FS_OPCODE_REP_FB_WRITE); - write->target = i; - write->base_mrf = i == 0 ? color_output.nr : header.nr; - } + write = bld.emit(SHADER_OPCODE_SEND); + write->resize_sources(3); + write->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE; + write->src[0] = brw_imm_ud(0); + write->src[1] = brw_imm_ud(0); + write->src[2] = i == 0 ? color_output : header; + write->check_tdr = true; + write->send_has_side_effects = true; + write->desc = brw_fb_write_desc(devinfo, i, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED, + i == key->nr_color_regions - 1, false); /* We can use a headerless message for the first render target */ write->header_size = i == 0 ? 0 : 2; @@ -2206,7 +2147,7 @@ brw_sample_mask_reg(const fs_builder &bld) assert(bld.dispatch_width() <= 16); return brw_flag_subreg(sample_mask_flag_subreg(s) + bld.group() / 16); } else { - assert(s.devinfo->ver >= 6 && bld.dispatch_width() <= 16); + assert(bld.dispatch_width() <= 16); assert(s.devinfo->ver < 20); return retype(brw_vec1_grf((bld.group() >= 16 ? 
2 : 1), 7), BRW_REGISTER_TYPE_UW); @@ -2774,24 +2715,6 @@ fs_visitor::allocate_registers(bool allow_spilling) prog_data->total_scratch = MAX2(brw_get_scratch_size(last_scratch), prog_data->total_scratch); - if (gl_shader_stage_is_compute(stage)) { - if (devinfo->platform == INTEL_PLATFORM_HSW) { - /* According to the MEDIA_VFE_STATE's "Per Thread Scratch Space" - * field documentation, Haswell supports a minimum of 2kB of - * scratch space for compute shaders, unlike every other stage - * and platform. - */ - prog_data->total_scratch = MAX2(prog_data->total_scratch, 2048); - } else if (devinfo->ver <= 7) { - /* According to the MEDIA_VFE_STATE's "Per Thread Scratch Space" - * field documentation, platforms prior to Haswell measure scratch - * size linearly with a range of [1kB, 12kB] and 1kB granularity. - */ - prog_data->total_scratch = ALIGN(last_scratch, 1024); - max_scratch_size = 12 * 1024; - } - } - /* We currently only support up to 2MB of scratch space. If we * need to support more eventually, the documentation suggests * that we could allocate a larger buffer, and partition it out @@ -2892,7 +2815,7 @@ fs_visitor::emit_tcs_thread_end() * separate write just to finish the thread. There isn't guaranteed to * be one, so this may not succeed. */ - if (devinfo->ver != 8 && mark_last_urb_write_with_eot()) + if (mark_last_urb_write_with_eot()) return; const fs_builder bld = fs_builder(this).at_end(); @@ -3089,10 +3012,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) if (nir->info.inputs_read > 0 || BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) || (nir->info.outputs_read > 0 && !wm_key->coherent_fb_fetch)) { - if (devinfo->ver < 6) - emit_interpolation_setup_gfx4(); - else - emit_interpolation_setup_gfx6(); + emit_interpolation_setup(); } /* We handle discards by keeping track of the still-live pixels in f0.1. 
@@ -3108,8 +3028,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) */ const fs_reg dispatch_mask = devinfo->ver >= 20 ? xe2_vec1_grf(i, 15) : - devinfo->ver >= 6 ? brw_vec1_grf(i + 1, 7) : - brw_vec1_grf(0, 0); + brw_vec1_grf(i + 1, 7); bld.exec_all().group(1, 0) .MOV(brw_sample_mask_reg(bld.group(lower_width, i)), retype(dispatch_mask, BRW_REGISTER_TYPE_UW)); @@ -3154,7 +3073,6 @@ bool fs_visitor::run_cs(bool allow_spilling) { assert(gl_shader_stage_is_compute(stage)); - assert(devinfo->ver >= 7); const fs_builder bld = fs_builder(this).at_end(); payload_ = new cs_thread_payload(*this); @@ -3517,26 +3435,24 @@ brw_nir_populate_wm_prog_data(nir_shader *shader, assert(prog_data->alpha_to_coverage != BRW_SOMETIMES || prog_data->persample_dispatch == BRW_SOMETIMES); - if (devinfo->ver >= 6) { - prog_data->uses_sample_mask = - BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); + prog_data->uses_sample_mask = + BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); - /* From the Ivy Bridge PRM documentation for 3DSTATE_PS: - * - * "MSDISPMODE_PERSAMPLE is required in order to select - * POSOFFSET_SAMPLE" - * - * So we can only really get sample positions if we are doing real - * per-sample dispatch. If we need gl_SamplePosition and we don't have - * persample dispatch, we hard-code it to 0.5. - */ - prog_data->uses_pos_offset = - prog_data->persample_dispatch != BRW_NEVER && - (BITSET_TEST(shader->info.system_values_read, - SYSTEM_VALUE_SAMPLE_POS) || - BITSET_TEST(shader->info.system_values_read, - SYSTEM_VALUE_SAMPLE_POS_OR_CENTER)); - } + /* From the Ivy Bridge PRM documentation for 3DSTATE_PS: + * + * "MSDISPMODE_PERSAMPLE is required in order to select + * POSOFFSET_SAMPLE" + * + * So we can only really get sample positions if we are doing real + * per-sample dispatch. If we need gl_SamplePosition and we don't have + * persample dispatch, we hard-code it to 0.5. 
+ */ + prog_data->uses_pos_offset = + prog_data->persample_dispatch != BRW_NEVER && + (BITSET_TEST(shader->info.system_values_read, + SYSTEM_VALUE_SAMPLE_POS) || + BITSET_TEST(shader->info.system_values_read, + SYSTEM_VALUE_SAMPLE_POS_OR_CENTER)); prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests; prog_data->post_depth_coverage = shader->info.fs.post_depth_coverage; @@ -3951,17 +3867,13 @@ cs_fill_push_const_info(const struct intel_device_info *devinfo, { const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; int subgroup_id_index = brw_get_subgroup_id_param_index(devinfo, prog_data); - bool cross_thread_supported = devinfo->verx10 >= 75; /* The thread ID should be stored in the last param dword */ assert(subgroup_id_index == -1 || subgroup_id_index == (int)prog_data->nr_params - 1); unsigned cross_thread_dwords, per_thread_dwords; - if (!cross_thread_supported) { - cross_thread_dwords = 0u; - per_thread_dwords = prog_data->nr_params; - } else if (subgroup_id_index >= 0) { + if (subgroup_id_index >= 0) { /* Fill all but the last register with cross-thread payload */ cross_thread_dwords = 8 * (subgroup_id_index / 8); per_thread_dwords = prog_data->nr_params - cross_thread_dwords; diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 488a573c28a..0076bc12397 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -223,7 +223,6 @@ public: uint32_t const_offset, uint8_t alignment, unsigned components); - void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf); bool run_fs(bool allow_spilling, bool do_rep_send); bool run_vs(); @@ -268,8 +267,7 @@ public: void limit_dispatch_width(unsigned n, const char *msg); void emit_repclear_shader(); - void emit_interpolation_setup_gfx4(); - void emit_interpolation_setup_gfx6(); + void emit_interpolation_setup(); void set_tcs_invocation_id(); @@ -412,14 +410,13 @@ public: /** * Return the flag register used in fragment shaders to keep track of live * 
samples. On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32 - * dispatch mode, while earlier generations are constrained to f0.1, which - * limits the dispatch width to SIMD16 for fragment shaders that use discard. + * dispatch mode. */ static inline unsigned sample_mask_flag_subreg(const fs_visitor &s) { assert(s.stage == MESA_SHADER_FRAGMENT); - return s.devinfo->ver >= 7 ? 2 : 1; + return 2; } /** diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index ca8411b850c..55f3ded446c 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -116,67 +116,7 @@ fs_visitor::per_primitive_reg(const fs_builder &bld, int location, unsigned comp /** Emits the interpolation for the varying inputs. */ void -fs_visitor::emit_interpolation_setup_gfx4() -{ - struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - - fs_builder abld = fs_builder(this).at_end().annotate("compute pixel centers"); - this->pixel_x = vgrf(glsl_uint_type()); - this->pixel_y = vgrf(glsl_uint_type()); - this->pixel_x.type = BRW_REGISTER_TYPE_UW; - this->pixel_y.type = BRW_REGISTER_TYPE_UW; - abld.ADD(this->pixel_x, - fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), - fs_reg(brw_imm_v(0x10101010))); - abld.ADD(this->pixel_y, - fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), - fs_reg(brw_imm_v(0x11001100))); - - const fs_builder bld = fs_builder(this).at_end(); - abld = bld.annotate("compute pixel deltas from v0"); - - this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL] = - vgrf(glsl_vec2_type()); - const fs_reg &delta_xy = this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL]; - const fs_reg xstart(negate(brw_vec1_grf(1, 0))); - const fs_reg ystart(negate(brw_vec1_grf(1, 1))); - - if (devinfo->has_pln) { - for (unsigned i = 0; i < dispatch_width / 8; i++) { - abld.quarter(i).ADD(quarter(offset(delta_xy, abld, 0), i), - quarter(this->pixel_x, i), xstart); - abld.quarter(i).ADD(quarter(offset(delta_xy, abld, 1), i), 
- quarter(this->pixel_y, i), ystart); - } - } else { - abld.ADD(offset(delta_xy, abld, 0), this->pixel_x, xstart); - abld.ADD(offset(delta_xy, abld, 1), this->pixel_y, ystart); - } - - this->pixel_z = fetch_payload_reg(bld, fs_payload().source_depth_reg); - - /* The SF program automatically handles doing the perspective correction or - * not based on wm_prog_data::interp_mode[] so we can use the same pixel - * offsets for both perspective and non-perspective. - */ - this->delta_xy[BRW_BARYCENTRIC_NONPERSPECTIVE_PIXEL] = - this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL]; - - abld = bld.annotate("compute pos.w and 1/pos.w"); - /* Compute wpos.w. It's always in our setup, since it's needed to - * interpolate the other attributes. - */ - this->wpos_w = vgrf(glsl_float_type()); - abld.emit(FS_OPCODE_LINTERP, wpos_w, delta_xy, - interp_reg(abld, VARYING_SLOT_POS, 3, 0)); - /* Compute the pixel 1/W value from wpos.w. */ - this->pixel_w = vgrf(glsl_float_type()); - abld.emit(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); -} - -/** Emits the interpolation for the varying inputs. 
*/ -void -fs_visitor::emit_interpolation_setup_gfx6() +fs_visitor::emit_interpolation_setup() { const fs_builder bld = fs_builder(this).at_end(); fs_builder abld = bld.annotate("compute pixel centers"); @@ -384,7 +324,7 @@ fs_visitor::emit_interpolation_setup_gfx6() hbld.MOV(offset(pixel_x, hbld, i), horiz_stride(int_pixel_x, 2)); hbld.MOV(offset(pixel_y, hbld, i), horiz_stride(int_pixel_y, 2)); - } else if (devinfo->ver >= 8 || dispatch_width == 8) { + } else { /* The "Register Region Restrictions" page says for BDW (and newer, * presumably): * @@ -407,31 +347,6 @@ fs_visitor::emit_interpolation_setup_gfx6() horiz_stride(half_int_pixel_offset_x, 0)); hbld.emit(FS_OPCODE_PIXEL_Y, offset(pixel_y, hbld, i), int_pixel_xy, horiz_stride(half_int_pixel_offset_y, 0)); - } else { - /* The "Register Region Restrictions" page says for SNB, IVB, HSW: - * - * "When destination spans two registers, the source MUST span - * two registers." - * - * Since the GRF source of the ADD will only read a single register, - * we must do two separate ADDs in SIMD16. - */ - const fs_reg int_pixel_x = hbld.vgrf(BRW_REGISTER_TYPE_UW); - const fs_reg int_pixel_y = hbld.vgrf(BRW_REGISTER_TYPE_UW); - - hbld.ADD(int_pixel_x, - fs_reg(stride(suboffset(gi_uw, 4), 2, 4, 0)), - fs_reg(brw_imm_v(0x10101010))); - hbld.ADD(int_pixel_y, - fs_reg(stride(suboffset(gi_uw, 5), 2, 4, 0)), - fs_reg(brw_imm_v(0x11001100))); - - /* As of gfx6, we can no longer mix float and int sources. We have - * to turn the integer pixel centers into floats for their actual - * use. 
- */ - hbld.MOV(offset(pixel_x, hbld, i), int_pixel_x); - hbld.MOV(offset(pixel_y, hbld, i), int_pixel_y); } } @@ -676,19 +591,8 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, const fs_reg dst_depth = fetch_payload_reg(bld, fs_payload().dest_depth_reg); fs_reg src_depth, src_stencil; - if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) src_depth = frag_depth; - } else if (source_depth_to_render_target) { - /* If we got here, we're in one of those strange Gen4-5 cases where - * we're forced to pass the source depth, unmodified, to the FB write. - * In this case, we don't want to use pixel_z because we may not have - * set up interpolation. It's also perfectly safe because it only - * happens on old hardware (no coarse interpolation) and this is - * explicitly the pass-through case. - */ - assert(devinfo->ver <= 5); - src_depth = fetch_payload_reg(bld, fs_payload().source_depth_reg); - } if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) src_stencil = frag_stencil; @@ -725,7 +629,7 @@ fs_visitor::do_emit_fb_writes(int nr_color_regions, bool replicate_alpha) ralloc_asprintf(this->mem_ctx, "FB write target %d", target)); fs_reg src0_alpha; - if (devinfo->ver >= 6 && replicate_alpha && target != 0) + if (replicate_alpha && target != 0) src0_alpha = offset(outputs[0], bld, 3); inst = emit_single_fb_write(abld, this->outputs[target], @@ -761,16 +665,6 @@ fs_visitor::emit_fb_writes() struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - if (source_depth_to_render_target && devinfo->ver == 6) { - /* For outputting oDepth on gfx6, SIMD8 writes have to be used. This - * would require SIMD8 moves of each half to message regs, e.g. by using - * the SIMD lowering pass. 
Unfortunately this is more difficult than it - * sounds because the SIMD8 single-source message lacks channel selects - * for the second and third subspans. - */ - limit_dispatch_width(8, "Depth writes unsupported in SIMD16+ mode.\n"); - } - if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { /* From the 'Render Target Write message' section of the docs: * "Output Stencil is not supported with SIMD16 Render Target Write @@ -786,7 +680,7 @@ fs_visitor::emit_fb_writes() */ const bool replicate_alpha = key->alpha_test_replicate_alpha || (key->nr_color_regions > 1 && key->alpha_to_coverage && - (sample_mask.file == BAD_FILE || devinfo->ver == 6)); + sample_mask.file == BAD_FILE); prog_data->dual_src_blend = (this->dual_src_output.file != BAD_FILE && this->outputs[0].file != BAD_FILE); @@ -1142,7 +1036,6 @@ fs_visitor::emit_urb_fence() void fs_visitor::emit_cs_terminate() { - assert(devinfo->ver >= 7); const fs_builder bld = fs_builder(this).at_end(); /* We can't directly send from g0, since sends with EOT have to use @@ -1247,7 +1140,7 @@ fs_visitor::init() this->source_depth_to_render_target = false; this->runtime_check_aads_emit = false; this->first_non_payload_grf = 0; - this->max_grf = devinfo->ver >= 7 ? GFX7_MRF_HACK_START : BRW_MAX_GRF; + this->max_grf = GFX7_MRF_HACK_START; this->uniforms = 0; this->last_scratch = 0; diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 0536a378884..ce0ad6c8571 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -149,10 +149,10 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) switch (op) { case 0 ... NUM_BRW_OPCODES - 1: - /* The DO instruction doesn't exist on Gfx6+, but we use it to mark the + /* The DO instruction doesn't exist on Gfx9+, but we use it to mark the * start of a loop in the IR. 
*/ - if (devinfo->ver >= 6 && op == BRW_OPCODE_DO) + if (op == BRW_OPCODE_DO) return "do"; /* DPAS instructions may transiently exist on platforms that do not diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h index 18c867841cf..f0d3737ecf9 100644 --- a/src/intel/compiler/brw_shader.h +++ b/src/intel/compiler/brw_shader.h @@ -130,7 +130,6 @@ static inline nir_variable_mode brw_nir_no_indirect_mask(const struct brw_compiler *compiler, gl_shader_stage stage) { - const struct intel_device_info *devinfo = compiler->devinfo; nir_variable_mode indirect_mask = (nir_variable_mode) 0; switch (stage) { @@ -149,19 +148,6 @@ brw_nir_no_indirect_mask(const struct brw_compiler *compiler, stage != MESA_SHADER_MESH) indirect_mask |= nir_var_shader_out; - /* On HSW+, we allow indirects in scalar shaders. They get implemented - * using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in - * brw_postprocess_nir. - * - * We haven't plumbed through the indirect scratch messages on gfx6 or - * earlier so doing indirects via scratch doesn't work there. On gfx7 and - * earlier the scratch space size is limited to 12kB. If we allowed - * indirects as scratch all the time, we may easily exceed this limit - * without having any fallback. - */ - if (devinfo->verx10 <= 70) - indirect_mask |= nir_var_function_temp; - return indirect_mask; } |