diff options
author | Caio Oliveira <caio.oliveira@intel.com> | 2024-02-19 19:41:48 -0800 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2024-02-28 05:45:39 +0000 |
commit | 5c93a0e125e9a61019393ecc200145997bf8cd08 (patch) | |
tree | 4caa156440056a25eafb54e0593a2ff8173e645f /src | |
parent | 3ef1ed73d3313811d9f8941f251376a99382131e (diff) | |
download | mesa3d-5c93a0e125e9a61019393ecc200145997bf8cd08.tar.gz |
intel/brw: Remove Gfx8- remaining opcodes
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
Diffstat (limited to 'src')
-rw-r--r-- | src/intel/compiler/brw_eu.c | 65 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu.h | 11 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu_compact.c | 1 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu_defines.h | 234 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu_emit.c | 163 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 40 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.h | 8 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_builder.h | 1 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_copy_propagation.cpp | 4 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_generator.cpp | 105 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_reg_allocate.cpp | 13 | ||||
-rw-r--r-- | src/intel/compiler/brw_ir_performance.cpp | 61 | ||||
-rw-r--r-- | src/intel/compiler/brw_schedule_instructions.cpp | 14 | ||||
-rw-r--r-- | src/intel/compiler/brw_shader.cpp | 70 |
14 files changed, 63 insertions, 727 deletions
diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c index d5c3af5ae63..e29efe66235 100644 --- a/src/intel/compiler/brw_eu.c +++ b/src/intel/compiler/brw_eu.c @@ -588,14 +588,14 @@ brw_disassemble(const struct brw_isa_info *isa, } static const struct opcode_desc opcode_descs[] = { - /* IR, HW, name, nsrc, ndst, gfx_vers */ + /* IR, HW, name, nsrc, ndst, gfx_vers assuming Gfx9+ */ { BRW_OPCODE_ILLEGAL, 0, "illegal", 0, 0, GFX_ALL }, { BRW_OPCODE_SYNC, 1, "sync", 1, 0, GFX_GE(GFX12) }, { BRW_OPCODE_MOV, 1, "mov", 1, 1, GFX_LT(GFX12) }, { BRW_OPCODE_MOV, 97, "mov", 1, 1, GFX_GE(GFX12) }, { BRW_OPCODE_SEL, 2, "sel", 2, 1, GFX_LT(GFX12) }, { BRW_OPCODE_SEL, 98, "sel", 2, 1, GFX_GE(GFX12) }, - { BRW_OPCODE_MOVI, 3, "movi", 2, 1, GFX_GE(GFX45) & GFX_LT(GFX12) }, + { BRW_OPCODE_MOVI, 3, "movi", 2, 1, GFX_LT(GFX12) }, { BRW_OPCODE_MOVI, 99, "movi", 2, 1, GFX_GE(GFX12) }, { BRW_OPCODE_NOT, 4, "not", 1, 1, GFX_LT(GFX12) }, { BRW_OPCODE_NOT, 100, "not", 1, 1, GFX_GE(GFX12) }, @@ -609,8 +609,7 @@ static const struct opcode_desc opcode_descs[] = { { BRW_OPCODE_SHR, 104, "shr", 2, 1, GFX_GE(GFX12) }, { BRW_OPCODE_SHL, 9, "shl", 2, 1, GFX_LT(GFX12) }, { BRW_OPCODE_SHL, 105, "shl", 2, 1, GFX_GE(GFX12) }, - { BRW_OPCODE_DIM, 10, "dim", 1, 1, GFX75 }, - { BRW_OPCODE_SMOV, 10, "smov", 0, 0, GFX_GE(GFX8) & GFX_LT(GFX12) }, + { BRW_OPCODE_SMOV, 10, "smov", 0, 0, GFX_LT(GFX12) }, { BRW_OPCODE_SMOV, 106, "smov", 0, 0, GFX_GE(GFX12) }, { BRW_OPCODE_ASR, 12, "asr", 2, 1, GFX_LT(GFX12) }, { BRW_OPCODE_ASR, 108, "asr", 2, 1, GFX_GE(GFX12) }, @@ -622,46 +621,39 @@ static const struct opcode_desc opcode_descs[] = { { BRW_OPCODE_CMP, 112, "cmp", 2, 1, GFX_GE(GFX12) }, { BRW_OPCODE_CMPN, 17, "cmpn", 2, 1, GFX_LT(GFX12) }, { BRW_OPCODE_CMPN, 113, "cmpn", 2, 1, GFX_GE(GFX12) }, - { BRW_OPCODE_CSEL, 18, "csel", 3, 1, GFX_GE(GFX8) & GFX_LT(GFX12) }, + { BRW_OPCODE_CSEL, 18, "csel", 3, 1, GFX_LT(GFX12) }, { BRW_OPCODE_CSEL, 114, "csel", 3, 1, GFX_GE(GFX12) }, - { BRW_OPCODE_BFREV, 23, "bfrev", 1, 1, GFX_GE(GFX7) & GFX_LT(GFX12) }, + { BRW_OPCODE_BFREV, 23, "bfrev", 1, 1, GFX_LT(GFX12) }, { BRW_OPCODE_BFREV, 119, "bfrev", 1, 1, GFX_GE(GFX12) }, - { BRW_OPCODE_BFE, 24, "bfe", 3, 1, GFX_GE(GFX7) & GFX_LT(GFX12) }, + { BRW_OPCODE_BFE, 24, "bfe", 3, 1, GFX_LT(GFX12) }, { BRW_OPCODE_BFE, 120, "bfe", 3, 1, GFX_GE(GFX12) }, - { BRW_OPCODE_BFI1, 25, "bfi1", 2, 1, GFX_GE(GFX7) & GFX_LT(GFX12) }, + { BRW_OPCODE_BFI1, 25, "bfi1", 2, 1, GFX_LT(GFX12) }, { BRW_OPCODE_BFI1, 121, "bfi1", 2, 1, GFX_GE(GFX12) }, - { BRW_OPCODE_BFI2, 26, "bfi2", 3, 1, GFX_GE(GFX7) & GFX_LT(GFX12) }, + { BRW_OPCODE_BFI2, 26, "bfi2", 3, 1, GFX_LT(GFX12) }, { BRW_OPCODE_BFI2, 122, "bfi2", 3, 1, GFX_GE(GFX12) }, { BRW_OPCODE_JMPI, 32, "jmpi", 0, 0, GFX_ALL }, - { BRW_OPCODE_BRD, 33, "brd", 0, 0, GFX_GE(GFX7) }, + { BRW_OPCODE_BRD, 33, "brd", 0, 0, GFX_ALL }, { BRW_OPCODE_IF, 34, "if", 0, 0, GFX_ALL }, - { BRW_OPCODE_IFF, 35, "iff", 0, 0, GFX_LE(GFX5) }, - { BRW_OPCODE_BRC, 35, "brc", 0, 0, GFX_GE(GFX7) }, + { BRW_OPCODE_BRC, 35, "brc", 0, 0, GFX_ALL }, { BRW_OPCODE_ELSE, 36, "else", 0, 0, GFX_ALL }, { BRW_OPCODE_ENDIF, 37, "endif", 0, 0, GFX_ALL }, - { BRW_OPCODE_DO, 38, "do", 0, 0, GFX_LE(GFX5) }, - { BRW_OPCODE_CASE, 38, "case", 0, 0, GFX6 }, + { BRW_OPCODE_DO, 38, "do", 0, 0, 0 }, /* Pseudo opcode. */ { BRW_OPCODE_WHILE, 39, "while", 0, 0, GFX_ALL }, { BRW_OPCODE_BREAK, 40, "break", 0, 0, GFX_ALL }, { BRW_OPCODE_CONTINUE, 41, "cont", 0, 0, GFX_ALL }, { BRW_OPCODE_HALT, 42, "halt", 0, 0, GFX_ALL }, - { BRW_OPCODE_CALLA, 43, "calla", 0, 0, GFX_GE(GFX75) }, - { BRW_OPCODE_MSAVE, 44, "msave", 0, 0, GFX_LE(GFX5) }, - { BRW_OPCODE_CALL, 44, "call", 0, 0, GFX_GE(GFX6) }, - { BRW_OPCODE_MREST, 45, "mrest", 0, 0, GFX_LE(GFX5) }, - { BRW_OPCODE_RET, 45, "ret", 0, 0, GFX_GE(GFX6) }, - { BRW_OPCODE_PUSH, 46, "push", 0, 0, GFX_LE(GFX5) }, - { BRW_OPCODE_FORK, 46, "fork", 0, 0, GFX6 }, - { BRW_OPCODE_GOTO, 46, "goto", 0, 0, GFX_GE(GFX8) }, - { BRW_OPCODE_POP, 47, "pop", 2, 0, GFX_LE(GFX5) }, + { BRW_OPCODE_CALLA, 43, "calla", 0, 0, GFX_ALL }, + { BRW_OPCODE_CALL, 44, "call", 0, 0, GFX_ALL }, + { BRW_OPCODE_RET, 45, "ret", 0, 0, GFX_ALL }, + { BRW_OPCODE_GOTO, 46, "goto", 0, 0, GFX_ALL }, { BRW_OPCODE_WAIT, 48, "wait", 0, 1, GFX_LT(GFX12) }, { BRW_OPCODE_SEND, 49, "send", 1, 1, GFX_LT(GFX12) }, { BRW_OPCODE_SENDC, 50, "sendc", 1, 1, GFX_LT(GFX12) }, { BRW_OPCODE_SEND, 49, "send", 2, 1, GFX_GE(GFX12) }, { BRW_OPCODE_SENDC, 50, "sendc", 2, 1, GFX_GE(GFX12) }, - { BRW_OPCODE_SENDS, 51, "sends", 2, 1, GFX_GE(GFX9) & GFX_LT(GFX12) }, - { BRW_OPCODE_SENDSC, 52, "sendsc", 2, 1, GFX_GE(GFX9) & GFX_LT(GFX12) }, - { BRW_OPCODE_MATH, 56, "math", 2, 1, GFX_GE(GFX6) }, + { BRW_OPCODE_SENDS, 51, "sends", 2, 1, GFX_LT(GFX12) }, + { BRW_OPCODE_SENDSC, 52, "sendsc", 2, 1, GFX_LT(GFX12) }, + { BRW_OPCODE_MATH, 56, "math", 2, 1, GFX_ALL }, { BRW_OPCODE_ADD, 64, "add", 2, 1, GFX_ALL }, { BRW_OPCODE_MUL, 65, "mul", 2, 1, GFX_ALL }, { BRW_OPCODE_AVG, 66, "avg", 2, 1, GFX_ALL }, @@ -673,11 +665,11 @@ static const struct opcode_desc opcode_descs[] = { { BRW_OPCODE_MAC, 72, "mac", 2, 1, GFX_ALL }, { BRW_OPCODE_MACH, 73, "mach", 2, 1, GFX_ALL }, { BRW_OPCODE_LZD, 74, "lzd", 1, 1, GFX_ALL }, - { BRW_OPCODE_FBH, 75, "fbh", 1, 1, GFX_GE(GFX7) }, - { BRW_OPCODE_FBL, 76, "fbl", 1, 1, GFX_GE(GFX7) }, - { BRW_OPCODE_CBIT, 77, "cbit", 1, 1, GFX_GE(GFX7) }, - { BRW_OPCODE_ADDC, 78, "addc", 2, 1, GFX_GE(GFX7) }, - { BRW_OPCODE_SUBB, 79, "subb", 2, 1, GFX_GE(GFX7) }, + { BRW_OPCODE_FBH, 75, "fbh", 1, 1, GFX_ALL }, + { BRW_OPCODE_FBL, 76, "fbl", 1, 1, GFX_ALL }, + { BRW_OPCODE_CBIT, 77, "cbit", 1, 1, GFX_ALL }, + { BRW_OPCODE_ADDC, 78, "addc", 2, 1, GFX_ALL }, + { BRW_OPCODE_SUBB, 79, "subb", 2, 1, GFX_ALL }, { BRW_OPCODE_SAD2, 80, "sad2", 2, 1, GFX_ALL }, { BRW_OPCODE_SADA2, 81, "sada2", 2, 1, GFX_ALL }, { BRW_OPCODE_ADD3, 82, "add3", 3, 1, GFX_GE(GFX125) }, @@ -686,13 +678,12 @@ static const struct opcode_desc opcode_descs[] = { { BRW_OPCODE_DP3, 86, "dp3", 2, 1, GFX_LT(GFX11) }, { BRW_OPCODE_DP2, 87, "dp2", 2, 1, GFX_LT(GFX11) }, { BRW_OPCODE_DP4A, 88, "dp4a", 3, 1, GFX_GE(GFX12) }, - { BRW_OPCODE_LINE, 89, "line", 2, 1, GFX_LE(GFX10) }, + { BRW_OPCODE_LINE, 89, "line", 2, 1, GFX9 }, { BRW_OPCODE_DPAS, 89, "dpas", 3, 1, GFX_GE(GFX125) }, - { BRW_OPCODE_PLN, 90, "pln", 2, 1, GFX_GE(GFX45) & GFX_LE(GFX10) }, - { BRW_OPCODE_MAD, 91, "mad", 3, 1, GFX_GE(GFX6) }, - { BRW_OPCODE_LRP, 92, "lrp", 3, 1, GFX_GE(GFX6) & GFX_LE(GFX10) }, - { BRW_OPCODE_MADM, 93, "madm", 3, 1, GFX_GE(GFX8) }, - { BRW_OPCODE_NENOP, 125, "nenop", 0, 0, GFX45 }, + { BRW_OPCODE_PLN, 90, "pln", 2, 1, GFX9 }, + { BRW_OPCODE_MAD, 91, "mad", 3, 1, GFX_ALL }, + { BRW_OPCODE_LRP, 92, "lrp", 3, 1, GFX9 }, + { BRW_OPCODE_MADM, 93, "madm", 3, 1, GFX_ALL }, { BRW_OPCODE_NOP, 126, "nop", 0, 0, GFX_LT(GFX12) }, { BRW_OPCODE_NOP, 96, "nop", 0, 0, GFX_GE(GFX12) } }; diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index d533b6a3a3c..0bc55628da0 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -1518,17 +1518,6 @@ void brw_oword_block_read(struct brw_codegen *p, unsigned brw_scratch_surface_idx(const struct brw_codegen *p); -void brw_oword_block_read_scratch(struct brw_codegen *p, - struct brw_reg dest, - struct brw_reg mrf, - int num_regs, - unsigned offset); - -void brw_oword_block_write_scratch(struct brw_codegen *p, - struct brw_reg mrf, - int num_regs, - unsigned offset); - void gfx7_block_read_scratch(struct brw_codegen *p, struct brw_reg dest, int num_regs, diff --git a/src/intel/compiler/brw_eu_compact.c b/src/intel/compiler/brw_eu_compact.c index adacf8e4565..fbfbae2f9b4 100644 --- a/src/intel/compiler/brw_eu_compact.c +++ b/src/intel/compiler/brw_eu_compact.c @@ -2407,7 +2407,6 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset, break; case BRW_OPCODE_IF: - case BRW_OPCODE_IFF: case BRW_OPCODE_ELSE: case BRW_OPCODE_ENDIF: case BRW_OPCODE_WHILE: diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 8469c9804eb..36ef004c6b9 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -180,53 +180,45 @@ enum opcode { BRW_OPCODE_SYNC, BRW_OPCODE_MOV, BRW_OPCODE_SEL, - BRW_OPCODE_MOVI, /**< G45+ */ + BRW_OPCODE_MOVI, BRW_OPCODE_NOT, BRW_OPCODE_AND, BRW_OPCODE_OR, BRW_OPCODE_XOR, BRW_OPCODE_SHR, BRW_OPCODE_SHL, - BRW_OPCODE_DIM, /**< Gfx7.5 only */ - BRW_OPCODE_SMOV, /**< Gfx8+ */ + BRW_OPCODE_SMOV, BRW_OPCODE_ASR, BRW_OPCODE_ROR, /**< Gfx11+ */ BRW_OPCODE_ROL, /**< Gfx11+ */ BRW_OPCODE_CMP, BRW_OPCODE_CMPN, - BRW_OPCODE_CSEL, /**< Gfx8+ */ - BRW_OPCODE_BFREV, /**< Gfx7+ */ - BRW_OPCODE_BFE, /**< Gfx7+ */ - BRW_OPCODE_BFI1, /**< Gfx7+ */ - BRW_OPCODE_BFI2, /**< Gfx7+ */ + BRW_OPCODE_CSEL, + BRW_OPCODE_BFREV, + BRW_OPCODE_BFE, + BRW_OPCODE_BFI1, + BRW_OPCODE_BFI2, BRW_OPCODE_JMPI, - BRW_OPCODE_BRD, /**< Gfx7+ */ + BRW_OPCODE_BRD, BRW_OPCODE_IF, - BRW_OPCODE_IFF, /**< Pre-Gfx6 */ - BRW_OPCODE_BRC, /**< Gfx7+ */ + BRW_OPCODE_BRC, BRW_OPCODE_ELSE, BRW_OPCODE_ENDIF, - BRW_OPCODE_DO, /**< Pre-Gfx6 */ - BRW_OPCODE_CASE, /**< Gfx6 only */ + BRW_OPCODE_DO, /**< Used as pseudo opcode, will be moved later. */ BRW_OPCODE_WHILE, BRW_OPCODE_BREAK, BRW_OPCODE_CONTINUE, BRW_OPCODE_HALT, - BRW_OPCODE_CALLA, /**< Gfx7.5+ */ - BRW_OPCODE_MSAVE, /**< Pre-Gfx6 */ - BRW_OPCODE_CALL, /**< Gfx6+ */ - BRW_OPCODE_MREST, /**< Pre-Gfx6 */ - BRW_OPCODE_RET, /**< Gfx6+ */ - BRW_OPCODE_PUSH, /**< Pre-Gfx6 */ - BRW_OPCODE_FORK, /**< Gfx6 only */ - BRW_OPCODE_GOTO, /**< Gfx8+ */ - BRW_OPCODE_POP, /**< Pre-Gfx6 */ + BRW_OPCODE_CALLA, + BRW_OPCODE_CALL, + BRW_OPCODE_RET, + BRW_OPCODE_GOTO, BRW_OPCODE_WAIT, BRW_OPCODE_SEND, BRW_OPCODE_SENDC, - BRW_OPCODE_SENDS, /**< Gfx9+ */ - BRW_OPCODE_SENDSC, /**< Gfx9+ */ - BRW_OPCODE_MATH, /**< Gfx6+ */ + BRW_OPCODE_SENDS, + BRW_OPCODE_SENDSC, + BRW_OPCODE_MATH, BRW_OPCODE_ADD, BRW_OPCODE_MUL, BRW_OPCODE_AVG, @@ -238,11 +230,11 @@ enum opcode { BRW_OPCODE_MAC, BRW_OPCODE_MACH, BRW_OPCODE_LZD, - BRW_OPCODE_FBH, /**< Gfx7+ */ - BRW_OPCODE_FBL, /**< Gfx7+ */ - BRW_OPCODE_CBIT, /**< Gfx7+ */ - BRW_OPCODE_ADDC, /**< Gfx7+ */ - BRW_OPCODE_SUBB, /**< Gfx7+ */ + BRW_OPCODE_FBH, + BRW_OPCODE_FBL, + BRW_OPCODE_CBIT, + BRW_OPCODE_ADDC, + BRW_OPCODE_SUBB, BRW_OPCODE_SAD2, BRW_OPCODE_SADA2, BRW_OPCODE_ADD3, /* Gen12+ only */ @@ -253,29 +245,19 @@ enum opcode { BRW_OPCODE_DP4A, /**< Gfx12+ */ BRW_OPCODE_LINE, BRW_OPCODE_DPAS, /**< Gfx12.5+ */ - BRW_OPCODE_PLN, /**< G45+ */ - BRW_OPCODE_MAD, /**< Gfx6+ */ - BRW_OPCODE_LRP, /**< Gfx6+ */ - BRW_OPCODE_MADM, /**< Gfx8+ */ - BRW_OPCODE_NENOP, /**< G45 only */ + BRW_OPCODE_PLN, /**< Up until Gfx9 */ + BRW_OPCODE_MAD, + BRW_OPCODE_LRP, + BRW_OPCODE_MADM, BRW_OPCODE_NOP, NUM_BRW_OPCODES, - /* These are compiler backend opcodes that get translated into other - * instructions. - */ - FS_OPCODE_FB_WRITE = NUM_BRW_OPCODES, - /** - * Same as FS_OPCODE_FB_WRITE but expects its arguments separately as - * individual sources instead of as a single payload blob. The - * position/ordering of the arguments are defined by the enum - * fb_write_logical_srcs. + * The position/ordering of the arguments are defined + * by the enum fb_write_logical_srcs. */ - FS_OPCODE_FB_WRITE_LOGICAL, - - FS_OPCODE_REP_FB_WRITE, + FS_OPCODE_FB_WRITE_LOGICAL = NUM_BRW_OPCODES, FS_OPCODE_FB_READ, FS_OPCODE_FB_READ_LOGICAL, @@ -459,10 +441,6 @@ enum opcode { */ FS_OPCODE_SCHEDULING_FENCE, - SHADER_OPCODE_GFX4_SCRATCH_READ, - SHADER_OPCODE_GFX4_SCRATCH_WRITE, - SHADER_OPCODE_GFX7_SCRATCH_READ, - SHADER_OPCODE_SCRATCH_HEADER, /** @@ -559,155 +537,12 @@ enum opcode { FS_OPCODE_PIXEL_X, FS_OPCODE_PIXEL_Y, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL, - FS_OPCODE_SET_SAMPLE_ID, FS_OPCODE_PACK_HALF_2x16_SPLIT, FS_OPCODE_INTERPOLATE_AT_SAMPLE, FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, - VS_OPCODE_PULL_CONSTANT_LOAD, - VS_OPCODE_PULL_CONSTANT_LOAD_GFX7, - - VS_OPCODE_UNPACK_FLAGS_SIMD4X2, - - /** - * Terminate the geometry shader thread by doing an empty URB write. - * - * This opcode doesn't do an implied move from R0 to the first MRF. This - * allows the geometry shader to override the "GS Number of Output Vertices - * for Slot {0,1}" fields in the message header. - */ - GS_OPCODE_THREAD_END, - - /** - * Set the "Slot {0,1} Offset" fields of a URB_WRITE message header. - * - * - dst is the MRF containing the message header. - * - * - src0.x indicates which portion of the URB should be written to (e.g. a - * vertex number) - * - * - src1 is an immediate multiplier which will be applied to src0 - * (e.g. the size of a single vertex in the URB). - * - * Note: the hardware will apply this offset *in addition to* the offset in - * vec4_instruction::offset. - */ - GS_OPCODE_SET_WRITE_OFFSET, - - /** - * Set the "GS Number of Output Vertices for Slot {0,1}" fields of a - * URB_WRITE message header. - * - * - dst is the MRF containing the message header. - * - * - src0.x is the vertex count. The upper 16 bits will be ignored. - */ - GS_OPCODE_SET_VERTEX_COUNT, - - /** - * Set DWORD 2 of dst to the value in src. - */ - GS_OPCODE_SET_DWORD_2, - - /** - * Prepare the dst register for storage in the "Channel Mask" fields of a - * URB_WRITE message header. - * - * DWORD 4 of dst is shifted left by 4 bits, so that later, - * GS_OPCODE_SET_CHANNEL_MASKS can OR DWORDs 0 and 4 together to form the - * final channel mask. - * - * Note: since GS_OPCODE_SET_CHANNEL_MASKS ORs DWORDs 0 and 4 together to - * form the final channel mask, DWORDs 0 and 4 of the dst register must not - * have any extraneous bits set prior to execution of this opcode (that is, - * they should be in the range 0x0 to 0xf). - */ - GS_OPCODE_PREPARE_CHANNEL_MASKS, - - /** - * Set the "Channel Mask" fields of a URB_WRITE message header. - * - * - dst is the MRF containing the message header. - * - * - src.x is the channel mask, as prepared by - * GS_OPCODE_PREPARE_CHANNEL_MASKS. DWORDs 0 and 4 are OR'ed together to - * form the final channel mask. - */ - GS_OPCODE_SET_CHANNEL_MASKS, - - /** - * Get the "Instance ID" fields from the payload. - * - * - dst is the GRF for gl_InvocationID. - */ - GS_OPCODE_GET_INSTANCE_ID, - - /** - * Send a FF_SYNC message to allocate initial URB handles (gfx6). - * - * - dst will be used as the writeback register for the FF_SYNC operation. - * - * - src0 is the number of primitives written. - * - * - src1 is the value to hold in M0.0: number of SO vertices to write - * and number of SO primitives needed. Its value will be overwritten - * with the SVBI values if transform feedback is enabled. - * - * Note: This opcode uses an implicit MRF register for the ff_sync message - * header, so the caller is expected to set inst->base_mrf and initialize - * that MRF register to r0. This opcode will also write to this MRF register - * to include the allocated URB handle so it can then be reused directly as - * the header in the URB write operation we are allocating the handle for. - */ - GS_OPCODE_FF_SYNC, - - /** - * Move r0.1 (which holds PrimitiveID information in gfx6) to a separate - * register. - * - * - dst is the GRF where PrimitiveID information will be moved. - */ - GS_OPCODE_SET_PRIMITIVE_ID, - - /** - * Write transform feedback data to the SVB by sending a SVB WRITE message. - * Used in gfx6. - * - * - dst is the MRF register containing the message header. - * - * - src0 is the register where the vertex data is going to be copied from. - * - * - src1 is the destination register when write commit occurs. - */ - GS_OPCODE_SVB_WRITE, - - /** - * Set destination index in the SVB write message payload (M0.5). Used - * in gfx6 for transform feedback. - * - * - dst is the header to save the destination indices for SVB WRITE. - * - src is the register that holds the destination indices value. - */ - GS_OPCODE_SVB_SET_DST_INDEX, - - /** - * Prepare Mx.0 subregister for being used in the FF_SYNC message header. - * Used in gfx6 for transform feedback. - * - * - dst will hold the register with the final Mx.0 value. - * - * - src0 has the number of vertices emitted in SO (NumSOVertsToWrite) - * - * - src1 has the number of needed primitives for SO (NumSOPrimsNeeded) - * - * - src2 is the value to hold in M0: number of SO vertices to write - * and number of SO primitives needed. - */ - GS_OPCODE_FF_SYNC_SET_PRIMITIVES, - /** * Terminate the compute shader. */ @@ -742,17 +577,6 @@ enum opcode { /** Fills out a relocatable immediate */ SHADER_OPCODE_MOV_RELOC_IMM, - TCS_OPCODE_GET_INSTANCE_ID, - TCS_OPCODE_GET_PRIMITIVE_ID, - TCS_OPCODE_CREATE_BARRIER_HEADER, - TCS_OPCODE_SRC0_010_IS_ZERO, - TCS_OPCODE_RELEASE_INPUT, - TCS_OPCODE_THREAD_END, - - TES_OPCODE_GET_PRIMITIVE_ID, - TES_OPCODE_CREATE_INPUT_READ_HEADER, - TES_OPCODE_ADD_INDIRECT_URB_OFFSET, - SHADER_OPCODE_BTD_SPAWN_LOGICAL, SHADER_OPCODE_BTD_RETIRE_LOGICAL, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 76d9009cb5f..80013cb8c41 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -224,8 +224,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode); if (reg.file == BRW_IMMEDIATE_VALUE) { - if (reg.type == BRW_REGISTER_TYPE_DF || - brw_inst_opcode(p->isa, inst) == BRW_OPCODE_DIM) + if (reg.type == BRW_REGISTER_TYPE_DF) brw_inst_set_imm_df(devinfo, inst, reg.df); else if (reg.type == BRW_REGISTER_TYPE_UQ || reg.type == BRW_REGISTER_TYPE_Q) @@ -963,7 +962,6 @@ ALU2(OR) ALU2(XOR) ALU2(SHR) ALU2(SHL) -ALU1(DIM) ALU2(ASR) ALU2(ROL) ALU2(ROR) @@ -1520,165 +1518,6 @@ brw_scratch_surface_idx(const struct brw_codegen *p) return GFX8_BTI_STATELESS_NON_COHERENT; } -/** - * Write a block of OWORDs (half a GRF each) from the scratch buffer, - * using a constant offset per channel. - * - * The offset must be aligned to oword size (16 bytes). Used for - * register spilling. - */ -void brw_oword_block_write_scratch(struct brw_codegen *p, - struct brw_reg mrf, - int num_regs, - unsigned offset) -{ - const struct intel_device_info *devinfo = p->devinfo; - const unsigned target_cache = GFX7_SFID_DATAPORT_DATA_CACHE; - const struct tgl_swsb swsb = brw_get_default_swsb(p); - uint32_t msg_type; - - offset /= 16; - - mrf = retype(mrf, BRW_REGISTER_TYPE_UD); - - const unsigned mlen = 1 + num_regs; - - /* Set up the message header. This is g0, with g0.2 filled with - * the offset. We don't want to leave our offset around in g0 or - * it'll screw up texture samples, so set it up inside the message - * reg. - */ - { - brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_8); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); - - brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - - /* set message header global offset field (reg 0, element 2) */ - brw_set_default_exec_size(p, BRW_EXECUTE_1); - brw_set_default_swsb(p, tgl_swsb_null()); - brw_MOV(p, - retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, - mrf.nr, - 2), BRW_REGISTER_TYPE_UD), - brw_imm_ud(offset)); - - brw_pop_insn_state(p); - brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); - } - - { - struct brw_reg dest; - brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); - int send_commit_msg; - struct brw_reg src_header = retype(brw_vec8_grf(0, 0), - BRW_REGISTER_TYPE_UW); - - brw_inst_set_sfid(devinfo, insn, target_cache); - - if (brw_inst_exec_size(devinfo, insn) >= 16) - src_header = vec16(src_header); - - assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE); - - /* Until gfx6, writes followed by reads from the same location - * are not guaranteed to be ordered unless write_commit is set. - * If set, then a no-op write is issued to the destination - * register to set a dependency, and a read from the destination - * can be used to ensure the ordering. - * - * For gfx6, only writes between different threads need ordering - * protection. Our use of DP writes is all about register - * spilling within a thread. - */ - dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); - send_commit_msg = 0; - - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, mrf); - - msg_type = GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; - - brw_set_desc(p, insn, - brw_message_desc(devinfo, mlen, send_commit_msg, true) | - brw_dp_write_desc(devinfo, brw_scratch_surface_idx(p), - BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), - msg_type, send_commit_msg)); - } -} - - -/** - * Read a block of owords (half a GRF each) from the scratch buffer - * using a constant index per channel. - * - * Offset must be aligned to oword size (16 bytes). Used for register - * spilling. - */ -void -brw_oword_block_read_scratch(struct brw_codegen *p, - struct brw_reg dest, - struct brw_reg mrf, - int num_regs, - unsigned offset) -{ - const struct intel_device_info *devinfo = p->devinfo; - const struct tgl_swsb swsb = brw_get_default_swsb(p); - - offset /= 16; - - /* On gen 7 and above, we no longer have message registers and we can - * send from any register we want. By using the destination register - * for the message, we guarantee that the implied message write won't - * accidentally overwrite anything. This has been a problem because - * the MRF registers and source for the final FB write are both fixed - * and may overlap. - */ - mrf = retype(dest, BRW_REGISTER_TYPE_UD); - dest = retype(dest, BRW_REGISTER_TYPE_UW); - - const unsigned rlen = num_regs; - const unsigned target_cache = GFX7_SFID_DATAPORT_DATA_CACHE; - - { - brw_push_insn_state(p); - brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); - brw_set_default_exec_size(p, BRW_EXECUTE_8); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - - brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - - /* set message header global offset field (reg 0, element 2) */ - brw_set_default_exec_size(p, BRW_EXECUTE_1); - brw_set_default_swsb(p, tgl_swsb_null()); - brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset)); - - brw_pop_insn_state(p); - brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); - } - - { - brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); - - brw_inst_set_sfid(devinfo, insn, target_cache); - assert(brw_inst_pred_control(devinfo, insn) == 0); - - brw_set_dest(p, insn, dest); /* UW? */ - brw_set_src0(p, insn, mrf); - - brw_set_desc(p, insn, - brw_message_desc(devinfo, 1, rlen, true) | - brw_dp_read_desc(devinfo, brw_scratch_surface_idx(p), - BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, - BRW_DATAPORT_READ_TARGET_RENDER_CACHE)); - } -} - void gfx7_block_read_scratch(struct brw_codegen *p, struct brw_reg dest, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 27716bb032c..e15495a62c3 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -216,7 +216,6 @@ fs_inst::is_send_from_grf() const return true; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: return src[1].file == VGRF; - case FS_OPCODE_FB_WRITE: case FS_OPCODE_FB_READ: return src[0].file == VGRF; default: @@ -229,7 +228,6 @@ fs_inst::is_control_source(unsigned arg) const { switch (opcode) { case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4: return arg == 0; case SHADER_OPCODE_BROADCAST: @@ -277,7 +275,6 @@ bool fs_inst::is_payload(unsigned arg) const { switch (opcode) { - case FS_OPCODE_FB_WRITE: case FS_OPCODE_FB_READ: case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: @@ -864,16 +861,6 @@ fs_inst::size_read(int arg) const } break; - case FS_OPCODE_FB_WRITE: - case FS_OPCODE_REP_FB_WRITE: - if (arg == 0) { - if (base_mrf >= 0) - return src[0].file == BAD_FILE ? 0 : 2 * REG_SIZE; - else - return mlen * REG_SIZE; - } - break; - case FS_OPCODE_FB_READ: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: @@ -881,11 +868,6 @@ fs_inst::size_read(int arg) const return mlen * REG_SIZE; break; - case FS_OPCODE_SET_SAMPLE_ID: - if (arg == 1) - return 1; - break; - case FS_OPCODE_LINTERP: if (arg == 1) return 16; @@ -1022,11 +1004,10 @@ fs_inst::flags_read(const intel_device_info *devinfo) const unsigned fs_inst::flags_written(const intel_device_info *devinfo) const { - if ((conditional_mod && (opcode != BRW_OPCODE_SEL && - opcode != BRW_OPCODE_CSEL && - opcode != BRW_OPCODE_IF && - opcode != BRW_OPCODE_WHILE)) || - opcode == FS_OPCODE_FB_WRITE) { + if (conditional_mod && (opcode != BRW_OPCODE_SEL && + opcode != BRW_OPCODE_CSEL && + opcode != BRW_OPCODE_IF && + opcode != BRW_OPCODE_WHILE)) { return brw_fs_flag_mask(this, 1); } else if (opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL || opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL || @@ -1083,15 +1064,7 @@ fs_inst::implied_mrf_writes() const case SHADER_OPCODE_LOD: case SHADER_OPCODE_SAMPLEINFO: return 1; - case FS_OPCODE_FB_WRITE: - case FS_OPCODE_REP_FB_WRITE: - return src[0].file == BAD_FILE ? 0 : 2; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - case SHADER_OPCODE_GFX4_SCRATCH_READ: - return 1; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4: - return mlen; - case SHADER_OPCODE_GFX4_SCRATCH_WRITE: return mlen; default: unreachable("not reached"); @@ -2160,10 +2133,7 @@ brw_fb_write_msg_control(const fs_inst *inst, { uint32_t mctl; - if (inst->opcode == FS_OPCODE_REP_FB_WRITE) { - assert(inst->group == 0 && inst->exec_size == 16); - mctl = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; - } else if (prog_data->dual_src_blend) { + if (prog_data->dual_src_blend) { assert(inst->exec_size == 8); if (inst->group % 16 == 0) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 0076bc12397..6fea62c7c2d 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -461,19 +461,11 @@ private: struct brw_reg dst, struct brw_reg src); void generate_ddy(const fs_inst *inst, struct brw_reg dst, struct brw_reg src); - void generate_scratch_write(fs_inst *inst, struct brw_reg src); - void generate_scratch_read(fs_inst *inst, struct brw_reg dst); - void generate_scratch_read_gfx7(fs_inst *inst, struct brw_reg dst); void generate_scratch_header(fs_inst *inst, struct brw_reg dst); void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst, struct brw_reg index, struct brw_reg offset); - void generate_set_sample_id(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - void generate_halt(fs_inst *inst); void generate_mov_indirect(fs_inst *inst, diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 897eee51f22..334a72b2fad 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -603,7 +603,6 @@ namespace brw { ALU3(BFI2) ALU1(BFREV) ALU1(CBIT) - ALU1(DIM) ALU2(DP2) ALU2(DP3) ALU2(DP4) diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 4cd3e8a6918..3f0ebe31382 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -740,10 +740,6 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, return false; } - if (has_source_modifiers && - inst->opcode == SHADER_OPCODE_GFX4_SCRATCH_WRITE) - return false; - /* Some instructions implemented in the generator backend, such as * derivatives, assume that their operands are packed so we can't * generally propagate strided regions to them. diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 3f7e671a122..34c81b80068 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -795,63 +795,6 @@ fs_generator::generate_halt(fs_inst *) brw_HALT(p); } -void -fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src) -{ - /* The 32-wide messages only respect the first 16-wide half of the channel - * enable signals which are replicated identically for the second group of - * 16 channels, so we cannot use them unless the write is marked - * force_writemask_all. - */ - const unsigned lower_size = inst->force_writemask_all ? inst->exec_size : - MIN2(16, inst->exec_size); - const unsigned block_size = 4 * lower_size / REG_SIZE; - const tgl_swsb swsb = brw_get_default_swsb(p); - assert(inst->mlen != 0); - - brw_push_insn_state(p); - brw_set_default_exec_size(p, cvt(lower_size) - 1); - - for (unsigned i = 0; i < inst->exec_size / lower_size; i++) { - brw_set_default_group(p, inst->group + lower_size * i); - - if (i > 0) { - assert(swsb.mode & TGL_SBID_SET); - brw_set_default_swsb(p, tgl_swsb_sbid(TGL_SBID_SRC, swsb.sbid)); - } else { - brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); - } - - brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0), - retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD)); - - brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); - brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), - block_size, - inst->offset + block_size * REG_SIZE * i); - } - - brw_pop_insn_state(p); -} - -void -fs_generator::generate_scratch_read(fs_inst *inst, struct brw_reg dst) -{ - assert(inst->exec_size <= 16 || inst->force_writemask_all); - assert(inst->mlen != 0); - - brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), - inst->exec_size / 8, inst->offset); -} - -void -fs_generator::generate_scratch_read_gfx7(fs_inst *inst, struct brw_reg dst) -{ - assert(inst->exec_size <= 16 || inst->force_writemask_all); - - gfx7_block_read_scratch(p, dst, inst->exec_size / 8, inst->offset); -} - /* The A32 messages take a buffer base address in header.5:[31:0] (See * MH1_A32_PSM for typed messages or MH_A32_GO for byte/dword scattered * and OWord block messages in the SKL PRM Vol. 2d for more details.) @@ -943,35 +886,6 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, read_offset, surf_index); } -/* Sets vstride=1, width=4, hstride=0 of register src1 during - * the ADD instruction. - */ -void -fs_generator::generate_set_sample_id(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) -{ - assert(dst.type == BRW_REGISTER_TYPE_D || - dst.type == BRW_REGISTER_TYPE_UD); - assert(src0.type == BRW_REGISTER_TYPE_D || - src0.type == BRW_REGISTER_TYPE_UD); - - const struct brw_reg reg = stride(src1, 1, 4, 0); - const unsigned lower_size = MIN2(inst->exec_size, 16); - - for (unsigned i = 0; i < inst->exec_size / lower_size; i++) { - brw_inst *insn = brw_ADD(p, offset(dst, i * lower_size / 8), - offset(src0, (src0.vstride == 0 ? 0 : (1 << (src0.vstride - 1)) * - (i * lower_size / (1 << src0.width))) * - type_sz(src0.type) / REG_SIZE), - suboffset(reg, i * lower_size / 4)); - brw_inst_set_exec_size(devinfo, insn, cvt(lower_size) - 1); - brw_inst_set_group(devinfo, insn, inst->group + lower_size * i); - brw_set_default_swsb(p, tgl_swsb_null()); - } -} - void fs_generator::enable_debug(const char *shader_name) { @@ -1411,21 +1325,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, generate_ddy(inst, dst, src[0]); break; - case SHADER_OPCODE_GFX4_SCRATCH_WRITE: - generate_scratch_write(inst, src[0]); - send_count++; - break; - - case SHADER_OPCODE_GFX4_SCRATCH_READ: - generate_scratch_read(inst, dst); - send_count++; - break; - - case SHADER_OPCODE_GFX7_SCRATCH_READ: - generate_scratch_read_gfx7(inst, dst); - send_count++; - break; - case SHADER_OPCODE_SCRATCH_HEADER: generate_scratch_header(inst, dst); break; @@ -1571,10 +1470,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, break; } - case FS_OPCODE_SET_SAMPLE_ID: - generate_set_sample_id(inst, dst, src[0], src[1]); - break; - case SHADER_OPCODE_HALT_TARGET: /* This is the place where the final HALT needs to be inserted if * we've emitted any discards. If not, this will emit no code. diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index 3cfbbe53bf5..fd391e04a9a 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -505,18 +505,6 @@ fs_reg_alloc::setup_inst_interference(const fs_inst *inst) inst->dst.file == VGRF) ra_add_node_interference(g, first_vgrf_node + inst->dst.nr, grf127_send_hack_node); - - /* Spilling instruction are generated as SEND messages from MRF but as - * Gfx7+ supports sending from GRF the driver will maps assingn these - * MRF registers to a GRF. Implementations reuses the dest of the send - * message as source. So as we will have an overlap for sure, we create - * an interference between destination and grf127. - */ - if ((inst->opcode == SHADER_OPCODE_GFX7_SCRATCH_READ || - inst->opcode == SHADER_OPCODE_GFX4_SCRATCH_READ) && - inst->dst.file == VGRF) - ra_add_node_interference(g, first_vgrf_node + inst->dst.nr, - grf127_send_hack_node); } /* From the Skylake PRM Vol. 2a docs for sends: @@ -918,7 +906,6 @@ fs_reg_alloc::set_spill_costs() break; case BRW_OPCODE_IF: - case BRW_OPCODE_IFF: block_scale *= 0.5; break; diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index 650ce664acb..24abee2a471 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -283,7 +283,6 @@ namespace { case BRW_OPCODE_XOR: case BRW_OPCODE_SHR: case BRW_OPCODE_SHL: - case BRW_OPCODE_DIM: case BRW_OPCODE_ASR: case BRW_OPCODE_CMPN: case BRW_OPCODE_BFREV: @@ -315,18 +314,6 @@ namespace { case FS_OPCODE_DDY_COARSE: case FS_OPCODE_PIXEL_X: case FS_OPCODE_PIXEL_Y: - case FS_OPCODE_SET_SAMPLE_ID: - case GS_OPCODE_SET_DWORD_2: - case GS_OPCODE_SET_WRITE_OFFSET: - case GS_OPCODE_SET_VERTEX_COUNT: - case GS_OPCODE_PREPARE_CHANNEL_MASKS: - case GS_OPCODE_SET_CHANNEL_MASKS: - case GS_OPCODE_GET_INSTANCE_ID: - case GS_OPCODE_SET_PRIMITIVE_ID: - case GS_OPCODE_SVB_SET_DST_INDEX: - case TCS_OPCODE_SRC0_010_IS_ZERO: - case TCS_OPCODE_GET_PRIMITIVE_ID: - case TES_OPCODE_GET_PRIMITIVE_ID: case SHADER_OPCODE_READ_SR_REG: if (devinfo->ver >= 11) { return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, @@ -565,27 +552,6 @@ namespace { 0, 2 /* XXX */, 0, 0, 0, 8 /* XXX */, 0, 0); - case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: - case TCS_OPCODE_GET_INSTANCE_ID: - case TES_OPCODE_CREATE_INPUT_READ_HEADER: - return calculate_desc(info, EU_UNIT_FPU, 22 /* XXX */, 0, 0, - 6 /* XXX */, 0, - 0, 8 /* XXX */, 4 /* XXX */, 12 /* XXX */, - 0, 0); - - case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: - case TCS_OPCODE_CREATE_BARRIER_HEADER: - return calculate_desc(info, EU_UNIT_FPU, 32 /* XXX */, 0, 0, - 8 /* XXX */, 0, - 0, 8 /* XXX */, 4 /* XXX */, 12 /* XXX */, - 0, 0); - - case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: - return calculate_desc(info, EU_UNIT_FPU, 12 /* XXX */, 0, 0, - 4 /* XXX */, 0, - 0, 8 /* XXX */, 4 /* XXX */, 12 /* XXX */, - 0, 0); - case SHADER_OPCODE_TEX: case FS_OPCODE_TXB: case SHADER_OPCODE_TXD: @@ -608,18 +574,10 @@ namespace { case SHADER_OPCODE_TG4_OFFSET_LOD: case SHADER_OPCODE_TG4_OFFSET_BIAS: case SHADER_OPCODE_SAMPLEINFO: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4: return calculate_desc(info, EU_UNIT_SAMPLER, 2, 0, 0, 0, 16 /* XXX */, 8 /* XXX */, 750 /* XXX */, 0, 0, 2 /* XXX */, 0); - case GS_OPCODE_THREAD_END: - case GS_OPCODE_FF_SYNC: - case TCS_OPCODE_RELEASE_INPUT: - case TCS_OPCODE_THREAD_END: - return calculate_desc(info, EU_UNIT_URB, 2, 0, 0, 0, 6 /* XXX */, - 32 /* XXX */, 200 /* XXX */, 0, 0, 0, 0); - case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_INTERLOCK: switch (info.sfid) { @@ -640,33 +598,14 @@ namespace { abort(); } - case SHADER_OPCODE_GFX4_SCRATCH_READ: - case SHADER_OPCODE_GFX4_SCRATCH_WRITE: - case SHADER_OPCODE_GFX7_SCRATCH_READ: - return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 0, 8 /* XXX */, - 10 /* XXX */, 100 /* XXX */, 0, 0, 0, 0); - - case FS_OPCODE_FB_WRITE: case FS_OPCODE_FB_READ: - case FS_OPCODE_REP_FB_WRITE: return calculate_desc(info, EU_UNIT_DP_RC, 2, 0, 0, 0, 450 /* XXX */, 10 /* XXX */, 300 /* XXX */, 0, 0, 0, 0); - case GS_OPCODE_SVB_WRITE: - return calculate_desc(info, EU_UNIT_DP_RC, 2 /* XXX */, 0, 0, - 0, 450 /* XXX */, - 10 /* XXX */, 300 /* XXX */, 0, 0, - 0, 0); - case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: return calculate_desc(info, EU_UNIT_DP_CC, 2, 0, 0, 0, 16 /* XXX */, 10 /* XXX */, 100 /* XXX */, 0, 0, 0, 0); - case VS_OPCODE_PULL_CONSTANT_LOAD: - case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7: - return calculate_desc(info, EU_UNIT_SAMPLER, 2, 0, 0, 0, 16, - 8, 750, 0, 0, 2, 0); - case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index d1852de7ae5..c699adcfc01 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -317,9 +317,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa) latency = 100; break; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - case VS_OPCODE_PULL_CONSTANT_LOAD: /* testing using varying-index pull constants: * * 16 cycles: @@ -349,18 +347,6 @@ schedule_node::set_latency(const struct brw_isa_info *isa) latency = 200; break; - case SHADER_OPCODE_GFX7_SCRATCH_READ: - /* Testing a load from offset 0, that had been previously written: - * - * send(8) g114<1>UW g0<8,8,1>F data (0, 0, 0) mlen 1 rlen 1 { align1 WE_normal 1Q }; - * mov(8) null g114<8,8,1>F { align1 WE_normal 1Q }; - * - * The cycles spent seemed to be grouped around 40-50 (as low as 38), - * then around 140. Presumably this is cache hit vs miss. - */ - latency = 50; - break; - case SHADER_OPCODE_SEND: switch (inst->sfid) { case BRW_SFID_SAMPLER: { diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index ce0ad6c8571..dd52ce41fc7 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -164,12 +164,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) assert(brw_opcode_desc(isa, op)->name); return brw_opcode_desc(isa, op)->name; - case FS_OPCODE_FB_WRITE: - return "fb_write"; case FS_OPCODE_FB_WRITE_LOGICAL: return "fb_write_logical"; - case FS_OPCODE_REP_FB_WRITE: - return "rep_fb_write"; case FS_OPCODE_FB_READ: return "fb_read"; case FS_OPCODE_FB_READ_LOGICAL: @@ -342,12 +338,6 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) case FS_OPCODE_PACK: return "pack"; - case SHADER_OPCODE_GFX4_SCRATCH_READ: - return "gfx4_scratch_read"; - case SHADER_OPCODE_GFX4_SCRATCH_WRITE: - return "gfx4_scratch_write"; - case SHADER_OPCODE_GFX7_SCRATCH_READ: - return "gfx7_scratch_read"; case SHADER_OPCODE_SCRATCH_HEADER: return "scratch_header"; @@ -398,14 +388,9 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: return "uniform_pull_const"; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4: - return "varying_pull_const_gfx4"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: return "varying_pull_const_logical"; - case FS_OPCODE_SET_SAMPLE_ID: - return "set_sample_id"; - case FS_OPCODE_PACK_HALF_2x16_SPLIT: return "pack_half_2x16_split"; @@ -419,38 +404,6 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: return "interp_per_slot_offset"; - case VS_OPCODE_PULL_CONSTANT_LOAD: - return "pull_constant_load"; - case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7: - return "pull_constant_load_gfx7"; - - case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: - return "unpack_flags_simd4x2"; - - case GS_OPCODE_THREAD_END: - return "gs_thread_end"; - case GS_OPCODE_SET_WRITE_OFFSET: - return "set_write_offset"; - case GS_OPCODE_SET_VERTEX_COUNT: - return "set_vertex_count"; - case GS_OPCODE_SET_DWORD_2: - return "set_dword_2"; - case GS_OPCODE_PREPARE_CHANNEL_MASKS: - return "prepare_channel_masks"; - case GS_OPCODE_SET_CHANNEL_MASKS: - return "set_channel_masks"; - case GS_OPCODE_GET_INSTANCE_ID: - return "get_instance_id"; - case GS_OPCODE_FF_SYNC: - return "ff_sync"; - case GS_OPCODE_SET_PRIMITIVE_ID: - return "set_primitive_id"; - case GS_OPCODE_SVB_WRITE: - return "gs_svb_write"; - case GS_OPCODE_SVB_SET_DST_INDEX: - return "gs_svb_set_dst_index"; - case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: - return "gs_ff_sync_set_primitives"; case CS_OPCODE_CS_TERMINATE: return "cs_terminate"; case SHADER_OPCODE_BARRIER: @@ -466,25 +419,6 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) case SHADER_OPCODE_MOV_RELOC_IMM: return "mov_reloc_imm"; - case TCS_OPCODE_GET_INSTANCE_ID: - return "tcs_get_instance_id"; - case TCS_OPCODE_GET_PRIMITIVE_ID: - return "tcs_get_primitive_id"; - case TCS_OPCODE_CREATE_BARRIER_HEADER: - return "tcs_create_barrier_header"; - case TCS_OPCODE_SRC0_010_IS_ZERO: - return "tcs_src0<0,1,0>_is_zero"; - case TCS_OPCODE_RELEASE_INPUT: - return "tcs_release_input"; - case TCS_OPCODE_THREAD_END: - return "tcs_thread_end"; - case TES_OPCODE_CREATE_INPUT_READ_HEADER: - return "tes_create_input_read_header"; - case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: - return "tes_add_indirect_urb_offset"; - case TES_OPCODE_GET_PRIMITIVE_ID: - return "tes_get_primitive_id"; - case RT_OPCODE_TRACE_RAY_LOGICAL: return "rt_trace_ray_logical"; @@ -1042,7 +976,6 @@ backend_instruction::has_side_effects() const case BRW_OPCODE_SYNC: case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: - case SHADER_OPCODE_GFX4_SCRATCH_WRITE: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: @@ -1054,11 +987,8 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_INTERLOCK: case SHADER_OPCODE_URB_WRITE_LOGICAL: - case FS_OPCODE_FB_WRITE: case FS_OPCODE_FB_WRITE_LOGICAL: - case FS_OPCODE_REP_FB_WRITE: case SHADER_OPCODE_BARRIER: - case TCS_OPCODE_RELEASE_INPUT: case SHADER_OPCODE_RND_MODE: case SHADER_OPCODE_FLOAT_CONTROL_MODE: case FS_OPCODE_SCHEDULING_FENCE: |