#
# Copyright (c) 2018 Valve Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#

# Class that represents all the information we have about the opcode
# NOTE: this must be kept in sync with aco_op_info

import sys
from enum import Enum


class InstrClass(Enum):
    """Instruction class attached to every opcode through ``opcode(cls=...)``."""
    Valu32 = 0
    ValuConvert32 = 1
    Valu64 = 2
    ValuQuarterRate32 = 3
    ValuFma = 4
    ValuTranscendental32 = 5
    ValuDouble = 6
    ValuDoubleAdd = 7
    ValuDoubleConvert = 8
    ValuDoubleTranscendental = 9
    Salu = 10
    SMem = 11
    Barrier = 12
    Branch = 13
    Sendmsg = 14
    DS = 15
    Export = 16
    VMem = 17
    Waitcnt = 18
    Other = 19


class Format(Enum):
    """Instruction formats.

    Members up to VOP3P are plain indices (note that 7 is unused); VOP1
    through SDWA are single-bit values (1 << 8 .. 1 << 14).
    NOTE(review): presumably the bit values are meant to be OR-ed together
    by the consumer of the generated tables -- confirm against the C++ side.
    """
    PSEUDO = 0
    SOP1 = 1
    SOP2 = 2
    SOPK = 3
    SOPP = 4
    SOPC = 5
    SMEM = 6
    DS = 8
    MTBUF = 9
    MUBUF = 10
    MIMG = 11
    EXP = 12
    FLAT = 13
    GLOBAL = 14
    SCRATCH = 15
    PSEUDO_BRANCH = 16
    PSEUDO_BARRIER = 17
    PSEUDO_REDUCTION = 18
    VOP3P = 19
    VOP1 = 1 << 8
    VOP2 = 1 << 9
    VOPC = 1 << 10
    VOP3 = 1 << 11
    VINTRP = 1 << 12
    DPP = 1 << 13
    SDWA = 1 << 14

    def get_builder_fields(self):
        """Return the extra builder fields of this format.

        Each entry is a tuple ``(c_type, name, default[, dest])``:

        - ``default`` is the default value as source text, or ``None`` when
          the field has no default;
        - ``dest``, when present, is the name reported by
          ``get_builder_field_dests()`` instead of ``name``.

        Formats without extra fields yield an empty list.
        """
        if self == Format.SOPK:
            return [('uint16_t', 'imm', None)]
        elif self == Format.SOPP:
            return [('uint32_t', 'block', '-1'),
                    ('uint32_t', 'imm', '0')]
        elif self == Format.SMEM:
            return [('memory_sync_info', 'sync', 'memory_sync_info()'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'nv', 'false')]
        elif self == Format.DS:
            return [('int16_t', 'offset0', '0'),
                    ('int8_t', 'offset1', '0'),
                    ('bool', 'gds', 'false')]
        elif self == Format.MTBUF:
            return [('unsigned', 'dfmt', None),
                    ('unsigned', 'nfmt', None),
                    ('unsigned', 'offset', None),
                    ('bool', 'offen', None),
                    ('bool', 'idxen', 'false'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false')]
        elif self == Format.MUBUF:
            return [('unsigned', 'offset', None),
                    ('bool', 'offen', None),
                    ('bool', 'swizzled', 'false'),
                    ('bool', 'idxen', 'false'),
                    ('bool', 'addr64', 'false'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false'),
                    ('bool', 'lds', 'false')]
        elif self == Format.MIMG:
            # An unreachable second ``return`` (a copy of the VINTRP fields)
            # used to follow this one; it has been removed.
            return [('unsigned', 'dmask', '0xF'),
                    ('bool', 'da', 'false'),
                    ('bool', 'unrm', 'true'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false'),
                    ('bool', 'lwe', 'false'),
                    ('bool', 'r128_a16', 'false', 'r128'),
                    ('bool', 'd16', 'false')]
        elif self == Format.EXP:
            return [('unsigned', 'enabled_mask', None),
                    ('unsigned', 'dest', None),
                    ('bool', 'compr', 'false', 'compressed'),
                    ('bool', 'done', 'false'),
                    ('bool', 'vm', 'false', 'valid_mask')]
        elif self == Format.PSEUDO_BRANCH:
            return [('uint32_t', 'target0', '0', 'target[0]'),
                    ('uint32_t', 'target1', '0', 'target[1]')]
        elif self == Format.PSEUDO_REDUCTION:
            return [('ReduceOp', 'op', None, 'reduce_op'),
                    ('unsigned', 'cluster_size', '0')]
        elif self == Format.PSEUDO_BARRIER:
            return [('memory_sync_info', 'sync', None),
                    ('sync_scope', 'exec_scope', 'scope_invocation')]
        elif self == Format.VINTRP:
            return [('unsigned', 'attribute', None),
                    ('unsigned', 'component', None)]
        elif self == Format.DPP:
            return [('uint16_t', 'dpp_ctrl', None),
                    ('uint8_t', 'row_mask', '0xF'),
                    ('uint8_t', 'bank_mask', '0xF'),
                    ('bool', 'bound_ctrl', 'true')]
        elif self == Format.VOP3P:
            return [('uint8_t', 'opsel_lo', None),
                    ('uint8_t', 'opsel_hi', None)]
        elif self in [Format.FLAT, Format.GLOBAL, Format.SCRATCH]:
            # The default is spelled as text like every other default here;
            # the generated declaration ("uint16_t offset=0") is unchanged.
            return [('uint16_t', 'offset', '0'),
                    ('memory_sync_info', 'sync', 'memory_sync_info()'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'lds', 'false'),
                    ('bool', 'nv', 'false')]
        else:
            return []

    def get_builder_field_names(self):
        """Return the parameter name of every builder field, in order."""
        return [f[1] for f in self.get_builder_fields()]

    def get_builder_field_dests(self):
        """Return the destination name of every builder field, in order.

        This is the optional fourth tuple element when present, otherwise
        the parameter name.
        """
        return [(f[3] if len(f) >= 4 else f[1]) for f in self.get_builder_fields()]

    def get_builder_field_decls(self):
        """Return one ``type name[=default]`` declaration per builder field."""
        return [('%s %s=%s' % (f[0], f[1], f[2]) if f[2] is not None else '%s %s' % (f[0], f[1]))
                for f in self.get_builder_fields()]

    def get_builder_initialization(self, num_operands):
        """Return extra initialization statements emitted for this format.

        Only SDWA produces any text: one ``sel`` assignment for each of the
        first ``min(num_operands, 2)`` operands followed by the ``dst_sel``
        assignment. Every other format returns an empty string.
        """
        res = ''
        if self == Format.SDWA:
            for i in range(min(num_operands, 2)):
                res += 'instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i)
            res += 'instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n'
        return res


class Opcode(object):
    """Class that represents all the information we have about the opcode
    NOTE: this must be kept in sync with aco_op_info
    """

    # Operand size in bits, keyed by the type suffix of an opcode name
    # (e.g. 'f32', 'u24'). Built once instead of once per opcode.
    _OP_DTYPE_SIZES = {'{}{}'.format(prefix, size): size
                       for prefix in 'biuf' for size in [64, 32, 24, 16]}
    # inline constants are 32-bit for 16-bit integer/typeless instructions: https://reviews.llvm.org/D81841
    _OP_DTYPE_SIZES.update({'b16': 32, 'i16': 32, 'u16': 32})

    def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic, cls):
        """Parameters:

        - name is the name of the opcode (a string)
        - opcode_gfx7, opcode_gfx9 and opcode_gfx10 are the integer opcode
          numbers for the respective hardware generations; opcode() passes
          -1 when none is given
        - format is the Format of the instruction
        - input_mod and output_mod are booleans, stored as the strings
          "1" / "0"
        - is_atomic is stored as "1" / "0" according to its truth value
        - cls is stored unchanged (callers in this file pass an InstrClass)

        The operand size in bits is derived from the last '_'-separated
        part of the name (ignoring any '_e64'); it defaults to 32 when the
        part is not a known type suffix, and a few opcodes are special-cased.
        """
        assert isinstance(name, str)
        assert isinstance(opcode_gfx7, int)
        assert isinstance(opcode_gfx9, int)
        assert isinstance(opcode_gfx10, int)
        assert isinstance(format, Format)
        assert isinstance(input_mod, bool)
        assert isinstance(output_mod, bool)

        self.name = name
        self.opcode_gfx7 = opcode_gfx7
        self.opcode_gfx9 = opcode_gfx9
        self.opcode_gfx10 = opcode_gfx10
        self.input_mod = "1" if input_mod else "0"
        self.output_mod = "1" if output_mod else "0"
        self.is_atomic = "1" if is_atomic else "0"
        self.format = format
        self.cls = cls

        parts = name.replace('_e64', '').rsplit('_', 2)
        op_dtype = parts[-1]

        # If we can't tell the operand size, default to 32.
        self.operand_size = self._OP_DTYPE_SIZES.get(op_dtype, 32)

        # exceptions for operands (the order of these checks matters:
        # 'qsad_' must be tested before the more general 'sad_'):
        if 'qsad_' in name:
            self.operand_size = 0
        elif 'sad_' in name:
            self.operand_size = 32
        elif name in ['v_mad_u64_u32', 'v_mad_i64_i32']:
            self.operand_size = 0
        elif self.operand_size == 24:
            self.operand_size = 32
        elif op_dtype == 'u8' or op_dtype == 'i8':
            self.operand_size = 32
        elif name in ['v_cvt_f32_ubyte0', 'v_cvt_f32_ubyte1',
                      'v_cvt_f32_ubyte2', 'v_cvt_f32_ubyte3']:
            self.operand_size = 32


# global dictionary of opcodes
opcodes = {}


def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, cls = InstrClass.Other, input_mod = False, output_mod = False, is_atomic = False):
    """Create an Opcode and register it in the global ``opcodes`` dictionary.

    Registering the same name twice is rejected by an assertion.
    """
    assert name not in opcodes
    opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic, cls)


def default_class(opcodes, cls):
    """Yield every tuple of ``opcodes`` with an instruction class at the end.

    Tuples whose last element already is an InstrClass are yielded as-is;
    all others get ``cls`` appended.
    """
    for op in opcodes:
        if isinstance(op[-1], InstrClass):
            yield op
        else:
            yield op + (cls,)


opcode("exp", 0, 0, 0, format = Format.EXP, cls = InstrClass.Export)
opcode("p_parallelcopy")
opcode("p_startpgm")
opcode("p_phi")
opcode("p_linear_phi")
opcode("p_as_uniform")
opcode("p_unit_test")

opcode("p_create_vector")
opcode("p_extract_vector")
opcode("p_split_vector")

# start/end the parts where we can use exec based instructions
# implicitly
opcode("p_logical_start")
opcode("p_logical_end")

# e.g. subgroupMin() in SPIR-V
opcode("p_reduce", format=Format.PSEUDO_REDUCTION)
# e.g. subgroupInclusiveMin()
opcode("p_inclusive_scan", format=Format.PSEUDO_REDUCTION)
# e.g. subgroupExclusiveMin()
opcode("p_exclusive_scan", format=Format.PSEUDO_REDUCTION)

opcode("p_branch", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch_z", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch_nz", format=Format.PSEUDO_BRANCH)

opcode("p_barrier", format=Format.PSEUDO_BARRIER)

opcode("p_spill")
opcode("p_reload")

# start/end linear vgprs
opcode("p_start_linear_vgpr")
opcode("p_end_linear_vgpr")

opcode("p_wqm")
opcode("p_discard_if")
opcode("p_demote_to_helper")
opcode("p_is_helper")
opcode("p_exit_early_if")

# simulates proper bpermute behavior when it's unsupported, e.g. GFX10 wave64
opcode("p_bpermute")

# creates a lane mask where only the first active lane is selected
opcode("p_elect")

opcode("p_constaddr")

# These don't have to be pseudo-ops, but it makes optimization easier to only
# have to consider two instructions.
3147ec681f3Smrg# (src0 >> (index * bits)) & ((1 << bits) - 1) with optional sign extension 3157ec681f3Smrgopcode("p_extract") # src1=index, src2=bits, src3=signext 3167ec681f3Smrg# (src0 & ((1 << bits) - 1)) << (index * bits) 3177ec681f3Smrgopcode("p_insert") # src1=index, src2=bits 3187ec681f3Smrg 3197ec681f3Smrg 3207ec681f3Smrg# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc) 3217ec681f3SmrgSOP2 = { 3227ec681f3Smrg # GFX6, GFX7, GFX8, GFX9, GFX10, name 3237ec681f3Smrg (0x00, 0x00, 0x00, 0x00, 0x00, "s_add_u32"), 3247ec681f3Smrg (0x01, 0x01, 0x01, 0x01, 0x01, "s_sub_u32"), 3257ec681f3Smrg (0x02, 0x02, 0x02, 0x02, 0x02, "s_add_i32"), 3267ec681f3Smrg (0x03, 0x03, 0x03, 0x03, 0x03, "s_sub_i32"), 3277ec681f3Smrg (0x04, 0x04, 0x04, 0x04, 0x04, "s_addc_u32"), 3287ec681f3Smrg (0x05, 0x05, 0x05, 0x05, 0x05, "s_subb_u32"), 3297ec681f3Smrg (0x06, 0x06, 0x06, 0x06, 0x06, "s_min_i32"), 3307ec681f3Smrg (0x07, 0x07, 0x07, 0x07, 0x07, "s_min_u32"), 3317ec681f3Smrg (0x08, 0x08, 0x08, 0x08, 0x08, "s_max_i32"), 3327ec681f3Smrg (0x09, 0x09, 0x09, 0x09, 0x09, "s_max_u32"), 3337ec681f3Smrg (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cselect_b32"), 3347ec681f3Smrg (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cselect_b64"), 3357ec681f3Smrg (0x0e, 0x0e, 0x0c, 0x0c, 0x0e, "s_and_b32"), 3367ec681f3Smrg (0x0f, 0x0f, 0x0d, 0x0d, 0x0f, "s_and_b64"), 3377ec681f3Smrg (0x10, 0x10, 0x0e, 0x0e, 0x10, "s_or_b32"), 3387ec681f3Smrg (0x11, 0x11, 0x0f, 0x0f, 0x11, "s_or_b64"), 3397ec681f3Smrg (0x12, 0x12, 0x10, 0x10, 0x12, "s_xor_b32"), 3407ec681f3Smrg (0x13, 0x13, 0x11, 0x11, 0x13, "s_xor_b64"), 3417ec681f3Smrg (0x14, 0x14, 0x12, 0x12, 0x14, "s_andn2_b32"), 3427ec681f3Smrg (0x15, 0x15, 0x13, 0x13, 0x15, "s_andn2_b64"), 3437ec681f3Smrg (0x16, 0x16, 0x14, 0x14, 0x16, "s_orn2_b32"), 3447ec681f3Smrg (0x17, 0x17, 0x15, 0x15, 0x17, "s_orn2_b64"), 3457ec681f3Smrg (0x18, 0x18, 0x16, 0x16, 0x18, "s_nand_b32"), 3467ec681f3Smrg (0x19, 0x19, 0x17, 0x17, 0x19, "s_nand_b64"), 3477ec681f3Smrg (0x1a, 0x1a, 0x18, 0x18, 
0x1a, "s_nor_b32"), 3487ec681f3Smrg (0x1b, 0x1b, 0x19, 0x19, 0x1b, "s_nor_b64"), 3497ec681f3Smrg (0x1c, 0x1c, 0x1a, 0x1a, 0x1c, "s_xnor_b32"), 3507ec681f3Smrg (0x1d, 0x1d, 0x1b, 0x1b, 0x1d, "s_xnor_b64"), 3517ec681f3Smrg (0x1e, 0x1e, 0x1c, 0x1c, 0x1e, "s_lshl_b32"), 3527ec681f3Smrg (0x1f, 0x1f, 0x1d, 0x1d, 0x1f, "s_lshl_b64"), 3537ec681f3Smrg (0x20, 0x20, 0x1e, 0x1e, 0x20, "s_lshr_b32"), 3547ec681f3Smrg (0x21, 0x21, 0x1f, 0x1f, 0x21, "s_lshr_b64"), 3557ec681f3Smrg (0x22, 0x22, 0x20, 0x20, 0x22, "s_ashr_i32"), 3567ec681f3Smrg (0x23, 0x23, 0x21, 0x21, 0x23, "s_ashr_i64"), 3577ec681f3Smrg (0x24, 0x24, 0x22, 0x22, 0x24, "s_bfm_b32"), 3587ec681f3Smrg (0x25, 0x25, 0x23, 0x23, 0x25, "s_bfm_b64"), 3597ec681f3Smrg (0x26, 0x26, 0x24, 0x24, 0x26, "s_mul_i32"), 3607ec681f3Smrg (0x27, 0x27, 0x25, 0x25, 0x27, "s_bfe_u32"), 3617ec681f3Smrg (0x28, 0x28, 0x26, 0x26, 0x28, "s_bfe_i32"), 3627ec681f3Smrg (0x29, 0x29, 0x27, 0x27, 0x29, "s_bfe_u64"), 3637ec681f3Smrg (0x2a, 0x2a, 0x28, 0x28, 0x2a, "s_bfe_i64"), 3647ec681f3Smrg (0x2b, 0x2b, 0x29, 0x29, -1, "s_cbranch_g_fork", InstrClass.Branch), 3657ec681f3Smrg (0x2c, 0x2c, 0x2a, 0x2a, 0x2c, "s_absdiff_i32"), 3667ec681f3Smrg ( -1, -1, 0x2b, 0x2b, -1, "s_rfe_restore_b64", InstrClass.Branch), 3677ec681f3Smrg ( -1, -1, -1, 0x2e, 0x2e, "s_lshl1_add_u32"), 3687ec681f3Smrg ( -1, -1, -1, 0x2f, 0x2f, "s_lshl2_add_u32"), 3697ec681f3Smrg ( -1, -1, -1, 0x30, 0x30, "s_lshl3_add_u32"), 3707ec681f3Smrg ( -1, -1, -1, 0x31, 0x31, "s_lshl4_add_u32"), 3717ec681f3Smrg ( -1, -1, -1, 0x32, 0x32, "s_pack_ll_b32_b16"), 3727ec681f3Smrg ( -1, -1, -1, 0x33, 0x33, "s_pack_lh_b32_b16"), 3737ec681f3Smrg ( -1, -1, -1, 0x34, 0x34, "s_pack_hh_b32_b16"), 3747ec681f3Smrg ( -1, -1, -1, 0x2c, 0x35, "s_mul_hi_u32"), 3757ec681f3Smrg ( -1, -1, -1, 0x2d, 0x36, "s_mul_hi_i32"), 3767ec681f3Smrg # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP2. 
3777ec681f3Smrg ( -1, -1, -1, -1, -1, "p_constaddr_addlo"), 3787ec681f3Smrg} 3797ec681f3Smrgfor (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOP2, InstrClass.Salu): 3807ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.SOP2, cls) 3817ec681f3Smrg 3827ec681f3Smrg 3837ec681f3Smrg# SOPK instructions: 0 input (+ imm), 1 output + optional scc 3847ec681f3SmrgSOPK = { 3857ec681f3Smrg # GFX6, GFX7, GFX8, GFX9, GFX10, name 3867ec681f3Smrg (0x00, 0x00, 0x00, 0x00, 0x00, "s_movk_i32"), 3877ec681f3Smrg ( -1, -1, -1, -1, 0x01, "s_version"), # GFX10+ 3887ec681f3Smrg (0x02, 0x02, 0x01, 0x01, 0x02, "s_cmovk_i32"), # GFX8_GFX9 3897ec681f3Smrg (0x03, 0x03, 0x02, 0x02, 0x03, "s_cmpk_eq_i32"), 3907ec681f3Smrg (0x04, 0x04, 0x03, 0x03, 0x04, "s_cmpk_lg_i32"), 3917ec681f3Smrg (0x05, 0x05, 0x04, 0x04, 0x05, "s_cmpk_gt_i32"), 3927ec681f3Smrg (0x06, 0x06, 0x05, 0x05, 0x06, "s_cmpk_ge_i32"), 3937ec681f3Smrg (0x07, 0x07, 0x06, 0x06, 0x07, "s_cmpk_lt_i32"), 3947ec681f3Smrg (0x08, 0x08, 0x07, 0x07, 0x08, "s_cmpk_le_i32"), 3957ec681f3Smrg (0x09, 0x09, 0x08, 0x08, 0x09, "s_cmpk_eq_u32"), 3967ec681f3Smrg (0x0a, 0x0a, 0x09, 0x09, 0x0a, "s_cmpk_lg_u32"), 3977ec681f3Smrg (0x0b, 0x0b, 0x0a, 0x0a, 0x0b, "s_cmpk_gt_u32"), 3987ec681f3Smrg (0x0c, 0x0c, 0x0b, 0x0b, 0x0c, "s_cmpk_ge_u32"), 3997ec681f3Smrg (0x0d, 0x0d, 0x0c, 0x0c, 0x0d, "s_cmpk_lt_u32"), 4007ec681f3Smrg (0x0e, 0x0e, 0x0d, 0x0d, 0x0e, "s_cmpk_le_u32"), 4017ec681f3Smrg (0x0f, 0x0f, 0x0e, 0x0e, 0x0f, "s_addk_i32"), 4027ec681f3Smrg (0x10, 0x10, 0x0f, 0x0f, 0x10, "s_mulk_i32"), 4037ec681f3Smrg (0x11, 0x11, 0x10, 0x10, -1, "s_cbranch_i_fork", InstrClass.Branch), 4047ec681f3Smrg (0x12, 0x12, 0x11, 0x11, 0x12, "s_getreg_b32"), 4057ec681f3Smrg (0x13, 0x13, 0x12, 0x12, 0x13, "s_setreg_b32"), 4067ec681f3Smrg (0x15, 0x15, 0x14, 0x14, 0x15, "s_setreg_imm32_b32"), # requires 32bit literal 4077ec681f3Smrg ( -1, -1, 0x15, 0x15, 0x16, "s_call_b64", InstrClass.Branch), 4087ec681f3Smrg ( -1, -1, -1, -1, 0x17, "s_waitcnt_vscnt", 
InstrClass.Waitcnt), 4097ec681f3Smrg ( -1, -1, -1, -1, 0x18, "s_waitcnt_vmcnt", InstrClass.Waitcnt), 4107ec681f3Smrg ( -1, -1, -1, -1, 0x19, "s_waitcnt_expcnt", InstrClass.Waitcnt), 4117ec681f3Smrg ( -1, -1, -1, -1, 0x1a, "s_waitcnt_lgkmcnt", InstrClass.Waitcnt), 4127ec681f3Smrg ( -1, -1, -1, -1, 0x1b, "s_subvector_loop_begin", InstrClass.Branch), 4137ec681f3Smrg ( -1, -1, -1, -1, 0x1c, "s_subvector_loop_end", InstrClass.Branch), 4147ec681f3Smrg} 4157ec681f3Smrgfor (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOPK, InstrClass.Salu): 4167ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.SOPK, cls) 4177ec681f3Smrg 4187ec681f3Smrg 4197ec681f3Smrg# SOP1 instructions: 1 input, 1 output (+optional SCC) 4207ec681f3SmrgSOP1 = { 4217ec681f3Smrg # GFX6, GFX7, GFX8, GFX9, GFX10, name 4227ec681f3Smrg (0x03, 0x03, 0x00, 0x00, 0x03, "s_mov_b32"), 4237ec681f3Smrg (0x04, 0x04, 0x01, 0x01, 0x04, "s_mov_b64"), 4247ec681f3Smrg (0x05, 0x05, 0x02, 0x02, 0x05, "s_cmov_b32"), 4257ec681f3Smrg (0x06, 0x06, 0x03, 0x03, 0x06, "s_cmov_b64"), 4267ec681f3Smrg (0x07, 0x07, 0x04, 0x04, 0x07, "s_not_b32"), 4277ec681f3Smrg (0x08, 0x08, 0x05, 0x05, 0x08, "s_not_b64"), 4287ec681f3Smrg (0x09, 0x09, 0x06, 0x06, 0x09, "s_wqm_b32"), 4297ec681f3Smrg (0x0a, 0x0a, 0x07, 0x07, 0x0a, "s_wqm_b64"), 4307ec681f3Smrg (0x0b, 0x0b, 0x08, 0x08, 0x0b, "s_brev_b32"), 4317ec681f3Smrg (0x0c, 0x0c, 0x09, 0x09, 0x0c, "s_brev_b64"), 4327ec681f3Smrg (0x0d, 0x0d, 0x0a, 0x0a, 0x0d, "s_bcnt0_i32_b32"), 4337ec681f3Smrg (0x0e, 0x0e, 0x0b, 0x0b, 0x0e, "s_bcnt0_i32_b64"), 4347ec681f3Smrg (0x0f, 0x0f, 0x0c, 0x0c, 0x0f, "s_bcnt1_i32_b32"), 4357ec681f3Smrg (0x10, 0x10, 0x0d, 0x0d, 0x10, "s_bcnt1_i32_b64"), 4367ec681f3Smrg (0x11, 0x11, 0x0e, 0x0e, 0x11, "s_ff0_i32_b32"), 4377ec681f3Smrg (0x12, 0x12, 0x0f, 0x0f, 0x12, "s_ff0_i32_b64"), 4387ec681f3Smrg (0x13, 0x13, 0x10, 0x10, 0x13, "s_ff1_i32_b32"), 4397ec681f3Smrg (0x14, 0x14, 0x11, 0x11, 0x14, "s_ff1_i32_b64"), 4407ec681f3Smrg (0x15, 0x15, 0x12, 0x12, 0x15, 
"s_flbit_i32_b32"), 4417ec681f3Smrg (0x16, 0x16, 0x13, 0x13, 0x16, "s_flbit_i32_b64"), 4427ec681f3Smrg (0x17, 0x17, 0x14, 0x14, 0x17, "s_flbit_i32"), 4437ec681f3Smrg (0x18, 0x18, 0x15, 0x15, 0x18, "s_flbit_i32_i64"), 4447ec681f3Smrg (0x19, 0x19, 0x16, 0x16, 0x19, "s_sext_i32_i8"), 4457ec681f3Smrg (0x1a, 0x1a, 0x17, 0x17, 0x1a, "s_sext_i32_i16"), 4467ec681f3Smrg (0x1b, 0x1b, 0x18, 0x18, 0x1b, "s_bitset0_b32"), 4477ec681f3Smrg (0x1c, 0x1c, 0x19, 0x19, 0x1c, "s_bitset0_b64"), 4487ec681f3Smrg (0x1d, 0x1d, 0x1a, 0x1a, 0x1d, "s_bitset1_b32"), 4497ec681f3Smrg (0x1e, 0x1e, 0x1b, 0x1b, 0x1e, "s_bitset1_b64"), 4507ec681f3Smrg (0x1f, 0x1f, 0x1c, 0x1c, 0x1f, "s_getpc_b64"), 4517ec681f3Smrg (0x20, 0x20, 0x1d, 0x1d, 0x20, "s_setpc_b64", InstrClass.Branch), 4527ec681f3Smrg (0x21, 0x21, 0x1e, 0x1e, 0x21, "s_swappc_b64", InstrClass.Branch), 4537ec681f3Smrg (0x22, 0x22, 0x1f, 0x1f, 0x22, "s_rfe_b64", InstrClass.Branch), 4547ec681f3Smrg (0x24, 0x24, 0x20, 0x20, 0x24, "s_and_saveexec_b64"), 4557ec681f3Smrg (0x25, 0x25, 0x21, 0x21, 0x25, "s_or_saveexec_b64"), 4567ec681f3Smrg (0x26, 0x26, 0x22, 0x22, 0x26, "s_xor_saveexec_b64"), 4577ec681f3Smrg (0x27, 0x27, 0x23, 0x23, 0x27, "s_andn2_saveexec_b64"), 4587ec681f3Smrg (0x28, 0x28, 0x24, 0x24, 0x28, "s_orn2_saveexec_b64"), 4597ec681f3Smrg (0x29, 0x29, 0x25, 0x25, 0x29, "s_nand_saveexec_b64"), 4607ec681f3Smrg (0x2a, 0x2a, 0x26, 0x26, 0x2a, "s_nor_saveexec_b64"), 4617ec681f3Smrg (0x2b, 0x2b, 0x27, 0x27, 0x2b, "s_xnor_saveexec_b64"), 4627ec681f3Smrg (0x2c, 0x2c, 0x28, 0x28, 0x2c, "s_quadmask_b32"), 4637ec681f3Smrg (0x2d, 0x2d, 0x29, 0x29, 0x2d, "s_quadmask_b64"), 4647ec681f3Smrg (0x2e, 0x2e, 0x2a, 0x2a, 0x2e, "s_movrels_b32"), 4657ec681f3Smrg (0x2f, 0x2f, 0x2b, 0x2b, 0x2f, "s_movrels_b64"), 4667ec681f3Smrg (0x30, 0x30, 0x2c, 0x2c, 0x30, "s_movreld_b32"), 4677ec681f3Smrg (0x31, 0x31, 0x2d, 0x2d, 0x31, "s_movreld_b64"), 4687ec681f3Smrg (0x32, 0x32, 0x2e, 0x2e, -1, "s_cbranch_join", InstrClass.Branch), 4697ec681f3Smrg (0x34, 0x34, 0x30, 0x30, 
0x34, "s_abs_i32"), 4707ec681f3Smrg (0x35, 0x35, -1, -1, 0x35, "s_mov_fed_b32"), 4717ec681f3Smrg ( -1, -1, 0x32, 0x32, -1, "s_set_gpr_idx_idx"), 4727ec681f3Smrg ( -1, -1, -1, 0x33, 0x37, "s_andn1_saveexec_b64"), 4737ec681f3Smrg ( -1, -1, -1, 0x34, 0x38, "s_orn1_saveexec_b64"), 4747ec681f3Smrg ( -1, -1, -1, 0x35, 0x39, "s_andn1_wrexec_b64"), 4757ec681f3Smrg ( -1, -1, -1, 0x36, 0x3a, "s_andn2_wrexec_b64"), 4767ec681f3Smrg ( -1, -1, -1, 0x37, 0x3b, "s_bitreplicate_b64_b32"), 4777ec681f3Smrg ( -1, -1, -1, -1, 0x3c, "s_and_saveexec_b32"), 4787ec681f3Smrg ( -1, -1, -1, -1, 0x3d, "s_or_saveexec_b32"), 4797ec681f3Smrg ( -1, -1, -1, -1, 0x3e, "s_xor_saveexec_b32"), 4807ec681f3Smrg ( -1, -1, -1, -1, 0x3f, "s_andn2_saveexec_b32"), 4817ec681f3Smrg ( -1, -1, -1, -1, 0x40, "s_orn2_saveexec_b32"), 4827ec681f3Smrg ( -1, -1, -1, -1, 0x41, "s_nand_saveexec_b32"), 4837ec681f3Smrg ( -1, -1, -1, -1, 0x42, "s_nor_saveexec_b32"), 4847ec681f3Smrg ( -1, -1, -1, -1, 0x43, "s_xnor_saveexec_b32"), 4857ec681f3Smrg ( -1, -1, -1, -1, 0x44, "s_andn1_saveexec_b32"), 4867ec681f3Smrg ( -1, -1, -1, -1, 0x45, "s_orn1_saveexec_b32"), 4877ec681f3Smrg ( -1, -1, -1, -1, 0x46, "s_andn1_wrexec_b32"), 4887ec681f3Smrg ( -1, -1, -1, -1, 0x47, "s_andn2_wrexec_b32"), 4897ec681f3Smrg ( -1, -1, -1, -1, 0x49, "s_movrelsd_2_b32"), 4907ec681f3Smrg # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1. 
4917ec681f3Smrg ( -1, -1, -1, -1, -1, "p_constaddr_getpc"), 4927ec681f3Smrg} 4937ec681f3Smrgfor (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOP1, InstrClass.Salu): 4947ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.SOP1, cls) 4957ec681f3Smrg 4967ec681f3Smrg 4977ec681f3Smrg# SOPC instructions: 2 inputs and 0 outputs (+SCC) 4987ec681f3SmrgSOPC = { 4997ec681f3Smrg # GFX6, GFX7, GFX8, GFX9, GFX10, name 5007ec681f3Smrg (0x00, 0x00, 0x00, 0x00, 0x00, "s_cmp_eq_i32"), 5017ec681f3Smrg (0x01, 0x01, 0x01, 0x01, 0x01, "s_cmp_lg_i32"), 5027ec681f3Smrg (0x02, 0x02, 0x02, 0x02, 0x02, "s_cmp_gt_i32"), 5037ec681f3Smrg (0x03, 0x03, 0x03, 0x03, 0x03, "s_cmp_ge_i32"), 5047ec681f3Smrg (0x04, 0x04, 0x04, 0x04, 0x04, "s_cmp_lt_i32"), 5057ec681f3Smrg (0x05, 0x05, 0x05, 0x05, 0x05, "s_cmp_le_i32"), 5067ec681f3Smrg (0x06, 0x06, 0x06, 0x06, 0x06, "s_cmp_eq_u32"), 5077ec681f3Smrg (0x07, 0x07, 0x07, 0x07, 0x07, "s_cmp_lg_u32"), 5087ec681f3Smrg (0x08, 0x08, 0x08, 0x08, 0x08, "s_cmp_gt_u32"), 5097ec681f3Smrg (0x09, 0x09, 0x09, 0x09, 0x09, "s_cmp_ge_u32"), 5107ec681f3Smrg (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cmp_lt_u32"), 5117ec681f3Smrg (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cmp_le_u32"), 5127ec681f3Smrg (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_bitcmp0_b32"), 5137ec681f3Smrg (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_bitcmp1_b32"), 5147ec681f3Smrg (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_bitcmp0_b64"), 5157ec681f3Smrg (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_bitcmp1_b64"), 5167ec681f3Smrg (0x10, 0x10, 0x10, 0x10, -1, "s_setvskip"), 5177ec681f3Smrg ( -1, -1, 0x11, 0x11, -1, "s_set_gpr_idx_on"), 5187ec681f3Smrg ( -1, -1, 0x12, 0x12, 0x12, "s_cmp_eq_u64"), 5197ec681f3Smrg ( -1, -1, 0x13, 0x13, 0x13, "s_cmp_lg_u64"), 5207ec681f3Smrg} 5217ec681f3Smrgfor (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPC: 5227ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.SOPC, InstrClass.Salu) 5237ec681f3Smrg 5247ec681f3Smrg 5257ec681f3Smrg# SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs 5267ec681f3SmrgSOPP = { 
5277ec681f3Smrg # GFX6, GFX7, GFX8, GFX9, GFX10, name 5287ec681f3Smrg (0x00, 0x00, 0x00, 0x00, 0x00, "s_nop"), 5297ec681f3Smrg (0x01, 0x01, 0x01, 0x01, 0x01, "s_endpgm"), 5307ec681f3Smrg (0x02, 0x02, 0x02, 0x02, 0x02, "s_branch", InstrClass.Branch), 5317ec681f3Smrg ( -1, -1, 0x03, 0x03, 0x03, "s_wakeup"), 5327ec681f3Smrg (0x04, 0x04, 0x04, 0x04, 0x04, "s_cbranch_scc0", InstrClass.Branch), 5337ec681f3Smrg (0x05, 0x05, 0x05, 0x05, 0x05, "s_cbranch_scc1", InstrClass.Branch), 5347ec681f3Smrg (0x06, 0x06, 0x06, 0x06, 0x06, "s_cbranch_vccz", InstrClass.Branch), 5357ec681f3Smrg (0x07, 0x07, 0x07, 0x07, 0x07, "s_cbranch_vccnz", InstrClass.Branch), 5367ec681f3Smrg (0x08, 0x08, 0x08, 0x08, 0x08, "s_cbranch_execz", InstrClass.Branch), 5377ec681f3Smrg (0x09, 0x09, 0x09, 0x09, 0x09, "s_cbranch_execnz", InstrClass.Branch), 5387ec681f3Smrg (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_barrier", InstrClass.Barrier), 5397ec681f3Smrg ( -1, 0x0b, 0x0b, 0x0b, 0x0b, "s_setkill"), 5407ec681f3Smrg (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_waitcnt", InstrClass.Waitcnt), 5417ec681f3Smrg (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_sethalt"), 5427ec681f3Smrg (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_sleep"), 5437ec681f3Smrg (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_setprio"), 5447ec681f3Smrg (0x10, 0x10, 0x10, 0x10, 0x10, "s_sendmsg", InstrClass.Sendmsg), 5457ec681f3Smrg (0x11, 0x11, 0x11, 0x11, 0x11, "s_sendmsghalt", InstrClass.Sendmsg), 5467ec681f3Smrg (0x12, 0x12, 0x12, 0x12, 0x12, "s_trap", InstrClass.Branch), 5477ec681f3Smrg (0x13, 0x13, 0x13, 0x13, 0x13, "s_icache_inv"), 5487ec681f3Smrg (0x14, 0x14, 0x14, 0x14, 0x14, "s_incperflevel"), 5497ec681f3Smrg (0x15, 0x15, 0x15, 0x15, 0x15, "s_decperflevel"), 5507ec681f3Smrg (0x16, 0x16, 0x16, 0x16, 0x16, "s_ttracedata"), 5517ec681f3Smrg ( -1, 0x17, 0x17, 0x17, 0x17, "s_cbranch_cdbgsys", InstrClass.Branch), 5527ec681f3Smrg ( -1, 0x18, 0x18, 0x18, 0x18, "s_cbranch_cdbguser", InstrClass.Branch), 5537ec681f3Smrg ( -1, 0x19, 0x19, 0x19, 0x19, "s_cbranch_cdbgsys_or_user", InstrClass.Branch), 
5547ec681f3Smrg ( -1, 0x1a, 0x1a, 0x1a, 0x1a, "s_cbranch_cdbgsys_and_user", InstrClass.Branch), 5557ec681f3Smrg ( -1, -1, 0x1b, 0x1b, 0x1b, "s_endpgm_saved"), 5567ec681f3Smrg ( -1, -1, 0x1c, 0x1c, -1, "s_set_gpr_idx_off"), 5577ec681f3Smrg ( -1, -1, 0x1d, 0x1d, -1, "s_set_gpr_idx_mode"), 5587ec681f3Smrg ( -1, -1, -1, 0x1e, 0x1e, "s_endpgm_ordered_ps_done"), 5597ec681f3Smrg ( -1, -1, -1, -1, 0x1f, "s_code_end"), 5607ec681f3Smrg ( -1, -1, -1, -1, 0x20, "s_inst_prefetch"), 5617ec681f3Smrg ( -1, -1, -1, -1, 0x21, "s_clause"), 5627ec681f3Smrg ( -1, -1, -1, -1, 0x22, "s_wait_idle"), 5637ec681f3Smrg ( -1, -1, -1, -1, 0x23, "s_waitcnt_depctr"), 5647ec681f3Smrg ( -1, -1, -1, -1, 0x24, "s_round_mode"), 5657ec681f3Smrg ( -1, -1, -1, -1, 0x25, "s_denorm_mode"), 5667ec681f3Smrg ( -1, -1, -1, -1, 0x26, "s_ttracedata_imm"), 5677ec681f3Smrg} 5687ec681f3Smrgfor (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOPP, InstrClass.Salu): 5697ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.SOPP, cls) 5707ec681f3Smrg 5717ec681f3Smrg 5727ec681f3Smrg# SMEM instructions: sbase input (2 sgpr), potentially 2 offset inputs, 1 sdata input/output 5737ec681f3Smrg# Unlike GFX10, GFX10.3 does not have SMEM store, atomic or scratch instructions 5747ec681f3SmrgSMEM = { 5757ec681f3Smrg # GFX6, GFX7, GFX8, GFX9, GFX10, name 5767ec681f3Smrg (0x00, 0x00, 0x00, 0x00, 0x00, "s_load_dword"), 5777ec681f3Smrg (0x01, 0x01, 0x01, 0x01, 0x01, "s_load_dwordx2"), 5787ec681f3Smrg (0x02, 0x02, 0x02, 0x02, 0x02, "s_load_dwordx4"), 5797ec681f3Smrg (0x03, 0x03, 0x03, 0x03, 0x03, "s_load_dwordx8"), 5807ec681f3Smrg (0x04, 0x04, 0x04, 0x04, 0x04, "s_load_dwordx16"), 5817ec681f3Smrg ( -1, -1, -1, 0x05, 0x05, "s_scratch_load_dword"), 5827ec681f3Smrg ( -1, -1, -1, 0x06, 0x06, "s_scratch_load_dwordx2"), 5837ec681f3Smrg ( -1, -1, -1, 0x07, 0x07, "s_scratch_load_dwordx4"), 5847ec681f3Smrg (0x08, 0x08, 0x08, 0x08, 0x08, "s_buffer_load_dword"), 5857ec681f3Smrg (0x09, 0x09, 0x09, 0x09, 0x09, "s_buffer_load_dwordx2"), 
5867ec681f3Smrg (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_buffer_load_dwordx4"), 5877ec681f3Smrg (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_buffer_load_dwordx8"), 5887ec681f3Smrg (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_buffer_load_dwordx16"), 5897ec681f3Smrg ( -1, -1, 0x10, 0x10, 0x10, "s_store_dword"), 5907ec681f3Smrg ( -1, -1, 0x11, 0x11, 0x11, "s_store_dwordx2"), 5917ec681f3Smrg ( -1, -1, 0x12, 0x12, 0x12, "s_store_dwordx4"), 5927ec681f3Smrg ( -1, -1, -1, 0x15, 0x15, "s_scratch_store_dword"), 5937ec681f3Smrg ( -1, -1, -1, 0x16, 0x16, "s_scratch_store_dwordx2"), 5947ec681f3Smrg ( -1, -1, -1, 0x17, 0x17, "s_scratch_store_dwordx4"), 5957ec681f3Smrg ( -1, -1, 0x18, 0x18, 0x18, "s_buffer_store_dword"), 5967ec681f3Smrg ( -1, -1, 0x19, 0x19, 0x19, "s_buffer_store_dwordx2"), 5977ec681f3Smrg ( -1, -1, 0x1a, 0x1a, 0x1a, "s_buffer_store_dwordx4"), 5987ec681f3Smrg ( -1, -1, 0x1f, 0x1f, 0x1f, "s_gl1_inv"), 5997ec681f3Smrg (0x1f, 0x1f, 0x20, 0x20, 0x20, "s_dcache_inv"), 6007ec681f3Smrg ( -1, -1, 0x21, 0x21, 0x21, "s_dcache_wb"), 6017ec681f3Smrg ( -1, 0x1d, 0x22, 0x22, -1, "s_dcache_inv_vol"), 6027ec681f3Smrg ( -1, -1, 0x23, 0x23, -1, "s_dcache_wb_vol"), 6037ec681f3Smrg (0x1e, 0x1e, 0x24, 0x24, 0x24, "s_memtime"), #GFX6-GFX10 6047ec681f3Smrg ( -1, -1, 0x25, 0x25, 0x25, "s_memrealtime"), 6057ec681f3Smrg ( -1, -1, 0x26, 0x26, 0x26, "s_atc_probe"), 6067ec681f3Smrg ( -1, -1, 0x27, 0x27, 0x27, "s_atc_probe_buffer"), 6077ec681f3Smrg ( -1, -1, -1, 0x28, 0x28, "s_dcache_discard"), 6087ec681f3Smrg ( -1, -1, -1, 0x29, 0x29, "s_dcache_discard_x2"), 6097ec681f3Smrg ( -1, -1, -1, -1, 0x2a, "s_get_waveid_in_workgroup"), 6107ec681f3Smrg ( -1, -1, -1, 0x40, 0x40, "s_buffer_atomic_swap"), 6117ec681f3Smrg ( -1, -1, -1, 0x41, 0x41, "s_buffer_atomic_cmpswap"), 6127ec681f3Smrg ( -1, -1, -1, 0x42, 0x42, "s_buffer_atomic_add"), 6137ec681f3Smrg ( -1, -1, -1, 0x43, 0x43, "s_buffer_atomic_sub"), 6147ec681f3Smrg ( -1, -1, -1, 0x44, 0x44, "s_buffer_atomic_smin"), 6157ec681f3Smrg ( -1, -1, -1, 0x45, 0x45, 
"s_buffer_atomic_umin"), 6167ec681f3Smrg ( -1, -1, -1, 0x46, 0x46, "s_buffer_atomic_smax"), 6177ec681f3Smrg ( -1, -1, -1, 0x47, 0x47, "s_buffer_atomic_umax"), 6187ec681f3Smrg ( -1, -1, -1, 0x48, 0x48, "s_buffer_atomic_and"), 6197ec681f3Smrg ( -1, -1, -1, 0x49, 0x49, "s_buffer_atomic_or"), 6207ec681f3Smrg ( -1, -1, -1, 0x4a, 0x4a, "s_buffer_atomic_xor"), 6217ec681f3Smrg ( -1, -1, -1, 0x4b, 0x4b, "s_buffer_atomic_inc"), 6227ec681f3Smrg ( -1, -1, -1, 0x4c, 0x4c, "s_buffer_atomic_dec"), 6237ec681f3Smrg ( -1, -1, -1, 0x60, 0x60, "s_buffer_atomic_swap_x2"), 6247ec681f3Smrg ( -1, -1, -1, 0x61, 0x61, "s_buffer_atomic_cmpswap_x2"), 6257ec681f3Smrg ( -1, -1, -1, 0x62, 0x62, "s_buffer_atomic_add_x2"), 6267ec681f3Smrg ( -1, -1, -1, 0x63, 0x63, "s_buffer_atomic_sub_x2"), 6277ec681f3Smrg ( -1, -1, -1, 0x64, 0x64, "s_buffer_atomic_smin_x2"), 6287ec681f3Smrg ( -1, -1, -1, 0x65, 0x65, "s_buffer_atomic_umin_x2"), 6297ec681f3Smrg ( -1, -1, -1, 0x66, 0x66, "s_buffer_atomic_smax_x2"), 6307ec681f3Smrg ( -1, -1, -1, 0x67, 0x67, "s_buffer_atomic_umax_x2"), 6317ec681f3Smrg ( -1, -1, -1, 0x68, 0x68, "s_buffer_atomic_and_x2"), 6327ec681f3Smrg ( -1, -1, -1, 0x69, 0x69, "s_buffer_atomic_or_x2"), 6337ec681f3Smrg ( -1, -1, -1, 0x6a, 0x6a, "s_buffer_atomic_xor_x2"), 6347ec681f3Smrg ( -1, -1, -1, 0x6b, 0x6b, "s_buffer_atomic_inc_x2"), 6357ec681f3Smrg ( -1, -1, -1, 0x6c, 0x6c, "s_buffer_atomic_dec_x2"), 6367ec681f3Smrg ( -1, -1, -1, 0x80, 0x80, "s_atomic_swap"), 6377ec681f3Smrg ( -1, -1, -1, 0x81, 0x81, "s_atomic_cmpswap"), 6387ec681f3Smrg ( -1, -1, -1, 0x82, 0x82, "s_atomic_add"), 6397ec681f3Smrg ( -1, -1, -1, 0x83, 0x83, "s_atomic_sub"), 6407ec681f3Smrg ( -1, -1, -1, 0x84, 0x84, "s_atomic_smin"), 6417ec681f3Smrg ( -1, -1, -1, 0x85, 0x85, "s_atomic_umin"), 6427ec681f3Smrg ( -1, -1, -1, 0x86, 0x86, "s_atomic_smax"), 6437ec681f3Smrg ( -1, -1, -1, 0x87, 0x87, "s_atomic_umax"), 6447ec681f3Smrg ( -1, -1, -1, 0x88, 0x88, "s_atomic_and"), 6457ec681f3Smrg ( -1, -1, -1, 0x89, 0x89, "s_atomic_or"), 
6467ec681f3Smrg ( -1, -1, -1, 0x8a, 0x8a, "s_atomic_xor"), 6477ec681f3Smrg ( -1, -1, -1, 0x8b, 0x8b, "s_atomic_inc"), 6487ec681f3Smrg ( -1, -1, -1, 0x8c, 0x8c, "s_atomic_dec"), 6497ec681f3Smrg ( -1, -1, -1, 0xa0, 0xa0, "s_atomic_swap_x2"), 6507ec681f3Smrg ( -1, -1, -1, 0xa1, 0xa1, "s_atomic_cmpswap_x2"), 6517ec681f3Smrg ( -1, -1, -1, 0xa2, 0xa2, "s_atomic_add_x2"), 6527ec681f3Smrg ( -1, -1, -1, 0xa3, 0xa3, "s_atomic_sub_x2"), 6537ec681f3Smrg ( -1, -1, -1, 0xa4, 0xa4, "s_atomic_smin_x2"), 6547ec681f3Smrg ( -1, -1, -1, 0xa5, 0xa5, "s_atomic_umin_x2"), 6557ec681f3Smrg ( -1, -1, -1, 0xa6, 0xa6, "s_atomic_smax_x2"), 6567ec681f3Smrg ( -1, -1, -1, 0xa7, 0xa7, "s_atomic_umax_x2"), 6577ec681f3Smrg ( -1, -1, -1, 0xa8, 0xa8, "s_atomic_and_x2"), 6587ec681f3Smrg ( -1, -1, -1, 0xa9, 0xa9, "s_atomic_or_x2"), 6597ec681f3Smrg ( -1, -1, -1, 0xaa, 0xaa, "s_atomic_xor_x2"), 6607ec681f3Smrg ( -1, -1, -1, 0xab, 0xab, "s_atomic_inc_x2"), 6617ec681f3Smrg ( -1, -1, -1, 0xac, 0xac, "s_atomic_dec_x2"), 6627ec681f3Smrg} 6637ec681f3Smrgfor (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SMEM: 6647ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.SMEM, InstrClass.SMem, is_atomic = "atomic" in name) 6657ec681f3Smrg 6667ec681f3Smrg 6677ec681f3Smrg# VOP2 instructions: 2 inputs, 1 output (+ optional vcc) 6687ec681f3Smrg# TODO: misses some GFX6_7 opcodes which were shifted to VOP3 in GFX8 6697ec681f3SmrgVOP2 = { 6707ec681f3Smrg # GFX6, GFX7, GFX8, GFX9, GFX10, name, input/output modifiers 6717ec681f3Smrg (0x01, 0x01, -1, -1, -1, "v_readlane_b32", False), 6727ec681f3Smrg (0x02, 0x02, -1, -1, -1, "v_writelane_b32", False), 6737ec681f3Smrg (0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True), 6747ec681f3Smrg (0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True), 6757ec681f3Smrg (0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True), 6767ec681f3Smrg (0x06, 0x06, -1, -1, 0x06, "v_mac_legacy_f32", True), 6777ec681f3Smrg (0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True), 6787ec681f3Smrg (0x08, 0x08, 0x05, 0x05, 
0x08, "v_mul_f32", True), 6797ec681f3Smrg (0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", False), 6807ec681f3Smrg (0x0a, 0x0a, 0x07, 0x07, 0x0a, "v_mul_hi_i32_i24", False), 6817ec681f3Smrg (0x0b, 0x0b, 0x08, 0x08, 0x0b, "v_mul_u32_u24", False), 6827ec681f3Smrg (0x0c, 0x0c, 0x09, 0x09, 0x0c, "v_mul_hi_u32_u24", False), 6837ec681f3Smrg ( -1, -1, -1, 0x39, 0x0d, "v_dot4c_i32_i8", False), 6847ec681f3Smrg (0x0d, 0x0d, -1, -1, -1, "v_min_legacy_f32", True), 6857ec681f3Smrg (0x0e, 0x0e, -1, -1, -1, "v_max_legacy_f32", True), 6867ec681f3Smrg (0x0f, 0x0f, 0x0a, 0x0a, 0x0f, "v_min_f32", True), 6877ec681f3Smrg (0x10, 0x10, 0x0b, 0x0b, 0x10, "v_max_f32", True), 6887ec681f3Smrg (0x11, 0x11, 0x0c, 0x0c, 0x11, "v_min_i32", False), 6897ec681f3Smrg (0x12, 0x12, 0x0d, 0x0d, 0x12, "v_max_i32", False), 6907ec681f3Smrg (0x13, 0x13, 0x0e, 0x0e, 0x13, "v_min_u32", False), 6917ec681f3Smrg (0x14, 0x14, 0x0f, 0x0f, 0x14, "v_max_u32", False), 6927ec681f3Smrg (0x15, 0x15, -1, -1, -1, "v_lshr_b32", False), 6937ec681f3Smrg (0x16, 0x16, 0x10, 0x10, 0x16, "v_lshrrev_b32", False), 6947ec681f3Smrg (0x17, 0x17, -1, -1, -1, "v_ashr_i32", False), 6957ec681f3Smrg (0x18, 0x18, 0x11, 0x11, 0x18, "v_ashrrev_i32", False), 6967ec681f3Smrg (0x19, 0x19, -1, -1, -1, "v_lshl_b32", False), 6977ec681f3Smrg (0x1a, 0x1a, 0x12, 0x12, 0x1a, "v_lshlrev_b32", False), 6987ec681f3Smrg (0x1b, 0x1b, 0x13, 0x13, 0x1b, "v_and_b32", False), 6997ec681f3Smrg (0x1c, 0x1c, 0x14, 0x14, 0x1c, "v_or_b32", False), 7007ec681f3Smrg (0x1d, 0x1d, 0x15, 0x15, 0x1d, "v_xor_b32", False), 7017ec681f3Smrg ( -1, -1, -1, -1, 0x1e, "v_xnor_b32", False), 7027ec681f3Smrg (0x1f, 0x1f, 0x16, 0x16, 0x1f, "v_mac_f32", True), 7037ec681f3Smrg (0x20, 0x20, 0x17, 0x17, 0x20, "v_madmk_f32", False), 7047ec681f3Smrg (0x21, 0x21, 0x18, 0x18, 0x21, "v_madak_f32", False), 7057ec681f3Smrg (0x24, 0x24, -1, -1, -1, "v_mbcnt_hi_u32_b32", False), 7067ec681f3Smrg (0x25, 0x25, 0x19, 0x19, -1, "v_add_co_u32", False), # VOP3B only in RDNA 7077ec681f3Smrg (0x26, 0x26, 
0x1a, 0x1a, -1, "v_sub_co_u32", False), # VOP3B only in RDNA 7087ec681f3Smrg (0x27, 0x27, 0x1b, 0x1b, -1, "v_subrev_co_u32", False), # VOP3B only in RDNA 7097ec681f3Smrg (0x28, 0x28, 0x1c, 0x1c, 0x28, "v_addc_co_u32", False), # v_add_co_ci_u32 in RDNA 7107ec681f3Smrg (0x29, 0x29, 0x1d, 0x1d, 0x29, "v_subb_co_u32", False), # v_sub_co_ci_u32 in RDNA 7117ec681f3Smrg (0x2a, 0x2a, 0x1e, 0x1e, 0x2a, "v_subbrev_co_u32", False), # v_subrev_co_ci_u32 in RDNA 7127ec681f3Smrg ( -1, -1, -1, -1, 0x2b, "v_fmac_f32", True), 7137ec681f3Smrg ( -1, -1, -1, -1, 0x2c, "v_fmamk_f32", True), 7147ec681f3Smrg ( -1, -1, -1, -1, 0x2d, "v_fmaak_f32", True), 7157ec681f3Smrg (0x2f, 0x2f, -1, -1, 0x2f, "v_cvt_pkrtz_f16_f32", True), 7167ec681f3Smrg ( -1, -1, 0x1f, 0x1f, 0x32, "v_add_f16", True), 7177ec681f3Smrg ( -1, -1, 0x20, 0x20, 0x33, "v_sub_f16", True), 7187ec681f3Smrg ( -1, -1, 0x21, 0x21, 0x34, "v_subrev_f16", True), 7197ec681f3Smrg ( -1, -1, 0x22, 0x22, 0x35, "v_mul_f16", True), 7207ec681f3Smrg ( -1, -1, 0x23, 0x23, -1, "v_mac_f16", True), 7217ec681f3Smrg ( -1, -1, 0x24, 0x24, -1, "v_madmk_f16", False), 7227ec681f3Smrg ( -1, -1, 0x25, 0x25, -1, "v_madak_f16", False), 7237ec681f3Smrg ( -1, -1, 0x26, 0x26, -1, "v_add_u16", False), 7247ec681f3Smrg ( -1, -1, 0x27, 0x27, -1, "v_sub_u16", False), 7257ec681f3Smrg ( -1, -1, 0x28, 0x28, -1, "v_subrev_u16", False), 7267ec681f3Smrg ( -1, -1, 0x29, 0x29, -1, "v_mul_lo_u16", False), 7277ec681f3Smrg ( -1, -1, 0x2a, 0x2a, -1, "v_lshlrev_b16", False), 7287ec681f3Smrg ( -1, -1, 0x2b, 0x2b, -1, "v_lshrrev_b16", False), 7297ec681f3Smrg ( -1, -1, 0x2c, 0x2c, -1, "v_ashrrev_i16", False), 7307ec681f3Smrg ( -1, -1, 0x2d, 0x2d, 0x39, "v_max_f16", True), 7317ec681f3Smrg ( -1, -1, 0x2e, 0x2e, 0x3a, "v_min_f16", True), 7327ec681f3Smrg ( -1, -1, 0x2f, 0x2f, -1, "v_max_u16", False), 7337ec681f3Smrg ( -1, -1, 0x30, 0x30, -1, "v_max_i16", False), 7347ec681f3Smrg ( -1, -1, 0x31, 0x31, -1, "v_min_u16", False), 7357ec681f3Smrg ( -1, -1, 0x32, 0x32, -1, "v_min_i16", 
False), 7367ec681f3Smrg ( -1, -1, 0x33, 0x33, 0x3b, "v_ldexp_f16", False), 7377ec681f3Smrg ( -1, -1, -1, 0x34, 0x25, "v_add_u32", False), # use v_add_co_u32 on GFX8, called v_add_nc_u32 in RDNA 7387ec681f3Smrg ( -1, -1, -1, 0x35, 0x26, "v_sub_u32", False), # use v_sub_co_u32 on GFX8, called v_sub_nc_u32 in RDNA 7397ec681f3Smrg ( -1, -1, -1, 0x36, 0x27, "v_subrev_u32", False), # use v_subrev_co_u32 on GFX8, called v_subrev_nc_u32 in RDNA 7407ec681f3Smrg ( -1, -1, -1, -1, 0x36, "v_fmac_f16", False), 7417ec681f3Smrg ( -1, -1, -1, -1, 0x37, "v_fmamk_f16", False), 7427ec681f3Smrg ( -1, -1, -1, -1, 0x38, "v_fmaak_f16", False), 7437ec681f3Smrg ( -1, -1, -1, -1, 0x3c, "v_pk_fmac_f16", False), 7447ec681f3Smrg} 7457ec681f3Smrgfor (gfx6, gfx7, gfx8, gfx9, gfx10, name, modifiers) in VOP2: 7467ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.VOP2, InstrClass.Valu32, modifiers, modifiers) 7477ec681f3Smrg 7487ec681f3Smrgif True: 7497ec681f3Smrg # v_cndmask_b32 can use input modifiers but not output modifiers 7507ec681f3Smrg (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x00, 0x00, 0x00, 0x00, 0x01, "v_cndmask_b32") 7517ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.VOP2, InstrClass.Valu32, True, False) 7527ec681f3Smrg 7537ec681f3Smrg 7547ec681f3Smrg# VOP1 instructions: instructions with 1 input and 1 output 7557ec681f3SmrgVOP1 = { 7567ec681f3Smrg # GFX6, GFX7, GFX8, GFX9, GFX10, name, input_modifiers, output_modifiers 7577ec681f3Smrg (0x00, 0x00, 0x00, 0x00, 0x00, "v_nop", False, False), 7587ec681f3Smrg (0x01, 0x01, 0x01, 0x01, 0x01, "v_mov_b32", False, False), 7597ec681f3Smrg (0x02, 0x02, 0x02, 0x02, 0x02, "v_readfirstlane_b32", False, False), 7607ec681f3Smrg (0x03, 0x03, 0x03, 0x03, 0x03, "v_cvt_i32_f64", True, False, InstrClass.ValuDoubleConvert), 7617ec681f3Smrg (0x04, 0x04, 0x04, 0x04, 0x04, "v_cvt_f64_i32", False, True, InstrClass.ValuDoubleConvert), 7627ec681f3Smrg (0x05, 0x05, 0x05, 0x05, 0x05, "v_cvt_f32_i32", False, True), 7637ec681f3Smrg (0x06, 0x06, 0x06, 0x06, 0x06, 
"v_cvt_f32_u32", False, True), 7647ec681f3Smrg (0x07, 0x07, 0x07, 0x07, 0x07, "v_cvt_u32_f32", True, False), 7657ec681f3Smrg (0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False), 7667ec681f3Smrg (0x09, 0x09, -1, -1, 0x09, "v_mov_fed_b32", True, False), # LLVM mentions it for GFX8_9 7677ec681f3Smrg (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True), 7687ec681f3Smrg ( -1, -1, -1, -1, -1, "p_cvt_f16_f32_rtne", True, True), 7697ec681f3Smrg (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True), 7707ec681f3Smrg (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False), 7717ec681f3Smrg (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False), 7727ec681f3Smrg (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "v_cvt_off_f32_i4", False, True), 7737ec681f3Smrg (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "v_cvt_f32_f64", True, True, InstrClass.ValuDoubleConvert), 7747ec681f3Smrg (0x10, 0x10, 0x10, 0x10, 0x10, "v_cvt_f64_f32", True, True, InstrClass.ValuDoubleConvert), 7757ec681f3Smrg (0x11, 0x11, 0x11, 0x11, 0x11, "v_cvt_f32_ubyte0", False, True), 7767ec681f3Smrg (0x12, 0x12, 0x12, 0x12, 0x12, "v_cvt_f32_ubyte1", False, True), 7777ec681f3Smrg (0x13, 0x13, 0x13, 0x13, 0x13, "v_cvt_f32_ubyte2", False, True), 7787ec681f3Smrg (0x14, 0x14, 0x14, 0x14, 0x14, "v_cvt_f32_ubyte3", False, True), 7797ec681f3Smrg (0x15, 0x15, 0x15, 0x15, 0x15, "v_cvt_u32_f64", True, False, InstrClass.ValuDoubleConvert), 7807ec681f3Smrg (0x16, 0x16, 0x16, 0x16, 0x16, "v_cvt_f64_u32", False, True, InstrClass.ValuDoubleConvert), 7817ec681f3Smrg ( -1, 0x17, 0x17, 0x17, 0x17, "v_trunc_f64", True, True, InstrClass.ValuDouble), 7827ec681f3Smrg ( -1, 0x18, 0x18, 0x18, 0x18, "v_ceil_f64", True, True, InstrClass.ValuDouble), 7837ec681f3Smrg ( -1, 0x19, 0x19, 0x19, 0x19, "v_rndne_f64", True, True, InstrClass.ValuDouble), 7847ec681f3Smrg ( -1, 0x1a, 0x1a, 0x1a, 0x1a, "v_floor_f64", True, True, InstrClass.ValuDouble), 7857ec681f3Smrg ( -1, -1, -1, -1, 0x1b, "v_pipeflush", False, False), 7867ec681f3Smrg (0x20, 
0x20, 0x1b, 0x1b, 0x20, "v_fract_f32", True, True), 7877ec681f3Smrg (0x21, 0x21, 0x1c, 0x1c, 0x21, "v_trunc_f32", True, True), 7887ec681f3Smrg (0x22, 0x22, 0x1d, 0x1d, 0x22, "v_ceil_f32", True, True), 7897ec681f3Smrg (0x23, 0x23, 0x1e, 0x1e, 0x23, "v_rndne_f32", True, True), 7907ec681f3Smrg (0x24, 0x24, 0x1f, 0x1f, 0x24, "v_floor_f32", True, True), 7917ec681f3Smrg (0x25, 0x25, 0x20, 0x20, 0x25, "v_exp_f32", True, True, InstrClass.ValuTranscendental32), 7927ec681f3Smrg (0x26, 0x26, -1, -1, -1, "v_log_clamp_f32", True, True, InstrClass.ValuTranscendental32), 7937ec681f3Smrg (0x27, 0x27, 0x21, 0x21, 0x27, "v_log_f32", True, True, InstrClass.ValuTranscendental32), 7947ec681f3Smrg (0x28, 0x28, -1, -1, -1, "v_rcp_clamp_f32", True, True, InstrClass.ValuTranscendental32), 7957ec681f3Smrg (0x29, 0x29, -1, -1, -1, "v_rcp_legacy_f32", True, True, InstrClass.ValuTranscendental32), 7967ec681f3Smrg (0x2a, 0x2a, 0x22, 0x22, 0x2a, "v_rcp_f32", True, True, InstrClass.ValuTranscendental32), 7977ec681f3Smrg (0x2b, 0x2b, 0x23, 0x23, 0x2b, "v_rcp_iflag_f32", True, True, InstrClass.ValuTranscendental32), 7987ec681f3Smrg (0x2c, 0x2c, -1, -1, -1, "v_rsq_clamp_f32", True, True, InstrClass.ValuTranscendental32), 7997ec681f3Smrg (0x2d, 0x2d, -1, -1, -1, "v_rsq_legacy_f32", True, True, InstrClass.ValuTranscendental32), 8007ec681f3Smrg (0x2e, 0x2e, 0x24, 0x24, 0x2e, "v_rsq_f32", True, True, InstrClass.ValuTranscendental32), 8017ec681f3Smrg (0x2f, 0x2f, 0x25, 0x25, 0x2f, "v_rcp_f64", True, True, InstrClass.ValuDoubleTranscendental), 8027ec681f3Smrg (0x30, 0x30, -1, -1, -1, "v_rcp_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental), 8037ec681f3Smrg (0x31, 0x31, 0x26, 0x26, 0x31, "v_rsq_f64", True, True, InstrClass.ValuDoubleTranscendental), 8047ec681f3Smrg (0x32, 0x32, -1, -1, -1, "v_rsq_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental), 8057ec681f3Smrg (0x33, 0x33, 0x27, 0x27, 0x33, "v_sqrt_f32", True, True, InstrClass.ValuTranscendental32), 8067ec681f3Smrg (0x34, 0x34, 
0x28, 0x28, 0x34, "v_sqrt_f64", True, True, InstrClass.ValuDoubleTranscendental), 8077ec681f3Smrg (0x35, 0x35, 0x29, 0x29, 0x35, "v_sin_f32", True, True, InstrClass.ValuTranscendental32), 8087ec681f3Smrg (0x36, 0x36, 0x2a, 0x2a, 0x36, "v_cos_f32", True, True, InstrClass.ValuTranscendental32), 8097ec681f3Smrg (0x37, 0x37, 0x2b, 0x2b, 0x37, "v_not_b32", False, False), 8107ec681f3Smrg (0x38, 0x38, 0x2c, 0x2c, 0x38, "v_bfrev_b32", False, False), 8117ec681f3Smrg (0x39, 0x39, 0x2d, 0x2d, 0x39, "v_ffbh_u32", False, False), 8127ec681f3Smrg (0x3a, 0x3a, 0x2e, 0x2e, 0x3a, "v_ffbl_b32", False, False), 8137ec681f3Smrg (0x3b, 0x3b, 0x2f, 0x2f, 0x3b, "v_ffbh_i32", False, False), 8147ec681f3Smrg (0x3c, 0x3c, 0x30, 0x30, 0x3c, "v_frexp_exp_i32_f64", True, False, InstrClass.ValuDouble), 8157ec681f3Smrg (0x3d, 0x3d, 0x31, 0x31, 0x3d, "v_frexp_mant_f64", True, False, InstrClass.ValuDouble), 8167ec681f3Smrg (0x3e, 0x3e, 0x32, 0x32, 0x3e, "v_fract_f64", True, True, InstrClass.ValuDouble), 8177ec681f3Smrg (0x3f, 0x3f, 0x33, 0x33, 0x3f, "v_frexp_exp_i32_f32", True, False), 8187ec681f3Smrg (0x40, 0x40, 0x34, 0x34, 0x40, "v_frexp_mant_f32", True, False), 8197ec681f3Smrg (0x41, 0x41, 0x35, 0x35, 0x41, "v_clrexcp", False, False), 8207ec681f3Smrg (0x42, 0x42, 0x36, -1, 0x42, "v_movreld_b32", False, False), 8217ec681f3Smrg (0x43, 0x43, 0x37, -1, 0x43, "v_movrels_b32", False, False), 8227ec681f3Smrg (0x44, 0x44, 0x38, -1, 0x44, "v_movrelsd_b32", False, False), 8237ec681f3Smrg ( -1, -1, -1, -1, 0x48, "v_movrelsd_2_b32", False, False), 8247ec681f3Smrg ( -1, -1, -1, 0x37, -1, "v_screen_partition_4se_b32", False, False), 8257ec681f3Smrg ( -1, -1, 0x39, 0x39, 0x50, "v_cvt_f16_u16", False, True), 8267ec681f3Smrg ( -1, -1, 0x3a, 0x3a, 0x51, "v_cvt_f16_i16", False, True), 8277ec681f3Smrg ( -1, -1, 0x3b, 0x3b, 0x52, "v_cvt_u16_f16", True, False), 8287ec681f3Smrg ( -1, -1, 0x3c, 0x3c, 0x53, "v_cvt_i16_f16", True, False), 8297ec681f3Smrg ( -1, -1, 0x3d, 0x3d, 0x54, "v_rcp_f16", True, True, 
InstrClass.ValuTranscendental32), 8307ec681f3Smrg ( -1, -1, 0x3e, 0x3e, 0x55, "v_sqrt_f16", True, True, InstrClass.ValuTranscendental32), 8317ec681f3Smrg ( -1, -1, 0x3f, 0x3f, 0x56, "v_rsq_f16", True, True, InstrClass.ValuTranscendental32), 8327ec681f3Smrg ( -1, -1, 0x40, 0x40, 0x57, "v_log_f16", True, True, InstrClass.ValuTranscendental32), 8337ec681f3Smrg ( -1, -1, 0x41, 0x41, 0x58, "v_exp_f16", True, True, InstrClass.ValuTranscendental32), 8347ec681f3Smrg ( -1, -1, 0x42, 0x42, 0x59, "v_frexp_mant_f16", True, False), 8357ec681f3Smrg ( -1, -1, 0x43, 0x43, 0x5a, "v_frexp_exp_i16_f16", True, False), 8367ec681f3Smrg ( -1, -1, 0x44, 0x44, 0x5b, "v_floor_f16", True, True), 8377ec681f3Smrg ( -1, -1, 0x45, 0x45, 0x5c, "v_ceil_f16", True, True), 8387ec681f3Smrg ( -1, -1, 0x46, 0x46, 0x5d, "v_trunc_f16", True, True), 8397ec681f3Smrg ( -1, -1, 0x47, 0x47, 0x5e, "v_rndne_f16", True, True), 8407ec681f3Smrg ( -1, -1, 0x48, 0x48, 0x5f, "v_fract_f16", True, True), 8417ec681f3Smrg ( -1, -1, 0x49, 0x49, 0x60, "v_sin_f16", True, True, InstrClass.ValuTranscendental32), 8427ec681f3Smrg ( -1, -1, 0x4a, 0x4a, 0x61, "v_cos_f16", True, True, InstrClass.ValuTranscendental32), 8437ec681f3Smrg ( -1, 0x46, 0x4b, 0x4b, -1, "v_exp_legacy_f32", True, True, InstrClass.ValuTranscendental32), 8447ec681f3Smrg ( -1, 0x45, 0x4c, 0x4c, -1, "v_log_legacy_f32", True, True, InstrClass.ValuTranscendental32), 8457ec681f3Smrg ( -1, -1, -1, 0x4f, 0x62, "v_sat_pk_u8_i16", False, False), 8467ec681f3Smrg ( -1, -1, -1, 0x4d, 0x63, "v_cvt_norm_i16_f16", True, False), 8477ec681f3Smrg ( -1, -1, -1, 0x4e, 0x64, "v_cvt_norm_u16_f16", True, False), 8487ec681f3Smrg ( -1, -1, -1, 0x51, 0x65, "v_swap_b32", False, False), 8497ec681f3Smrg ( -1, -1, -1, -1, 0x68, "v_swaprel_b32", False, False), 8507ec681f3Smrg} 8517ec681f3Smrgfor (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod, cls) in default_class(VOP1, InstrClass.Valu32): 8527ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.VOP1, cls, in_mod, out_mod) 
# VOPC instructions: vector compares.
#
# Every opcode() call in this section passes the GFX7, GFX9 and GFX10 encodings
# (in that order) followed by the format and the instruction class.  The float
# compares additionally pass "True, False" (input modifiers, no output
# modifiers); the integer compares pass no modifier arguments at all.

# Rows: (GFX6, GFX7, GFX8, GFX9, GFX10, name[, instruction class]).
# Rows without a class are run through default_class() with InstrClass.Valu32.
VOPC_CLASS = {
    (0x88, 0x88, 0x10, 0x10, 0x88, "v_cmp_class_f32"),
    (  -1,   -1, 0x14, 0x14, 0x8f, "v_cmp_class_f16"),
    (0x98, 0x98, 0x11, 0x11, 0x98, "v_cmpx_class_f32"),
    (  -1,   -1, 0x15, 0x15, 0x9f, "v_cmpx_class_f16"),
    (0xa8, 0xa8, 0x12, 0x12, 0xa8, "v_cmp_class_f64", InstrClass.ValuDouble),
    (0xb8, 0xb8, 0x13, 0x13, 0xb8, "v_cmpx_class_f64", InstrClass.ValuDouble),
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(VOPC_CLASS, InstrClass.Valu32):
    opcode(name, gfx7, gfx9, gfx10, Format.VOPC, cls, True, False)

# Float comparison mnemonics; the list index is added to each base encoding below.
COMPF = ["f", "lt", "eq", "le", "gt", "lg", "ge", "o", "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru"]

# 16-bit float compares.  Columns: GFX8/GFX9 base, GFX10 base, mnemonic prefix,
# offset into COMPF.  The 16 comparisons are split into two groups of eight
# because each group has its own base encoding.
_F16_COMPARES = (
    (0x20, 0xc8, "v_cmp_", 0),
    (0x30, 0xd8, "v_cmpx_", 0),
    (0x28, 0xe8, "v_cmp_", 8),
    (0x38, 0xf8, "v_cmpx_", 8),
)
for i in range(8):
    for (_base89, _base10, _prefix, _shift) in _F16_COMPARES:
        (gfx6, gfx7, gfx8, gfx9, gfx10) = (-1, -1, _base89 + i, _base89 + i, _base10 + i)
        name = _prefix + COMPF[i + _shift] + "_f16"
        opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)

# 32/64-bit float compares.  Columns: GFX6/GFX7 base, GFX8/GFX9 base,
# GFX10 base, mnemonic prefix, type suffix, instruction class.
_F32_F64_COMPARES = (
    (0x00, 0x40, 0x00, "v_cmp_", "_f32", InstrClass.Valu32),
    (0x10, 0x50, 0x10, "v_cmpx_", "_f32", InstrClass.Valu32),
    (0x20, 0x60, 0x20, "v_cmp_", "_f64", InstrClass.ValuDouble),
    (0x30, 0x70, 0x30, "v_cmpx_", "_f64", InstrClass.ValuDouble),
)
# GFX_6_7 only: v_cmps*/v_cmpsx* encodings.  Columns: GFX6/GFX7 base, mnemonic
# prefix, type suffix.  These are only assigned to the module-level variables;
# no opcode() call is made for them.
_GFX6_7_ONLY_COMPARES = (
    (0x40, "v_cmps_", "_f32"),
    (0x50, "v_cmpsx_", "_f32"),
    (0x60, "v_cmps_", "_f64"),
    (0x70, "v_cmpsx_", "_f64"),
)
for i in range(16):
    for (_base67, _base89, _base10, _prefix, _suffix, _cls) in _F32_F64_COMPARES:
        (gfx6, gfx7, gfx8, gfx9, gfx10) = (_base67 + i, _base67 + i, _base89 + i, _base89 + i, _base10 + i)
        name = _prefix + COMPF[i] + _suffix
        opcode(name, gfx7, gfx9, gfx10, Format.VOPC, _cls, True, False)
    # GFX_6_7
    for (_base67, _prefix, _suffix) in _GFX6_7_ONLY_COMPARES:
        (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (_base67 + i, _base67 + i, -1, -1, -1, _prefix + COMPF[i] + _suffix)

# Integer comparison mnemonics; the list index is added to each base encoding below.
COMPI = ["f", "lt", "eq", "le", "gt", "lg", "ge", "tru"]

# 16-bit integer compares.  Columns: GFX8/GFX9 base, GFX10 base, mnemonic
# prefix, type suffix.
_INT16_COMPARES = (
    (0xa0, 0x88, "v_cmp_", "_i16"),
    (0xb0, 0x98, "v_cmpx_", "_i16"),
    (0xa8, 0xa8, "v_cmp_", "_u16"),
    (0xb8, 0xb8, "v_cmpx_", "_u16"),
)

# GFX_8_9: indices 0 ("f") and 7 ("tru") only; their GFX10 column is -1.
for i in (0, 7):
    for (_base89, _base10, _prefix, _suffix) in _INT16_COMPARES:
        (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, _base89 + i, _base89 + i, -1, _prefix + COMPI[i] + _suffix)
        opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)

# Indices 1..6 carry a GFX10 encoding in addition to the GFX8/GFX9 one.
for i in range(1, 7):
    for (_base89, _base10, _prefix, _suffix) in _INT16_COMPARES:
        (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, _base89 + i, _base89 + i, _base10 + i, _prefix + COMPI[i] + _suffix)
        opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)

# 32/64-bit integer compares.  Columns: shared GFX6/GFX7/GFX10 base,
# GFX8/GFX9 base, mnemonic prefix, type suffix, instruction class.
_INT32_INT64_COMPARES = (
    (0x80, 0xc0, "v_cmp_", "_i32", InstrClass.Valu32),
    (0x90, 0xd0, "v_cmpx_", "_i32", InstrClass.Valu32),
    (0xa0, 0xe0, "v_cmp_", "_i64", InstrClass.Valu64),
    (0xb0, 0xf0, "v_cmpx_", "_i64", InstrClass.Valu64),
    (0xc0, 0xc8, "v_cmp_", "_u32", InstrClass.Valu32),
    (0xd0, 0xd8, "v_cmpx_", "_u32", InstrClass.Valu32),
    (0xe0, 0xe8, "v_cmp_", "_u64", InstrClass.Valu64),
    (0xf0, 0xf8, "v_cmpx_", "_u64", InstrClass.Valu64),
)
for i in range(8):
    for (_base, _base89, _prefix, _suffix, _cls) in _INT32_INT64_COMPARES:
        (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (_base + i, _base + i, _base89 + i, _base89 + i, _base + i, _prefix + COMPI[i] + _suffix)
        opcode(name, gfx7, gfx9, gfx10, Format.VOPC, _cls)


# VOPP instructions: packed 16bit instructions - 1 or 2 inputs and 1 output
VOPP = {
    # opcode, name, input/output modifiers
    (0x00, "v_pk_mad_i16", False),
    (0x01, "v_pk_mul_lo_u16", False),
    (0x02, "v_pk_add_i16", False),
    (0x03, "v_pk_sub_i16", False),
    (0x04, "v_pk_lshlrev_b16", False),
    (0x05, "v_pk_lshrrev_b16", False),
    (0x06, "v_pk_ashrrev_i16", False),
    (0x07, "v_pk_max_i16", False),
    (0x08, "v_pk_min_i16", False),
    (0x09, "v_pk_mad_u16", False),
    (0x0a, "v_pk_add_u16", False),
    (0x0b, "v_pk_sub_u16", False),
    (0x0c, "v_pk_max_u16", False),
    (0x0d, "v_pk_min_u16", False),
    (0x0e, "v_pk_fma_f16", True),
    (0x0f, "v_pk_add_f16", True),
    (0x10, "v_pk_mul_f16", True),
    (0x11, "v_pk_min_f16", True),
    (0x12, "v_pk_max_f16", True),
    (0x20, "v_fma_mix_f32", True), # v_mad_mix_f32 in VEGA ISA, v_fma_mix_f32 in RDNA ISA
    (0x21, "v_fma_mixlo_f16", True), # v_mad_mixlo_f16 in VEGA ISA, v_fma_mixlo_f16 in RDNA ISA
    (0x22, "v_fma_mixhi_f16", True), # v_mad_mixhi_f16 in VEGA ISA, v_fma_mixhi_f16 in RDNA ISA
}
# note that these are only supported on gfx9+ so we'll need to distinguish between gfx8 and gfx9 here
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, -1, code, code, name)
# The single modifiers flag is passed for both the input and the output modifier argument.
for (code, name, modifiers) in VOPP:
    opcode(name, -1, code, code, Format.VOP3P, InstrClass.Valu32, modifiers, modifiers)

# Dot products: the GFX9 and GFX10 encodings differ, so they are listed
# separately from the VOPP table.  Columns: name, GFX9 opcode, GFX10 opcode.
_VOP3P_DOT = (
    ("v_dot2_i32_i16", 0x26, 0x14),
    ("v_dot2_u32_u16", 0x27, 0x15),
    ("v_dot4_i32_i8", 0x28, 0x16),
    ("v_dot4_u32_u8", 0x29, 0x17),
)
for (_dot_name, _dot_gfx9, _dot_gfx10) in _VOP3P_DOT:
    opcode(_dot_name, -1, _dot_gfx9, _dot_gfx10, Format.VOP3P, InstrClass.Valu32)


# VINTERP instructions:
VINTRP = {
    (0x00, "v_interp_p1_f32"),
    (0x01, "v_interp_p2_f32"),
    (0x02, "v_interp_mov_f32"),
}
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
for (code, name) in VINTRP:
    opcode(name, code, code, code, Format.VINTRP, InstrClass.Valu32)

# VOP3 instructions: 3 inputs, 1 output
# VOP3b instructions: have a unique scalar output, e.g.
VOP2 with vcc out 9857ec681f3SmrgVOP3 = { 9867ec681f3Smrg (0x140, 0x140, 0x1c0, 0x1c0, 0x140, "v_mad_legacy_f32", True, True), # GFX6-GFX10 9877ec681f3Smrg (0x141, 0x141, 0x1c1, 0x1c1, 0x141, "v_mad_f32", True, True), 9887ec681f3Smrg (0x142, 0x142, 0x1c2, 0x1c2, 0x142, "v_mad_i32_i24", False, False), 9897ec681f3Smrg (0x143, 0x143, 0x1c3, 0x1c3, 0x143, "v_mad_u32_u24", False, False), 9907ec681f3Smrg (0x144, 0x144, 0x1c4, 0x1c4, 0x144, "v_cubeid_f32", True, True), 9917ec681f3Smrg (0x145, 0x145, 0x1c5, 0x1c5, 0x145, "v_cubesc_f32", True, True), 9927ec681f3Smrg (0x146, 0x146, 0x1c6, 0x1c6, 0x146, "v_cubetc_f32", True, True), 9937ec681f3Smrg (0x147, 0x147, 0x1c7, 0x1c7, 0x147, "v_cubema_f32", True, True), 9947ec681f3Smrg (0x148, 0x148, 0x1c8, 0x1c8, 0x148, "v_bfe_u32", False, False), 9957ec681f3Smrg (0x149, 0x149, 0x1c9, 0x1c9, 0x149, "v_bfe_i32", False, False), 9967ec681f3Smrg (0x14a, 0x14a, 0x1ca, 0x1ca, 0x14a, "v_bfi_b32", False, False), 9977ec681f3Smrg (0x14b, 0x14b, 0x1cb, 0x1cb, 0x14b, "v_fma_f32", True, True, InstrClass.ValuFma), 9987ec681f3Smrg (0x14c, 0x14c, 0x1cc, 0x1cc, 0x14c, "v_fma_f64", True, True, InstrClass.ValuDouble), 9997ec681f3Smrg (0x14d, 0x14d, 0x1cd, 0x1cd, 0x14d, "v_lerp_u8", False, False), 10007ec681f3Smrg (0x14e, 0x14e, 0x1ce, 0x1ce, 0x14e, "v_alignbit_b32", False, False), 10017ec681f3Smrg (0x14f, 0x14f, 0x1cf, 0x1cf, 0x14f, "v_alignbyte_b32", False, False), 10027ec681f3Smrg (0x150, 0x150, -1, -1, 0x150, "v_mullit_f32", True, True), 10037ec681f3Smrg (0x151, 0x151, 0x1d0, 0x1d0, 0x151, "v_min3_f32", True, True), 10047ec681f3Smrg (0x152, 0x152, 0x1d1, 0x1d1, 0x152, "v_min3_i32", False, False), 10057ec681f3Smrg (0x153, 0x153, 0x1d2, 0x1d2, 0x153, "v_min3_u32", False, False), 10067ec681f3Smrg (0x154, 0x154, 0x1d3, 0x1d3, 0x154, "v_max3_f32", True, True), 10077ec681f3Smrg (0x155, 0x155, 0x1d4, 0x1d4, 0x155, "v_max3_i32", False, False), 10087ec681f3Smrg (0x156, 0x156, 0x1d5, 0x1d5, 0x156, "v_max3_u32", False, False), 10097ec681f3Smrg (0x157, 0x157, 
0x1d6, 0x1d6, 0x157, "v_med3_f32", True, True), 10107ec681f3Smrg (0x158, 0x158, 0x1d7, 0x1d7, 0x158, "v_med3_i32", False, False), 10117ec681f3Smrg (0x159, 0x159, 0x1d8, 0x1d8, 0x159, "v_med3_u32", False, False), 10127ec681f3Smrg (0x15a, 0x15a, 0x1d9, 0x1d9, 0x15a, "v_sad_u8", False, False), 10137ec681f3Smrg (0x15b, 0x15b, 0x1da, 0x1da, 0x15b, "v_sad_hi_u8", False, False), 10147ec681f3Smrg (0x15c, 0x15c, 0x1db, 0x1db, 0x15c, "v_sad_u16", False, False), 10157ec681f3Smrg (0x15d, 0x15d, 0x1dc, 0x1dc, 0x15d, "v_sad_u32", False, False), 10167ec681f3Smrg (0x15e, 0x15e, 0x1dd, 0x1dd, 0x15e, "v_cvt_pk_u8_f32", True, False), 10177ec681f3Smrg (0x15f, 0x15f, 0x1de, 0x1de, 0x15f, "v_div_fixup_f32", True, True), 10187ec681f3Smrg (0x160, 0x160, 0x1df, 0x1df, 0x160, "v_div_fixup_f64", True, True), 10197ec681f3Smrg (0x161, 0x161, -1, -1, -1, "v_lshl_b64", False, False, InstrClass.Valu64), 10207ec681f3Smrg (0x162, 0x162, -1, -1, -1, "v_lshr_b64", False, False, InstrClass.Valu64), 10217ec681f3Smrg (0x163, 0x163, -1, -1, -1, "v_ashr_i64", False, False, InstrClass.Valu64), 10227ec681f3Smrg (0x164, 0x164, 0x280, 0x280, 0x164, "v_add_f64", True, True, InstrClass.ValuDoubleAdd), 10237ec681f3Smrg (0x165, 0x165, 0x281, 0x281, 0x165, "v_mul_f64", True, True, InstrClass.ValuDouble), 10247ec681f3Smrg (0x166, 0x166, 0x282, 0x282, 0x166, "v_min_f64", True, True, InstrClass.ValuDouble), 10257ec681f3Smrg (0x167, 0x167, 0x283, 0x283, 0x167, "v_max_f64", True, True, InstrClass.ValuDouble), 10267ec681f3Smrg (0x168, 0x168, 0x284, 0x284, 0x168, "v_ldexp_f64", False, True, InstrClass.ValuDouble), # src1 can take input modifiers 10277ec681f3Smrg (0x169, 0x169, 0x285, 0x285, 0x169, "v_mul_lo_u32", False, False, InstrClass.ValuQuarterRate32), 10287ec681f3Smrg (0x16a, 0x16a, 0x286, 0x286, 0x16a, "v_mul_hi_u32", False, False, InstrClass.ValuQuarterRate32), 10297ec681f3Smrg (0x16b, 0x16b, 0x285, 0x285, 0x16b, "v_mul_lo_i32", False, False, InstrClass.ValuQuarterRate32), # identical to v_mul_lo_u32 
10307ec681f3Smrg (0x16c, 0x16c, 0x287, 0x287, 0x16c, "v_mul_hi_i32", False, False, InstrClass.ValuQuarterRate32), 10317ec681f3Smrg (0x16d, 0x16d, 0x1e0, 0x1e0, 0x16d, "v_div_scale_f32", True, True), # writes to VCC 10327ec681f3Smrg (0x16e, 0x16e, 0x1e1, 0x1e1, 0x16e, "v_div_scale_f64", True, True, InstrClass.ValuDouble), # writes to VCC 10337ec681f3Smrg (0x16f, 0x16f, 0x1e2, 0x1e2, 0x16f, "v_div_fmas_f32", True, True), # takes VCC input 10347ec681f3Smrg (0x170, 0x170, 0x1e3, 0x1e3, 0x170, "v_div_fmas_f64", True, True, InstrClass.ValuDouble), # takes VCC input 10357ec681f3Smrg (0x171, 0x171, 0x1e4, 0x1e4, 0x171, "v_msad_u8", False, False), 10367ec681f3Smrg (0x172, 0x172, 0x1e5, 0x1e5, 0x172, "v_qsad_pk_u16_u8", False, False), 10377ec681f3Smrg (0x172, -1, -1, -1, -1, "v_qsad_u8", False, False), # what's the difference? 10387ec681f3Smrg (0x173, 0x173, 0x1e6, 0x1e6, 0x173, "v_mqsad_pk_u16_u8", False, False), 10397ec681f3Smrg (0x173, -1, -1, -1, -1, "v_mqsad_u8", False, False), # what's the difference? 
10407ec681f3Smrg (0x174, 0x174, 0x292, 0x292, 0x174, "v_trig_preop_f64", False, False, InstrClass.ValuDouble), 10417ec681f3Smrg ( -1, 0x175, 0x1e7, 0x1e7, 0x175, "v_mqsad_u32_u8", False, False), 10427ec681f3Smrg ( -1, 0x176, 0x1e8, 0x1e8, 0x176, "v_mad_u64_u32", False, False, InstrClass.Valu64), 10437ec681f3Smrg ( -1, 0x177, 0x1e9, 0x1e9, 0x177, "v_mad_i64_i32", False, False, InstrClass.Valu64), 10447ec681f3Smrg ( -1, -1, 0x1ea, 0x1ea, -1, "v_mad_legacy_f16", True, True), 10457ec681f3Smrg ( -1, -1, 0x1eb, 0x1eb, -1, "v_mad_legacy_u16", False, False), 10467ec681f3Smrg ( -1, -1, 0x1ec, 0x1ec, -1, "v_mad_legacy_i16", False, False), 10477ec681f3Smrg ( -1, -1, 0x1ed, 0x1ed, 0x344, "v_perm_b32", False, False), 10487ec681f3Smrg ( -1, -1, 0x1ee, 0x1ee, -1, "v_fma_legacy_f16", True, True, InstrClass.ValuFma), 10497ec681f3Smrg ( -1, -1, 0x1ef, 0x1ef, -1, "v_div_fixup_legacy_f16", True, True), 10507ec681f3Smrg (0x12c, 0x12c, 0x1f0, 0x1f0, -1, "v_cvt_pkaccum_u8_f32", True, False), 10517ec681f3Smrg ( -1, -1, -1, 0x1f1, 0x373, "v_mad_u32_u16", False, False), 10527ec681f3Smrg ( -1, -1, -1, 0x1f2, 0x375, "v_mad_i32_i16", False, False), 10537ec681f3Smrg ( -1, -1, -1, 0x1f3, 0x345, "v_xad_u32", False, False), 10547ec681f3Smrg ( -1, -1, -1, 0x1f4, 0x351, "v_min3_f16", True, True), 10557ec681f3Smrg ( -1, -1, -1, 0x1f5, 0x352, "v_min3_i16", False, False), 10567ec681f3Smrg ( -1, -1, -1, 0x1f6, 0x353, "v_min3_u16", False, False), 10577ec681f3Smrg ( -1, -1, -1, 0x1f7, 0x354, "v_max3_f16", True, True), 10587ec681f3Smrg ( -1, -1, -1, 0x1f8, 0x355, "v_max3_i16", False, False), 10597ec681f3Smrg ( -1, -1, -1, 0x1f9, 0x356, "v_max3_u16", False, False), 10607ec681f3Smrg ( -1, -1, -1, 0x1fa, 0x357, "v_med3_f16", True, True), 10617ec681f3Smrg ( -1, -1, -1, 0x1fb, 0x358, "v_med3_i16", False, False), 10627ec681f3Smrg ( -1, -1, -1, 0x1fc, 0x359, "v_med3_u16", False, False), 10637ec681f3Smrg ( -1, -1, -1, 0x1fd, 0x346, "v_lshl_add_u32", False, False), 10647ec681f3Smrg ( -1, -1, -1, 0x1fe, 0x347, 
"v_add_lshl_u32", False, False), 10657ec681f3Smrg ( -1, -1, -1, 0x1ff, 0x36d, "v_add3_u32", False, False), 10667ec681f3Smrg ( -1, -1, -1, 0x200, 0x36f, "v_lshl_or_b32", False, False), 10677ec681f3Smrg ( -1, -1, -1, 0x201, 0x371, "v_and_or_b32", False, False), 10687ec681f3Smrg ( -1, -1, -1, 0x202, 0x372, "v_or3_b32", False, False), 10697ec681f3Smrg ( -1, -1, -1, 0x203, -1, "v_mad_f16", True, True), 10707ec681f3Smrg ( -1, -1, -1, 0x204, 0x340, "v_mad_u16", False, False), 10717ec681f3Smrg ( -1, -1, -1, 0x205, 0x35e, "v_mad_i16", False, False), 10727ec681f3Smrg ( -1, -1, -1, 0x206, 0x34b, "v_fma_f16", True, True), 10737ec681f3Smrg ( -1, -1, -1, 0x207, 0x35f, "v_div_fixup_f16", True, True), 10747ec681f3Smrg ( -1, -1, 0x274, 0x274, 0x342, "v_interp_p1ll_f16", True, True), 10757ec681f3Smrg ( -1, -1, 0x275, 0x275, 0x343, "v_interp_p1lv_f16", True, True), 10767ec681f3Smrg ( -1, -1, 0x276, 0x276, -1, "v_interp_p2_legacy_f16", True, True), 10777ec681f3Smrg ( -1, -1, -1, 0x277, 0x35a, "v_interp_p2_f16", True, True), 10787ec681f3Smrg (0x12b, 0x12b, 0x288, 0x288, 0x362, "v_ldexp_f32", False, True), 10797ec681f3Smrg ( -1, -1, 0x289, 0x289, 0x360, "v_readlane_b32_e64", False, False), 10807ec681f3Smrg ( -1, -1, 0x28a, 0x28a, 0x361, "v_writelane_b32_e64", False, False), 10817ec681f3Smrg (0x122, 0x122, 0x28b, 0x28b, 0x364, "v_bcnt_u32_b32", False, False), 10827ec681f3Smrg (0x123, 0x123, 0x28c, 0x28c, 0x365, "v_mbcnt_lo_u32_b32", False, False), 10837ec681f3Smrg ( -1, -1, 0x28d, 0x28d, 0x366, "v_mbcnt_hi_u32_b32_e64", False, False), 10847ec681f3Smrg ( -1, -1, 0x28f, 0x28f, 0x2ff, "v_lshlrev_b64", False, False, InstrClass.Valu64), 10857ec681f3Smrg ( -1, -1, 0x290, 0x290, 0x300, "v_lshrrev_b64", False, False, InstrClass.Valu64), 10867ec681f3Smrg ( -1, -1, 0x291, 0x291, 0x301, "v_ashrrev_i64", False, False, InstrClass.Valu64), 10877ec681f3Smrg (0x11e, 0x11e, 0x293, 0x293, 0x363, "v_bfm_b32", False, False), 10887ec681f3Smrg (0x12d, 0x12d, 0x294, 0x294, 0x368, "v_cvt_pknorm_i16_f32", True, 
False), 10897ec681f3Smrg (0x12e, 0x12e, 0x295, 0x295, 0x369, "v_cvt_pknorm_u16_f32", True, False), 10907ec681f3Smrg (0x12f, 0x12f, 0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32_e64", True, False), # GFX6_7_10 is VOP2 with opcode 0x02f 10917ec681f3Smrg (0x130, 0x130, 0x297, 0x297, 0x36a, "v_cvt_pk_u16_u32", False, False), 10927ec681f3Smrg (0x131, 0x131, 0x298, 0x298, 0x36b, "v_cvt_pk_i16_i32", False, False), 10937ec681f3Smrg ( -1, -1, -1, 0x299, 0x312, "v_cvt_pknorm_i16_f16", True, False), 10947ec681f3Smrg ( -1, -1, -1, 0x29a, 0x313, "v_cvt_pknorm_u16_f16", True, False), 10957ec681f3Smrg ( -1, -1, -1, 0x29c, 0x37f, "v_add_i32", False, False), 10967ec681f3Smrg ( -1, -1, -1, 0x29d, 0x376, "v_sub_i32", False, False), 10977ec681f3Smrg ( -1, -1, -1, 0x29e, 0x30d, "v_add_i16", False, False), 10987ec681f3Smrg ( -1, -1, -1, 0x29f, 0x30e, "v_sub_i16", False, False), 10997ec681f3Smrg ( -1, -1, -1, 0x2a0, 0x311, "v_pack_b32_f16", True, False), 11007ec681f3Smrg ( -1, -1, -1, -1, 0x178, "v_xor3_b32", False, False), 11017ec681f3Smrg ( -1, -1, -1, -1, 0x377, "v_permlane16_b32", False, False), 11027ec681f3Smrg ( -1, -1, -1, -1, 0x378, "v_permlanex16_b32", False, False), 11037ec681f3Smrg ( -1, -1, -1, -1, 0x30f, "v_add_co_u32_e64", False, False), 11047ec681f3Smrg ( -1, -1, -1, -1, 0x310, "v_sub_co_u32_e64", False, False), 11057ec681f3Smrg ( -1, -1, -1, -1, 0x319, "v_subrev_co_u32_e64", False, False), 11067ec681f3Smrg ( -1, -1, -1, -1, 0x303, "v_add_u16_e64", False, False), 11077ec681f3Smrg ( -1, -1, -1, -1, 0x304, "v_sub_u16_e64", False, False), 11087ec681f3Smrg ( -1, -1, -1, -1, 0x305, "v_mul_lo_u16_e64", False, False), 11097ec681f3Smrg ( -1, -1, -1, -1, 0x309, "v_max_u16_e64", False, False), 11107ec681f3Smrg ( -1, -1, -1, -1, 0x30a, "v_max_i16_e64", False, False), 11117ec681f3Smrg ( -1, -1, -1, -1, 0x30b, "v_min_u16_e64", False, False), 11127ec681f3Smrg ( -1, -1, -1, -1, 0x30c, "v_min_i16_e64", False, False), 11137ec681f3Smrg ( -1, -1, -1, -1, 0x307, "v_lshrrev_b16_e64", False, 
False), 11147ec681f3Smrg ( -1, -1, -1, -1, 0x308, "v_ashrrev_i16_e64", False, False), 11157ec681f3Smrg ( -1, -1, -1, -1, 0x314, "v_lshlrev_b16_e64", False, False), 11167ec681f3Smrg ( -1, -1, -1, -1, 0x140, "v_fma_legacy_f32", True, True, InstrClass.ValuFma), #GFX10.3+ 11177ec681f3Smrg} 11187ec681f3Smrgfor (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod, cls) in default_class(VOP3, InstrClass.Valu32): 11197ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.VOP3, cls, in_mod, out_mod) 11207ec681f3Smrg 11217ec681f3Smrg 11227ec681f3Smrg# DS instructions: 3 inputs (1 addr, 2 data), 1 output 11237ec681f3SmrgDS = { 11247ec681f3Smrg (0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"), 11257ec681f3Smrg (0x01, 0x01, 0x01, 0x01, 0x01, "ds_sub_u32"), 11267ec681f3Smrg (0x02, 0x02, 0x02, 0x02, 0x02, "ds_rsub_u32"), 11277ec681f3Smrg (0x03, 0x03, 0x03, 0x03, 0x03, "ds_inc_u32"), 11287ec681f3Smrg (0x04, 0x04, 0x04, 0x04, 0x04, "ds_dec_u32"), 11297ec681f3Smrg (0x05, 0x05, 0x05, 0x05, 0x05, "ds_min_i32"), 11307ec681f3Smrg (0x06, 0x06, 0x06, 0x06, 0x06, "ds_max_i32"), 11317ec681f3Smrg (0x07, 0x07, 0x07, 0x07, 0x07, "ds_min_u32"), 11327ec681f3Smrg (0x08, 0x08, 0x08, 0x08, 0x08, "ds_max_u32"), 11337ec681f3Smrg (0x09, 0x09, 0x09, 0x09, 0x09, "ds_and_b32"), 11347ec681f3Smrg (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "ds_or_b32"), 11357ec681f3Smrg (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "ds_xor_b32"), 11367ec681f3Smrg (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "ds_mskor_b32"), 11377ec681f3Smrg (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "ds_write_b32"), 11387ec681f3Smrg (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "ds_write2_b32"), 11397ec681f3Smrg (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "ds_write2st64_b32"), 11407ec681f3Smrg (0x10, 0x10, 0x10, 0x10, 0x10, "ds_cmpst_b32"), 11417ec681f3Smrg (0x11, 0x11, 0x11, 0x11, 0x11, "ds_cmpst_f32"), 11427ec681f3Smrg (0x12, 0x12, 0x12, 0x12, 0x12, "ds_min_f32"), 11437ec681f3Smrg (0x13, 0x13, 0x13, 0x13, 0x13, "ds_max_f32"), 11447ec681f3Smrg ( -1, 0x14, 0x14, 0x14, 0x14, "ds_nop"), 11457ec681f3Smrg ( -1, -1, 0x15, 0x15, 
0x15, "ds_add_f32"), 11467ec681f3Smrg ( -1, -1, 0x1d, 0x1d, 0xb0, "ds_write_addtid_b32"), 11477ec681f3Smrg (0x1e, 0x1e, 0x1e, 0x1e, 0x1e, "ds_write_b8"), 11487ec681f3Smrg (0x1f, 0x1f, 0x1f, 0x1f, 0x1f, "ds_write_b16"), 11497ec681f3Smrg (0x20, 0x20, 0x20, 0x20, 0x20, "ds_add_rtn_u32"), 11507ec681f3Smrg (0x21, 0x21, 0x21, 0x21, 0x21, "ds_sub_rtn_u32"), 11517ec681f3Smrg (0x22, 0x22, 0x22, 0x22, 0x22, "ds_rsub_rtn_u32"), 11527ec681f3Smrg (0x23, 0x23, 0x23, 0x23, 0x23, "ds_inc_rtn_u32"), 11537ec681f3Smrg (0x24, 0x24, 0x24, 0x24, 0x24, "ds_dec_rtn_u32"), 11547ec681f3Smrg (0x25, 0x25, 0x25, 0x25, 0x25, "ds_min_rtn_i32"), 11557ec681f3Smrg (0x26, 0x26, 0x26, 0x26, 0x26, "ds_max_rtn_i32"), 11567ec681f3Smrg (0x27, 0x27, 0x27, 0x27, 0x27, "ds_min_rtn_u32"), 11577ec681f3Smrg (0x28, 0x28, 0x28, 0x28, 0x28, "ds_max_rtn_u32"), 11587ec681f3Smrg (0x29, 0x29, 0x29, 0x29, 0x29, "ds_and_rtn_b32"), 11597ec681f3Smrg (0x2a, 0x2a, 0x2a, 0x2a, 0x2a, "ds_or_rtn_b32"), 11607ec681f3Smrg (0x2b, 0x2b, 0x2b, 0x2b, 0x2b, "ds_xor_rtn_b32"), 11617ec681f3Smrg (0x2c, 0x2c, 0x2c, 0x2c, 0x2c, "ds_mskor_rtn_b32"), 11627ec681f3Smrg (0x2d, 0x2d, 0x2d, 0x2d, 0x2d, "ds_wrxchg_rtn_b32"), 11637ec681f3Smrg (0x2e, 0x2e, 0x2e, 0x2e, 0x2e, "ds_wrxchg2_rtn_b32"), 11647ec681f3Smrg (0x2f, 0x2f, 0x2f, 0x2f, 0x2f, "ds_wrxchg2st64_rtn_b32"), 11657ec681f3Smrg (0x30, 0x30, 0x30, 0x30, 0x30, "ds_cmpst_rtn_b32"), 11667ec681f3Smrg (0x31, 0x31, 0x31, 0x31, 0x31, "ds_cmpst_rtn_f32"), 11677ec681f3Smrg (0x32, 0x32, 0x32, 0x32, 0x32, "ds_min_rtn_f32"), 11687ec681f3Smrg (0x33, 0x33, 0x33, 0x33, 0x33, "ds_max_rtn_f32"), 11697ec681f3Smrg ( -1, 0x34, 0x34, 0x34, 0x34, "ds_wrap_rtn_b32"), 11707ec681f3Smrg ( -1, -1, 0x35, 0x35, 0x55, "ds_add_rtn_f32"), 11717ec681f3Smrg (0x36, 0x36, 0x36, 0x36, 0x36, "ds_read_b32"), 11727ec681f3Smrg (0x37, 0x37, 0x37, 0x37, 0x37, "ds_read2_b32"), 11737ec681f3Smrg (0x38, 0x38, 0x38, 0x38, 0x38, "ds_read2st64_b32"), 11747ec681f3Smrg (0x39, 0x39, 0x39, 0x39, 0x39, "ds_read_i8"), 11757ec681f3Smrg (0x3a, 
0x3a, 0x3a, 0x3a, 0x3a, "ds_read_u8"), 11767ec681f3Smrg (0x3b, 0x3b, 0x3b, 0x3b, 0x3b, "ds_read_i16"), 11777ec681f3Smrg (0x3c, 0x3c, 0x3c, 0x3c, 0x3c, "ds_read_u16"), 11787ec681f3Smrg (0x35, 0x35, 0x3d, 0x3d, 0x35, "ds_swizzle_b32"), #data1 & offset, no addr/data2 11797ec681f3Smrg ( -1, -1, 0x3e, 0x3e, 0xb2, "ds_permute_b32"), 11807ec681f3Smrg ( -1, -1, 0x3f, 0x3f, 0xb3, "ds_bpermute_b32"), 11817ec681f3Smrg (0x40, 0x40, 0x40, 0x40, 0x40, "ds_add_u64"), 11827ec681f3Smrg (0x41, 0x41, 0x41, 0x41, 0x41, "ds_sub_u64"), 11837ec681f3Smrg (0x42, 0x42, 0x42, 0x42, 0x42, "ds_rsub_u64"), 11847ec681f3Smrg (0x43, 0x43, 0x43, 0x43, 0x43, "ds_inc_u64"), 11857ec681f3Smrg (0x44, 0x44, 0x44, 0x44, 0x44, "ds_dec_u64"), 11867ec681f3Smrg (0x45, 0x45, 0x45, 0x45, 0x45, "ds_min_i64"), 11877ec681f3Smrg (0x46, 0x46, 0x46, 0x46, 0x46, "ds_max_i64"), 11887ec681f3Smrg (0x47, 0x47, 0x47, 0x47, 0x47, "ds_min_u64"), 11897ec681f3Smrg (0x48, 0x48, 0x48, 0x48, 0x48, "ds_max_u64"), 11907ec681f3Smrg (0x49, 0x49, 0x49, 0x49, 0x49, "ds_and_b64"), 11917ec681f3Smrg (0x4a, 0x4a, 0x4a, 0x4a, 0x4a, "ds_or_b64"), 11927ec681f3Smrg (0x4b, 0x4b, 0x4b, 0x4b, 0x4b, "ds_xor_b64"), 11937ec681f3Smrg (0x4c, 0x4c, 0x4c, 0x4c, 0x4c, "ds_mskor_b64"), 11947ec681f3Smrg (0x4d, 0x4d, 0x4d, 0x4d, 0x4d, "ds_write_b64"), 11957ec681f3Smrg (0x4e, 0x4e, 0x4e, 0x4e, 0x4e, "ds_write2_b64"), 11967ec681f3Smrg (0x4f, 0x4f, 0x4f, 0x4f, 0x4f, "ds_write2st64_b64"), 11977ec681f3Smrg (0x50, 0x50, 0x50, 0x50, 0x50, "ds_cmpst_b64"), 11987ec681f3Smrg (0x51, 0x51, 0x51, 0x51, 0x51, "ds_cmpst_f64"), 11997ec681f3Smrg (0x52, 0x52, 0x52, 0x52, 0x52, "ds_min_f64"), 12007ec681f3Smrg (0x53, 0x53, 0x53, 0x53, 0x53, "ds_max_f64"), 12017ec681f3Smrg ( -1, -1, -1, 0x54, 0xa0, "ds_write_b8_d16_hi"), 12027ec681f3Smrg ( -1, -1, -1, 0x55, 0xa1, "ds_write_b16_d16_hi"), 12037ec681f3Smrg ( -1, -1, -1, 0x56, 0xa2, "ds_read_u8_d16"), 12047ec681f3Smrg ( -1, -1, -1, 0x57, 0xa3, "ds_read_u8_d16_hi"), 12057ec681f3Smrg ( -1, -1, -1, 0x58, 0xa4, "ds_read_i8_d16"), 
12067ec681f3Smrg ( -1, -1, -1, 0x59, 0xa5, "ds_read_i8_d16_hi"), 12077ec681f3Smrg ( -1, -1, -1, 0x5a, 0xa6, "ds_read_u16_d16"), 12087ec681f3Smrg ( -1, -1, -1, 0x5b, 0xa7, "ds_read_u16_d16_hi"), 12097ec681f3Smrg (0x60, 0x60, 0x60, 0x60, 0x60, "ds_add_rtn_u64"), 12107ec681f3Smrg (0x61, 0x61, 0x61, 0x61, 0x61, "ds_sub_rtn_u64"), 12117ec681f3Smrg (0x62, 0x62, 0x62, 0x62, 0x62, "ds_rsub_rtn_u64"), 12127ec681f3Smrg (0x63, 0x63, 0x63, 0x63, 0x63, "ds_inc_rtn_u64"), 12137ec681f3Smrg (0x64, 0x64, 0x64, 0x64, 0x64, "ds_dec_rtn_u64"), 12147ec681f3Smrg (0x65, 0x65, 0x65, 0x65, 0x65, "ds_min_rtn_i64"), 12157ec681f3Smrg (0x66, 0x66, 0x66, 0x66, 0x66, "ds_max_rtn_i64"), 12167ec681f3Smrg (0x67, 0x67, 0x67, 0x67, 0x67, "ds_min_rtn_u64"), 12177ec681f3Smrg (0x68, 0x68, 0x68, 0x68, 0x68, "ds_max_rtn_u64"), 12187ec681f3Smrg (0x69, 0x69, 0x69, 0x69, 0x69, "ds_and_rtn_b64"), 12197ec681f3Smrg (0x6a, 0x6a, 0x6a, 0x6a, 0x6a, "ds_or_rtn_b64"), 12207ec681f3Smrg (0x6b, 0x6b, 0x6b, 0x6b, 0x6b, "ds_xor_rtn_b64"), 12217ec681f3Smrg (0x6c, 0x6c, 0x6c, 0x6c, 0x6c, "ds_mskor_rtn_b64"), 12227ec681f3Smrg (0x6d, 0x6d, 0x6d, 0x6d, 0x6d, "ds_wrxchg_rtn_b64"), 12237ec681f3Smrg (0x6e, 0x6e, 0x6e, 0x6e, 0x6e, "ds_wrxchg2_rtn_b64"), 12247ec681f3Smrg (0x6f, 0x6f, 0x6f, 0x6f, 0x6f, "ds_wrxchg2st64_rtn_b64"), 12257ec681f3Smrg (0x70, 0x70, 0x70, 0x70, 0x70, "ds_cmpst_rtn_b64"), 12267ec681f3Smrg (0x71, 0x71, 0x71, 0x71, 0x71, "ds_cmpst_rtn_f64"), 12277ec681f3Smrg (0x72, 0x72, 0x72, 0x72, 0x72, "ds_min_rtn_f64"), 12287ec681f3Smrg (0x73, 0x73, 0x73, 0x73, 0x73, "ds_max_rtn_f64"), 12297ec681f3Smrg (0x76, 0x76, 0x76, 0x76, 0x76, "ds_read_b64"), 12307ec681f3Smrg (0x77, 0x77, 0x77, 0x77, 0x77, "ds_read2_b64"), 12317ec681f3Smrg (0x78, 0x78, 0x78, 0x78, 0x78, "ds_read2st64_b64"), 12327ec681f3Smrg ( -1, 0x7e, 0x7e, 0x7e, 0x7e, "ds_condxchg32_rtn_b64"), 12337ec681f3Smrg (0x80, 0x80, 0x80, 0x80, 0x80, "ds_add_src2_u32"), 12347ec681f3Smrg (0x81, 0x81, 0x81, 0x81, 0x81, "ds_sub_src2_u32"), 12357ec681f3Smrg (0x82, 0x82, 0x82, 
0x82, 0x82, "ds_rsub_src2_u32"), 12367ec681f3Smrg (0x83, 0x83, 0x83, 0x83, 0x83, "ds_inc_src2_u32"), 12377ec681f3Smrg (0x84, 0x84, 0x84, 0x84, 0x84, "ds_dec_src2_u32"), 12387ec681f3Smrg (0x85, 0x85, 0x85, 0x85, 0x85, "ds_min_src2_i32"), 12397ec681f3Smrg (0x86, 0x86, 0x86, 0x86, 0x86, "ds_max_src2_i32"), 12407ec681f3Smrg (0x87, 0x87, 0x87, 0x87, 0x87, "ds_min_src2_u32"), 12417ec681f3Smrg (0x88, 0x88, 0x88, 0x88, 0x88, "ds_max_src2_u32"), 12427ec681f3Smrg (0x89, 0x89, 0x89, 0x89, 0x89, "ds_and_src2_b32"), 12437ec681f3Smrg (0x8a, 0x8a, 0x8a, 0x8a, 0x8a, "ds_or_src2_b32"), 12447ec681f3Smrg (0x8b, 0x8b, 0x8b, 0x8b, 0x8b, "ds_xor_src2_b32"), 12457ec681f3Smrg (0x8d, 0x8d, 0x8d, 0x8d, 0x8d, "ds_write_src2_b32"), 12467ec681f3Smrg (0x92, 0x92, 0x92, 0x92, 0x92, "ds_min_src2_f32"), 12477ec681f3Smrg (0x93, 0x93, 0x93, 0x93, 0x93, "ds_max_src2_f32"), 12487ec681f3Smrg ( -1, -1, 0x95, 0x95, 0x95, "ds_add_src2_f32"), 12497ec681f3Smrg ( -1, 0x18, 0x98, 0x98, 0x18, "ds_gws_sema_release_all"), 12507ec681f3Smrg (0x19, 0x19, 0x99, 0x99, 0x19, "ds_gws_init"), 12517ec681f3Smrg (0x1a, 0x1a, 0x9a, 0x9a, 0x1a, "ds_gws_sema_v"), 12527ec681f3Smrg (0x1b, 0x1b, 0x9b, 0x9b, 0x1b, "ds_gws_sema_br"), 12537ec681f3Smrg (0x1c, 0x1c, 0x9c, 0x9c, 0x1c, "ds_gws_sema_p"), 12547ec681f3Smrg (0x1d, 0x1d, 0x9d, 0x9d, 0x1d, "ds_gws_barrier"), 12557ec681f3Smrg ( -1, -1, 0xb6, 0xb6, 0xb1, "ds_read_addtid_b32"), 12567ec681f3Smrg (0x3d, 0x3d, 0xbd, 0xbd, 0x3d, "ds_consume"), 12577ec681f3Smrg (0x3e, 0x3e, 0xbe, 0xbe, 0x3e, "ds_append"), 12587ec681f3Smrg (0x3f, 0x3f, 0xbf, 0xbf, 0x3f, "ds_ordered_count"), 12597ec681f3Smrg (0xc0, 0xc0, 0xc0, 0xc0, 0xc0, "ds_add_src2_u64"), 12607ec681f3Smrg (0xc1, 0xc1, 0xc1, 0xc1, 0xc1, "ds_sub_src2_u64"), 12617ec681f3Smrg (0xc2, 0xc2, 0xc2, 0xc2, 0xc2, "ds_rsub_src2_u64"), 12627ec681f3Smrg (0xc3, 0xc3, 0xc3, 0xc3, 0xc3, "ds_inc_src2_u64"), 12637ec681f3Smrg (0xc4, 0xc4, 0xc4, 0xc4, 0xc4, "ds_dec_src2_u64"), 12647ec681f3Smrg (0xc5, 0xc5, 0xc5, 0xc5, 0xc5, "ds_min_src2_i64"), 
12657ec681f3Smrg (0xc6, 0xc6, 0xc6, 0xc6, 0xc6, "ds_max_src2_i64"), 12667ec681f3Smrg (0xc7, 0xc7, 0xc7, 0xc7, 0xc7, "ds_min_src2_u64"), 12677ec681f3Smrg (0xc8, 0xc8, 0xc8, 0xc8, 0xc8, "ds_max_src2_u64"), 12687ec681f3Smrg (0xc9, 0xc9, 0xc9, 0xc9, 0xc9, "ds_and_src2_b64"), 12697ec681f3Smrg (0xca, 0xca, 0xca, 0xca, 0xca, "ds_or_src2_b64"), 12707ec681f3Smrg (0xcb, 0xcb, 0xcb, 0xcb, 0xcb, "ds_xor_src2_b64"), 12717ec681f3Smrg (0xcd, 0xcd, 0xcd, 0xcd, 0xcd, "ds_write_src2_b64"), 12727ec681f3Smrg (0xd2, 0xd2, 0xd2, 0xd2, 0xd2, "ds_min_src2_f64"), 12737ec681f3Smrg (0xd3, 0xd3, 0xd3, 0xd3, 0xd3, "ds_max_src2_f64"), 12747ec681f3Smrg ( -1, 0xde, 0xde, 0xde, 0xde, "ds_write_b96"), 12757ec681f3Smrg ( -1, 0xdf, 0xdf, 0xdf, 0xdf, "ds_write_b128"), 12767ec681f3Smrg ( -1, 0xfd, 0xfd, -1, -1, "ds_condxchg32_rtn_b128"), 12777ec681f3Smrg ( -1, 0xfe, 0xfe, 0xfe, 0xfe, "ds_read_b96"), 12787ec681f3Smrg ( -1, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"), 12797ec681f3Smrg} 12807ec681f3Smrgfor (gfx6, gfx7, gfx8, gfx9, gfx10, name) in DS: 12817ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.DS, InstrClass.DS) 12827ec681f3Smrg 12837ec681f3Smrg# MUBUF instructions: 12847ec681f3SmrgMUBUF = { 12857ec681f3Smrg (0x00, 0x00, 0x00, 0x00, 0x00, "buffer_load_format_x"), 12867ec681f3Smrg (0x01, 0x01, 0x01, 0x01, 0x01, "buffer_load_format_xy"), 12877ec681f3Smrg (0x02, 0x02, 0x02, 0x02, 0x02, "buffer_load_format_xyz"), 12887ec681f3Smrg (0x03, 0x03, 0x03, 0x03, 0x03, "buffer_load_format_xyzw"), 12897ec681f3Smrg (0x04, 0x04, 0x04, 0x04, 0x04, "buffer_store_format_x"), 12907ec681f3Smrg (0x05, 0x05, 0x05, 0x05, 0x05, "buffer_store_format_xy"), 12917ec681f3Smrg (0x06, 0x06, 0x06, 0x06, 0x06, "buffer_store_format_xyz"), 12927ec681f3Smrg (0x07, 0x07, 0x07, 0x07, 0x07, "buffer_store_format_xyzw"), 12937ec681f3Smrg ( -1, -1, 0x08, 0x08, 0x80, "buffer_load_format_d16_x"), 12947ec681f3Smrg ( -1, -1, 0x09, 0x09, 0x81, "buffer_load_format_d16_xy"), 12957ec681f3Smrg ( -1, -1, 0x0a, 0x0a, 0x82, 
"buffer_load_format_d16_xyz"), 12967ec681f3Smrg ( -1, -1, 0x0b, 0x0b, 0x83, "buffer_load_format_d16_xyzw"), 12977ec681f3Smrg ( -1, -1, 0x0c, 0x0c, 0x84, "buffer_store_format_d16_x"), 12987ec681f3Smrg ( -1, -1, 0x0d, 0x0d, 0x85, "buffer_store_format_d16_xy"), 12997ec681f3Smrg ( -1, -1, 0x0e, 0x0e, 0x86, "buffer_store_format_d16_xyz"), 13007ec681f3Smrg ( -1, -1, 0x0f, 0x0f, 0x87, "buffer_store_format_d16_xyzw"), 13017ec681f3Smrg (0x08, 0x08, 0x10, 0x10, 0x08, "buffer_load_ubyte"), 13027ec681f3Smrg (0x09, 0x09, 0x11, 0x11, 0x09, "buffer_load_sbyte"), 13037ec681f3Smrg (0x0a, 0x0a, 0x12, 0x12, 0x0a, "buffer_load_ushort"), 13047ec681f3Smrg (0x0b, 0x0b, 0x13, 0x13, 0x0b, "buffer_load_sshort"), 13057ec681f3Smrg (0x0c, 0x0c, 0x14, 0x14, 0x0c, "buffer_load_dword"), 13067ec681f3Smrg (0x0d, 0x0d, 0x15, 0x15, 0x0d, "buffer_load_dwordx2"), 13077ec681f3Smrg ( -1, 0x0f, 0x16, 0x16, 0x0f, "buffer_load_dwordx3"), 13087ec681f3Smrg (0x0f, 0x0e, 0x17, 0x17, 0x0e, "buffer_load_dwordx4"), 13097ec681f3Smrg (0x18, 0x18, 0x18, 0x18, 0x18, "buffer_store_byte"), 13107ec681f3Smrg ( -1, -1, -1, 0x19, 0x19, "buffer_store_byte_d16_hi"), 13117ec681f3Smrg (0x1a, 0x1a, 0x1a, 0x1a, 0x1a, "buffer_store_short"), 13127ec681f3Smrg ( -1, -1, -1, 0x1b, 0x1b, "buffer_store_short_d16_hi"), 13137ec681f3Smrg (0x1c, 0x1c, 0x1c, 0x1c, 0x1c, "buffer_store_dword"), 13147ec681f3Smrg (0x1d, 0x1d, 0x1d, 0x1d, 0x1d, "buffer_store_dwordx2"), 13157ec681f3Smrg ( -1, 0x1f, 0x1e, 0x1e, 0x1f, "buffer_store_dwordx3"), 13167ec681f3Smrg (0x1e, 0x1e, 0x1f, 0x1f, 0x1e, "buffer_store_dwordx4"), 13177ec681f3Smrg ( -1, -1, -1, 0x20, 0x20, "buffer_load_ubyte_d16"), 13187ec681f3Smrg ( -1, -1, -1, 0x21, 0x21, "buffer_load_ubyte_d16_hi"), 13197ec681f3Smrg ( -1, -1, -1, 0x22, 0x22, "buffer_load_sbyte_d16"), 13207ec681f3Smrg ( -1, -1, -1, 0x23, 0x23, "buffer_load_sbyte_d16_hi"), 13217ec681f3Smrg ( -1, -1, -1, 0x24, 0x24, "buffer_load_short_d16"), 13227ec681f3Smrg ( -1, -1, -1, 0x25, 0x25, "buffer_load_short_d16_hi"), 13237ec681f3Smrg ( 
-1, -1, -1, 0x26, 0x26, "buffer_load_format_d16_hi_x"), 13247ec681f3Smrg ( -1, -1, -1, 0x27, 0x27, "buffer_store_format_d16_hi_x"), 13257ec681f3Smrg ( -1, -1, 0x3d, 0x3d, -1, "buffer_store_lds_dword"), 13267ec681f3Smrg (0x71, 0x71, 0x3e, 0x3e, -1, "buffer_wbinvl1"), 13277ec681f3Smrg (0x70, 0x70, 0x3f, 0x3f, -1, "buffer_wbinvl1_vol"), 13287ec681f3Smrg (0x30, 0x30, 0x40, 0x40, 0x30, "buffer_atomic_swap"), 13297ec681f3Smrg (0x31, 0x31, 0x41, 0x41, 0x31, "buffer_atomic_cmpswap"), 13307ec681f3Smrg (0x32, 0x32, 0x42, 0x42, 0x32, "buffer_atomic_add"), 13317ec681f3Smrg (0x33, 0x33, 0x43, 0x43, 0x33, "buffer_atomic_sub"), 13327ec681f3Smrg (0x34, -1, -1, -1, -1, "buffer_atomic_rsub"), 13337ec681f3Smrg (0x35, 0x35, 0x44, 0x44, 0x35, "buffer_atomic_smin"), 13347ec681f3Smrg (0x36, 0x36, 0x45, 0x45, 0x36, "buffer_atomic_umin"), 13357ec681f3Smrg (0x37, 0x37, 0x46, 0x46, 0x37, "buffer_atomic_smax"), 13367ec681f3Smrg (0x38, 0x38, 0x47, 0x47, 0x38, "buffer_atomic_umax"), 13377ec681f3Smrg (0x39, 0x39, 0x48, 0x48, 0x39, "buffer_atomic_and"), 13387ec681f3Smrg (0x3a, 0x3a, 0x49, 0x49, 0x3a, "buffer_atomic_or"), 13397ec681f3Smrg (0x3b, 0x3b, 0x4a, 0x4a, 0x3b, "buffer_atomic_xor"), 13407ec681f3Smrg (0x3c, 0x3c, 0x4b, 0x4b, 0x3c, "buffer_atomic_inc"), 13417ec681f3Smrg (0x3d, 0x3d, 0x4c, 0x4c, 0x3d, "buffer_atomic_dec"), 13427ec681f3Smrg (0x3e, 0x3e, -1, -1, 0x3e, "buffer_atomic_fcmpswap"), 13437ec681f3Smrg (0x3f, 0x3f, -1, -1, 0x3f, "buffer_atomic_fmin"), 13447ec681f3Smrg (0x40, 0x40, -1, -1, 0x40, "buffer_atomic_fmax"), 13457ec681f3Smrg (0x50, 0x50, 0x60, 0x60, 0x50, "buffer_atomic_swap_x2"), 13467ec681f3Smrg (0x51, 0x51, 0x61, 0x61, 0x51, "buffer_atomic_cmpswap_x2"), 13477ec681f3Smrg (0x52, 0x52, 0x62, 0x62, 0x52, "buffer_atomic_add_x2"), 13487ec681f3Smrg (0x53, 0x53, 0x63, 0x63, 0x53, "buffer_atomic_sub_x2"), 13497ec681f3Smrg (0x54, -1, -1, -1, -1, "buffer_atomic_rsub_x2"), 13507ec681f3Smrg (0x55, 0x55, 0x64, 0x64, 0x55, "buffer_atomic_smin_x2"), 13517ec681f3Smrg (0x56, 0x56, 0x65, 
0x65, 0x56, "buffer_atomic_umin_x2"), 13527ec681f3Smrg (0x57, 0x57, 0x66, 0x66, 0x57, "buffer_atomic_smax_x2"), 13537ec681f3Smrg (0x58, 0x58, 0x67, 0x67, 0x58, "buffer_atomic_umax_x2"), 13547ec681f3Smrg (0x59, 0x59, 0x68, 0x68, 0x59, "buffer_atomic_and_x2"), 13557ec681f3Smrg (0x5a, 0x5a, 0x69, 0x69, 0x5a, "buffer_atomic_or_x2"), 13567ec681f3Smrg (0x5b, 0x5b, 0x6a, 0x6a, 0x5b, "buffer_atomic_xor_x2"), 13577ec681f3Smrg (0x5c, 0x5c, 0x6b, 0x6b, 0x5c, "buffer_atomic_inc_x2"), 13587ec681f3Smrg (0x5d, 0x5d, 0x6c, 0x6c, 0x5d, "buffer_atomic_dec_x2"), 13597ec681f3Smrg (0x5e, 0x5e, -1, -1, 0x5e, "buffer_atomic_fcmpswap_x2"), 13607ec681f3Smrg (0x5f, 0x5f, -1, -1, 0x5f, "buffer_atomic_fmin_x2"), 13617ec681f3Smrg (0x60, 0x60, -1, -1, 0x60, "buffer_atomic_fmax_x2"), 13627ec681f3Smrg ( -1, -1, -1, -1, 0x71, "buffer_gl0_inv"), 13637ec681f3Smrg ( -1, -1, -1, -1, 0x72, "buffer_gl1_inv"), 13647ec681f3Smrg ( -1, -1, -1, -1, 0x34, "buffer_atomic_csub"), #GFX10.3+. seems glc must be set 13657ec681f3Smrg} 13667ec681f3Smrgfor (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MUBUF: 13677ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.MUBUF, InstrClass.VMem, is_atomic = "atomic" in name) 13687ec681f3Smrg 13697ec681f3SmrgMTBUF = { 13707ec681f3Smrg (0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"), 13717ec681f3Smrg (0x01, 0x01, 0x01, 0x01, 0x01, "tbuffer_load_format_xy"), 13727ec681f3Smrg (0x02, 0x02, 0x02, 0x02, 0x02, "tbuffer_load_format_xyz"), 13737ec681f3Smrg (0x03, 0x03, 0x03, 0x03, 0x03, "tbuffer_load_format_xyzw"), 13747ec681f3Smrg (0x04, 0x04, 0x04, 0x04, 0x04, "tbuffer_store_format_x"), 13757ec681f3Smrg (0x05, 0x05, 0x05, 0x05, 0x05, "tbuffer_store_format_xy"), 13767ec681f3Smrg (0x06, 0x06, 0x06, 0x06, 0x06, "tbuffer_store_format_xyz"), 13777ec681f3Smrg (0x07, 0x07, 0x07, 0x07, 0x07, "tbuffer_store_format_xyzw"), 13787ec681f3Smrg ( -1, -1, 0x08, 0x08, 0x08, "tbuffer_load_format_d16_x"), 13797ec681f3Smrg ( -1, -1, 0x09, 0x09, 0x09, "tbuffer_load_format_d16_xy"), 13807ec681f3Smrg ( 
-1, -1, 0x0a, 0x0a, 0x0a, "tbuffer_load_format_d16_xyz"), 13817ec681f3Smrg ( -1, -1, 0x0b, 0x0b, 0x0b, "tbuffer_load_format_d16_xyzw"), 13827ec681f3Smrg ( -1, -1, 0x0c, 0x0c, 0x0c, "tbuffer_store_format_d16_x"), 13837ec681f3Smrg ( -1, -1, 0x0d, 0x0d, 0x0d, "tbuffer_store_format_d16_xy"), 13847ec681f3Smrg ( -1, -1, 0x0e, 0x0e, 0x0e, "tbuffer_store_format_d16_xyz"), 13857ec681f3Smrg ( -1, -1, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"), 13867ec681f3Smrg} 13877ec681f3Smrgfor (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MTBUF: 13887ec681f3Smrg opcode(name, gfx7, gfx9, gfx10, Format.MTBUF, InstrClass.VMem) 13897ec681f3Smrg 13907ec681f3Smrg 13917ec681f3SmrgIMAGE = { 13927ec681f3Smrg (0x00, "image_load"), 13937ec681f3Smrg (0x01, "image_load_mip"), 13947ec681f3Smrg (0x02, "image_load_pck"), 13957ec681f3Smrg (0x03, "image_load_pck_sgn"), 13967ec681f3Smrg (0x04, "image_load_mip_pck"), 13977ec681f3Smrg (0x05, "image_load_mip_pck_sgn"), 13987ec681f3Smrg (0x08, "image_store"), 13997ec681f3Smrg (0x09, "image_store_mip"), 14007ec681f3Smrg (0x0a, "image_store_pck"), 14017ec681f3Smrg (0x0b, "image_store_mip_pck"), 14027ec681f3Smrg (0x0e, "image_get_resinfo"), 14037ec681f3Smrg (0x60, "image_get_lod"), 14047ec681f3Smrg} 14057ec681f3Smrg# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name) 14067ec681f3Smrgfor (code, name) in IMAGE: 14077ec681f3Smrg opcode(name, code, code, code, Format.MIMG, InstrClass.VMem) 14087ec681f3Smrg 14097ec681f3Smrgopcode("image_msaa_load", -1, -1, 0x80, Format.MIMG, InstrClass.VMem) #GFX10.3+ 14107ec681f3Smrg 14117ec681f3SmrgIMAGE_ATOMIC = { 14127ec681f3Smrg (0x0f, 0x0f, 0x10, "image_atomic_swap"), 14137ec681f3Smrg (0x10, 0x10, 0x11, "image_atomic_cmpswap"), 14147ec681f3Smrg (0x11, 0x11, 0x12, "image_atomic_add"), 14157ec681f3Smrg (0x12, 0x12, 0x13, "image_atomic_sub"), 14167ec681f3Smrg (0x13, -1, -1, "image_atomic_rsub"), 14177ec681f3Smrg (0x14, 0x14, 0x14, "image_atomic_smin"), 14187ec681f3Smrg (0x15, 0x15, 0x15, 
"image_atomic_umin"), 14197ec681f3Smrg (0x16, 0x16, 0x16, "image_atomic_smax"), 14207ec681f3Smrg (0x17, 0x17, 0x17, "image_atomic_umax"), 14217ec681f3Smrg (0x18, 0x18, 0x18, "image_atomic_and"), 14227ec681f3Smrg (0x19, 0x19, 0x19, "image_atomic_or"), 14237ec681f3Smrg (0x1a, 0x1a, 0x1a, "image_atomic_xor"), 14247ec681f3Smrg (0x1b, 0x1b, 0x1b, "image_atomic_inc"), 14257ec681f3Smrg (0x1c, 0x1c, 0x1c, "image_atomic_dec"), 14267ec681f3Smrg (0x1d, 0x1d, -1, "image_atomic_fcmpswap"), 14277ec681f3Smrg (0x1e, 0x1e, -1, "image_atomic_fmin"), 14287ec681f3Smrg (0x1f, 0x1f, -1, "image_atomic_fmax"), 14297ec681f3Smrg} 14307ec681f3Smrg# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (gfx6, gfx7, gfx89, gfx89, ???, name) 14317ec681f3Smrg# gfx7 and gfx10 opcodes are the same here 14327ec681f3Smrgfor (gfx6, gfx7, gfx89, name) in IMAGE_ATOMIC: 14337ec681f3Smrg opcode(name, gfx7, gfx89, gfx7, Format.MIMG, InstrClass.VMem, is_atomic = True) 14347ec681f3Smrg 14357ec681f3SmrgIMAGE_SAMPLE = { 14367ec681f3Smrg (0x20, "image_sample"), 14377ec681f3Smrg (0x21, "image_sample_cl"), 14387ec681f3Smrg (0x22, "image_sample_d"), 14397ec681f3Smrg (0x23, "image_sample_d_cl"), 14407ec681f3Smrg (0x24, "image_sample_l"), 14417ec681f3Smrg (0x25, "image_sample_b"), 14427ec681f3Smrg (0x26, "image_sample_b_cl"), 14437ec681f3Smrg (0x27, "image_sample_lz"), 14447ec681f3Smrg (0x28, "image_sample_c"), 14457ec681f3Smrg (0x29, "image_sample_c_cl"), 14467ec681f3Smrg (0x2a, "image_sample_c_d"), 14477ec681f3Smrg (0x2b, "image_sample_c_d_cl"), 14487ec681f3Smrg (0x2c, "image_sample_c_l"), 14497ec681f3Smrg (0x2d, "image_sample_c_b"), 14507ec681f3Smrg (0x2e, "image_sample_c_b_cl"), 14517ec681f3Smrg (0x2f, "image_sample_c_lz"), 14527ec681f3Smrg (0x30, "image_sample_o"), 14537ec681f3Smrg (0x31, "image_sample_cl_o"), 14547ec681f3Smrg (0x32, "image_sample_d_o"), 14557ec681f3Smrg (0x33, "image_sample_d_cl_o"), 14567ec681f3Smrg (0x34, "image_sample_l_o"), 14577ec681f3Smrg (0x35, "image_sample_b_o"), 14587ec681f3Smrg (0x36, 
"image_sample_b_cl_o"), 14597ec681f3Smrg (0x37, "image_sample_lz_o"), 14607ec681f3Smrg (0x38, "image_sample_c_o"), 14617ec681f3Smrg (0x39, "image_sample_c_cl_o"), 14627ec681f3Smrg (0x3a, "image_sample_c_d_o"), 14637ec681f3Smrg (0x3b, "image_sample_c_d_cl_o"), 14647ec681f3Smrg (0x3c, "image_sample_c_l_o"), 14657ec681f3Smrg (0x3d, "image_sample_c_b_o"), 14667ec681f3Smrg (0x3e, "image_sample_c_b_cl_o"), 14677ec681f3Smrg (0x3f, "image_sample_c_lz_o"), 14687ec681f3Smrg (0x68, "image_sample_cd"), 14697ec681f3Smrg (0x69, "image_sample_cd_cl"), 14707ec681f3Smrg (0x6a, "image_sample_c_cd"), 14717ec681f3Smrg (0x6b, "image_sample_c_cd_cl"), 14727ec681f3Smrg (0x6c, "image_sample_cd_o"), 14737ec681f3Smrg (0x6d, "image_sample_cd_cl_o"), 14747ec681f3Smrg (0x6e, "image_sample_c_cd_o"), 14757ec681f3Smrg (0x6f, "image_sample_c_cd_cl_o"), 14767ec681f3Smrg} 14777ec681f3Smrg# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name) 14787ec681f3Smrgfor (code, name) in IMAGE_SAMPLE: 14797ec681f3Smrg opcode(name, code, code, code, Format.MIMG, InstrClass.VMem) 14807ec681f3Smrg 14817ec681f3SmrgIMAGE_GATHER4 = { 14827ec681f3Smrg (0x40, "image_gather4"), 14837ec681f3Smrg (0x41, "image_gather4_cl"), 14847ec681f3Smrg #(0x42, "image_gather4h"), VEGA only? 14857ec681f3Smrg (0x44, "image_gather4_l"), # following instructions have different opcodes according to ISA sheet. 14867ec681f3Smrg (0x45, "image_gather4_b"), 14877ec681f3Smrg (0x46, "image_gather4_b_cl"), 14887ec681f3Smrg (0x47, "image_gather4_lz"), 14897ec681f3Smrg (0x48, "image_gather4_c"), 14907ec681f3Smrg (0x49, "image_gather4_c_cl"), # previous instructions have different opcodes according to ISA sheet. 14917ec681f3Smrg #(0x4a, "image_gather4h_pck"), VEGA only? 14927ec681f3Smrg #(0x4b, "image_gather8h_pck"), VGEA only? 
14937ec681f3Smrg (0x4c, "image_gather4_c_l"), 14947ec681f3Smrg (0x4d, "image_gather4_c_b"), 14957ec681f3Smrg (0x4e, "image_gather4_c_b_cl"), 14967ec681f3Smrg (0x4f, "image_gather4_c_lz"), 14977ec681f3Smrg (0x50, "image_gather4_o"), 14987ec681f3Smrg (0x51, "image_gather4_cl_o"), 14997ec681f3Smrg (0x54, "image_gather4_l_o"), 15007ec681f3Smrg (0x55, "image_gather4_b_o"), 15017ec681f3Smrg (0x56, "image_gather4_b_cl_o"), 15027ec681f3Smrg (0x57, "image_gather4_lz_o"), 15037ec681f3Smrg (0x58, "image_gather4_c_o"), 15047ec681f3Smrg (0x59, "image_gather4_c_cl_o"), 15057ec681f3Smrg (0x5c, "image_gather4_c_l_o"), 15067ec681f3Smrg (0x5d, "image_gather4_c_b_o"), 15077ec681f3Smrg (0x5e, "image_gather4_c_b_cl_o"), 15087ec681f3Smrg (0x5f, "image_gather4_c_lz_o"), 15097ec681f3Smrg} 15107ec681f3Smrg# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name) 15117ec681f3Smrgfor (code, name) in IMAGE_GATHER4: 15127ec681f3Smrg opcode(name, code, code, code, Format.MIMG, InstrClass.VMem) 15137ec681f3Smrg 15147ec681f3Smrgopcode("image_bvh64_intersect_ray", -1, -1, 231, Format.MIMG, InstrClass.VMem) 15157ec681f3Smrg 15167ec681f3SmrgFLAT = { 15177ec681f3Smrg #GFX7, GFX8_9, GFX10 15187ec681f3Smrg (0x08, 0x10, 0x08, "flat_load_ubyte"), 15197ec681f3Smrg (0x09, 0x11, 0x09, "flat_load_sbyte"), 15207ec681f3Smrg (0x0a, 0x12, 0x0a, "flat_load_ushort"), 15217ec681f3Smrg (0x0b, 0x13, 0x0b, "flat_load_sshort"), 15227ec681f3Smrg (0x0c, 0x14, 0x0c, "flat_load_dword"), 15237ec681f3Smrg (0x0d, 0x15, 0x0d, "flat_load_dwordx2"), 15247ec681f3Smrg (0x0f, 0x16, 0x0f, "flat_load_dwordx3"), 15257ec681f3Smrg (0x0e, 0x17, 0x0e, "flat_load_dwordx4"), 15267ec681f3Smrg (0x18, 0x18, 0x18, "flat_store_byte"), 15277ec681f3Smrg ( -1, 0x19, 0x19, "flat_store_byte_d16_hi"), 15287ec681f3Smrg (0x1a, 0x1a, 0x1a, "flat_store_short"), 15297ec681f3Smrg ( -1, 0x1b, 0x1b, "flat_store_short_d16_hi"), 15307ec681f3Smrg (0x1c, 0x1c, 0x1c, "flat_store_dword"), 15317ec681f3Smrg (0x1d, 0x1d, 0x1d, 
"flat_store_dwordx2"), 15327ec681f3Smrg (0x1f, 0x1e, 0x1f, "flat_store_dwordx3"), 15337ec681f3Smrg (0x1e, 0x1f, 0x1e, "flat_store_dwordx4"), 15347ec681f3Smrg ( -1, 0x20, 0x20, "flat_load_ubyte_d16"), 15357ec681f3Smrg ( -1, 0x21, 0x21, "flat_load_ubyte_d16_hi"), 15367ec681f3Smrg ( -1, 0x22, 0x22, "flat_load_sbyte_d16"), 15377ec681f3Smrg ( -1, 0x23, 0x23, "flat_load_sbyte_d16_hi"), 15387ec681f3Smrg ( -1, 0x24, 0x24, "flat_load_short_d16"), 15397ec681f3Smrg ( -1, 0x25, 0x25, "flat_load_short_d16_hi"), 15407ec681f3Smrg (0x30, 0x40, 0x30, "flat_atomic_swap"), 15417ec681f3Smrg (0x31, 0x41, 0x31, "flat_atomic_cmpswap"), 15427ec681f3Smrg (0x32, 0x42, 0x32, "flat_atomic_add"), 15437ec681f3Smrg (0x33, 0x43, 0x33, "flat_atomic_sub"), 15447ec681f3Smrg (0x35, 0x44, 0x35, "flat_atomic_smin"), 15457ec681f3Smrg (0x36, 0x45, 0x36, "flat_atomic_umin"), 15467ec681f3Smrg (0x37, 0x46, 0x37, "flat_atomic_smax"), 15477ec681f3Smrg (0x38, 0x47, 0x38, "flat_atomic_umax"), 15487ec681f3Smrg (0x39, 0x48, 0x39, "flat_atomic_and"), 15497ec681f3Smrg (0x3a, 0x49, 0x3a, "flat_atomic_or"), 15507ec681f3Smrg (0x3b, 0x4a, 0x3b, "flat_atomic_xor"), 15517ec681f3Smrg (0x3c, 0x4b, 0x3c, "flat_atomic_inc"), 15527ec681f3Smrg (0x3d, 0x4c, 0x3d, "flat_atomic_dec"), 15537ec681f3Smrg (0x3e, -1, 0x3e, "flat_atomic_fcmpswap"), 15547ec681f3Smrg (0x3f, -1, 0x3f, "flat_atomic_fmin"), 15557ec681f3Smrg (0x40, -1, 0x40, "flat_atomic_fmax"), 15567ec681f3Smrg (0x50, 0x60, 0x50, "flat_atomic_swap_x2"), 15577ec681f3Smrg (0x51, 0x61, 0x51, "flat_atomic_cmpswap_x2"), 15587ec681f3Smrg (0x52, 0x62, 0x52, "flat_atomic_add_x2"), 15597ec681f3Smrg (0x53, 0x63, 0x53, "flat_atomic_sub_x2"), 15607ec681f3Smrg (0x55, 0x64, 0x55, "flat_atomic_smin_x2"), 15617ec681f3Smrg (0x56, 0x65, 0x56, "flat_atomic_umin_x2"), 15627ec681f3Smrg (0x57, 0x66, 0x57, "flat_atomic_smax_x2"), 15637ec681f3Smrg (0x58, 0x67, 0x58, "flat_atomic_umax_x2"), 15647ec681f3Smrg (0x59, 0x68, 0x59, "flat_atomic_and_x2"), 15657ec681f3Smrg (0x5a, 0x69, 0x5a, 
"flat_atomic_or_x2"), 15667ec681f3Smrg (0x5b, 0x6a, 0x5b, "flat_atomic_xor_x2"), 15677ec681f3Smrg (0x5c, 0x6b, 0x5c, "flat_atomic_inc_x2"), 15687ec681f3Smrg (0x5d, 0x6c, 0x5d, "flat_atomic_dec_x2"), 15697ec681f3Smrg (0x5e, -1, 0x5e, "flat_atomic_fcmpswap_x2"), 15707ec681f3Smrg (0x5f, -1, 0x5f, "flat_atomic_fmin_x2"), 15717ec681f3Smrg (0x60, -1, 0x60, "flat_atomic_fmax_x2"), 15727ec681f3Smrg} 15737ec681f3Smrgfor (gfx7, gfx8, gfx10, name) in FLAT: 15747ec681f3Smrg opcode(name, gfx7, gfx8, gfx10, Format.FLAT, InstrClass.VMem, is_atomic = "atomic" in name) #TODO: also LDS? 15757ec681f3Smrg 15767ec681f3SmrgGLOBAL = { 15777ec681f3Smrg #GFX8_9, GFX10 15787ec681f3Smrg (0x10, 0x08, "global_load_ubyte"), 15797ec681f3Smrg (0x11, 0x09, "global_load_sbyte"), 15807ec681f3Smrg (0x12, 0x0a, "global_load_ushort"), 15817ec681f3Smrg (0x13, 0x0b, "global_load_sshort"), 15827ec681f3Smrg (0x14, 0x0c, "global_load_dword"), 15837ec681f3Smrg (0x15, 0x0d, "global_load_dwordx2"), 15847ec681f3Smrg (0x16, 0x0f, "global_load_dwordx3"), 15857ec681f3Smrg (0x17, 0x0e, "global_load_dwordx4"), 15867ec681f3Smrg (0x18, 0x18, "global_store_byte"), 15877ec681f3Smrg (0x19, 0x19, "global_store_byte_d16_hi"), 15887ec681f3Smrg (0x1a, 0x1a, "global_store_short"), 15897ec681f3Smrg (0x1b, 0x1b, "global_store_short_d16_hi"), 15907ec681f3Smrg (0x1c, 0x1c, "global_store_dword"), 15917ec681f3Smrg (0x1d, 0x1d, "global_store_dwordx2"), 15927ec681f3Smrg (0x1e, 0x1f, "global_store_dwordx3"), 15937ec681f3Smrg (0x1f, 0x1e, "global_store_dwordx4"), 15947ec681f3Smrg (0x20, 0x20, "global_load_ubyte_d16"), 15957ec681f3Smrg (0x21, 0x21, "global_load_ubyte_d16_hi"), 15967ec681f3Smrg (0x22, 0x22, "global_load_sbyte_d16"), 15977ec681f3Smrg (0x23, 0x23, "global_load_sbyte_d16_hi"), 15987ec681f3Smrg (0x24, 0x24, "global_load_short_d16"), 15997ec681f3Smrg (0x25, 0x25, "global_load_short_d16_hi"), 16007ec681f3Smrg (0x40, 0x30, "global_atomic_swap"), 16017ec681f3Smrg (0x41, 0x31, "global_atomic_cmpswap"), 16027ec681f3Smrg (0x42, 
0x32, "global_atomic_add"), 16037ec681f3Smrg (0x43, 0x33, "global_atomic_sub"), 16047ec681f3Smrg (0x44, 0x35, "global_atomic_smin"), 16057ec681f3Smrg (0x45, 0x36, "global_atomic_umin"), 16067ec681f3Smrg (0x46, 0x37, "global_atomic_smax"), 16077ec681f3Smrg (0x47, 0x38, "global_atomic_umax"), 16087ec681f3Smrg (0x48, 0x39, "global_atomic_and"), 16097ec681f3Smrg (0x49, 0x3a, "global_atomic_or"), 16107ec681f3Smrg (0x4a, 0x3b, "global_atomic_xor"), 16117ec681f3Smrg (0x4b, 0x3c, "global_atomic_inc"), 16127ec681f3Smrg (0x4c, 0x3d, "global_atomic_dec"), 16137ec681f3Smrg ( -1, 0x3e, "global_atomic_fcmpswap"), 16147ec681f3Smrg ( -1, 0x3f, "global_atomic_fmin"), 16157ec681f3Smrg ( -1, 0x40, "global_atomic_fmax"), 16167ec681f3Smrg (0x60, 0x50, "global_atomic_swap_x2"), 16177ec681f3Smrg (0x61, 0x51, "global_atomic_cmpswap_x2"), 16187ec681f3Smrg (0x62, 0x52, "global_atomic_add_x2"), 16197ec681f3Smrg (0x63, 0x53, "global_atomic_sub_x2"), 16207ec681f3Smrg (0x64, 0x55, "global_atomic_smin_x2"), 16217ec681f3Smrg (0x65, 0x56, "global_atomic_umin_x2"), 16227ec681f3Smrg (0x66, 0x57, "global_atomic_smax_x2"), 16237ec681f3Smrg (0x67, 0x58, "global_atomic_umax_x2"), 16247ec681f3Smrg (0x68, 0x59, "global_atomic_and_x2"), 16257ec681f3Smrg (0x69, 0x5a, "global_atomic_or_x2"), 16267ec681f3Smrg (0x6a, 0x5b, "global_atomic_xor_x2"), 16277ec681f3Smrg (0x6b, 0x5c, "global_atomic_inc_x2"), 16287ec681f3Smrg (0x6c, 0x5d, "global_atomic_dec_x2"), 16297ec681f3Smrg ( -1, 0x5e, "global_atomic_fcmpswap_x2"), 16307ec681f3Smrg ( -1, 0x5f, "global_atomic_fmin_x2"), 16317ec681f3Smrg ( -1, 0x60, "global_atomic_fmax_x2"), 16327ec681f3Smrg ( -1, 0x16, "global_load_dword_addtid"), #GFX10.3+ 16337ec681f3Smrg ( -1, 0x17, "global_store_dword_addtid"), #GFX10.3+ 16347ec681f3Smrg ( -1, 0x34, "global_atomic_csub"), #GFX10.3+. 
seems glc must be set 16357ec681f3Smrg} 16367ec681f3Smrgfor (gfx8, gfx10, name) in GLOBAL: 16377ec681f3Smrg opcode(name, -1, gfx8, gfx10, Format.GLOBAL, InstrClass.VMem, is_atomic = "atomic" in name) 16387ec681f3Smrg 16397ec681f3SmrgSCRATCH = { 16407ec681f3Smrg #GFX8_9, GFX10 16417ec681f3Smrg (0x10, 0x08, "scratch_load_ubyte"), 16427ec681f3Smrg (0x11, 0x09, "scratch_load_sbyte"), 16437ec681f3Smrg (0x12, 0x0a, "scratch_load_ushort"), 16447ec681f3Smrg (0x13, 0x0b, "scratch_load_sshort"), 16457ec681f3Smrg (0x14, 0x0c, "scratch_load_dword"), 16467ec681f3Smrg (0x15, 0x0d, "scratch_load_dwordx2"), 16477ec681f3Smrg (0x16, 0x0f, "scratch_load_dwordx3"), 16487ec681f3Smrg (0x17, 0x0e, "scratch_load_dwordx4"), 16497ec681f3Smrg (0x18, 0x18, "scratch_store_byte"), 16507ec681f3Smrg (0x19, 0x19, "scratch_store_byte_d16_hi"), 16517ec681f3Smrg (0x1a, 0x1a, "scratch_store_short"), 16527ec681f3Smrg (0x1b, 0x1b, "scratch_store_short_d16_hi"), 16537ec681f3Smrg (0x1c, 0x1c, "scratch_store_dword"), 16547ec681f3Smrg (0x1d, 0x1d, "scratch_store_dwordx2"), 16557ec681f3Smrg (0x1e, 0x1f, "scratch_store_dwordx3"), 16567ec681f3Smrg (0x1f, 0x1e, "scratch_store_dwordx4"), 16577ec681f3Smrg (0x20, 0x20, "scratch_load_ubyte_d16"), 16587ec681f3Smrg (0x21, 0x21, "scratch_load_ubyte_d16_hi"), 16597ec681f3Smrg (0x22, 0x22, "scratch_load_sbyte_d16"), 16607ec681f3Smrg (0x23, 0x23, "scratch_load_sbyte_d16_hi"), 16617ec681f3Smrg (0x24, 0x24, "scratch_load_short_d16"), 16627ec681f3Smrg (0x25, 0x25, "scratch_load_short_d16_hi"), 16637ec681f3Smrg} 16647ec681f3Smrgfor (gfx8, gfx10, name) in SCRATCH: 16657ec681f3Smrg opcode(name, -1, gfx8, gfx10, Format.SCRATCH, InstrClass.VMem) 16667ec681f3Smrg 16677ec681f3Smrg# check for duplicate opcode numbers 16687ec681f3Smrgfor ver in ['gfx9', 'gfx10']: 16697ec681f3Smrg op_to_name = {} 16707ec681f3Smrg for op in opcodes.values(): 16717ec681f3Smrg if op.format in [Format.PSEUDO, Format.PSEUDO_BRANCH, Format.PSEUDO_BARRIER, Format.PSEUDO_REDUCTION]: 16727ec681f3Smrg continue 
16737ec681f3Smrg 16747ec681f3Smrg num = getattr(op, 'opcode_' + ver) 16757ec681f3Smrg if num == -1: 16767ec681f3Smrg continue 16777ec681f3Smrg 16787ec681f3Smrg key = (op.format, num) 16797ec681f3Smrg 16807ec681f3Smrg if key in op_to_name: 16817ec681f3Smrg # exceptions 16827ec681f3Smrg names = set([op_to_name[key], op.name]) 16837ec681f3Smrg if ver in ['gfx8', 'gfx9'] and names == set(['v_mul_lo_i32', 'v_mul_lo_u32']): 16847ec681f3Smrg continue 16857ec681f3Smrg # v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.3 16867ec681f3Smrg if ver == 'gfx10' and names == set(['v_mad_legacy_f32', 'v_fma_legacy_f32']): 16877ec681f3Smrg continue 16887ec681f3Smrg 16897ec681f3Smrg print('%s and %s share the same opcode number (%s)' % (op_to_name[key], op.name, ver)) 16907ec681f3Smrg sys.exit(1) 16917ec681f3Smrg else: 16927ec681f3Smrg op_to_name[key] = op.name 16937ec681f3Smrg 1694