17ec681f3Smrg#
27ec681f3Smrg# Copyright (c) 2018 Valve Corporation
37ec681f3Smrg#
47ec681f3Smrg# Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg# copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg# to deal in the Software without restriction, including without limitation
77ec681f3Smrg# the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg# and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg# Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg#
117ec681f3Smrg# The above copyright notice and this permission notice (including the next
127ec681f3Smrg# paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg# Software.
147ec681f3Smrg#
157ec681f3Smrg# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg# IN THE SOFTWARE.
227ec681f3Smrg#
237ec681f3Smrg
247ec681f3Smrg# Class that represents all the information we have about the opcode
257ec681f3Smrg# NOTE: this must be kept in sync with aco_op_info
267ec681f3Smrg
277ec681f3Smrgimport sys
287ec681f3Smrgfrom enum import Enum
297ec681f3Smrg
class InstrClass(Enum):
   """Instruction class attached to every opcode declared in this file.

   A member is handed to opcode() as `cls`; opcode() falls back to
   InstrClass.Other when none is given.
   NOTE(review): the numeric values presumably have to match an enum on the
   C++ side -- confirm before renumbering or reordering.
   """
   # Vector ALU classes.
   Valu32 = 0
   ValuConvert32 = 1
   Valu64 = 2
   ValuQuarterRate32 = 3
   ValuFma = 4
   ValuTranscendental32 = 5
   ValuDouble = 6
   ValuDoubleAdd = 7
   ValuDoubleConvert = 8
   ValuDoubleTranscendental = 9
   # Scalar ALU / scalar memory.
   Salu = 10
   SMem = 11
   # Control flow, synchronisation and messaging.
   Barrier = 12
   Branch = 13
   Sendmsg = 14
   # Memory and export.
   DS = 15
   Export = 16
   VMem = 17
   Waitcnt = 18
   # Default used by opcode() when no class is specified.
   Other = 19
517ec681f3Smrg
class Format(Enum):
   """Instruction encoding formats.

   PSEUDO up to VOP3P are small ordinals (the value 7 is unused), while VOP1
   and the members after it are distinct single bits (1 << 8 .. 1 << 14).
   NOTE(review): presumably mirrors an enum on the C++ side -- confirm before
   changing any value.
   """
   PSEUDO = 0
   SOP1 = 1
   SOP2 = 2
   SOPK = 3
   SOPP = 4
   SOPC = 5
   SMEM = 6
   DS = 8
   MTBUF = 9
   MUBUF = 10
   MIMG = 11
   EXP = 12
   FLAT = 13
   GLOBAL = 14
   SCRATCH = 15
   PSEUDO_BRANCH = 16
   PSEUDO_BARRIER = 17
   PSEUDO_REDUCTION = 18
   VOP3P = 19
   VOP1 = 1 << 8
   VOP2 = 1 << 9
   VOPC = 1 << 10
   VOP3 = 1 << 11
   VINTRP = 1 << 12
   DPP = 1 << 13
   SDWA = 1 << 14

   def get_builder_fields(self):
      """Return the format-specific builder fields of this format.

      Each field is a tuple (type, name, default[, dest]):

      - type and name are the strings used to declare the parameter
      - default is the default value, or None if the parameter has none
      - dest, when present, is reported by get_builder_field_dests()
        instead of name

      Formats without extra fields return an empty list.
      """
      if self is Format.SOPK:
         return [('uint16_t', 'imm', None)]
      elif self is Format.SOPP:
         return [('uint32_t', 'block', '-1'),
                 ('uint32_t', 'imm', '0')]
      elif self is Format.SMEM:
         return [('memory_sync_info', 'sync', 'memory_sync_info()'),
                 ('bool', 'glc', 'false'),
                 ('bool', 'dlc', 'false'),
                 ('bool', 'nv', 'false')]
      elif self is Format.DS:
         return [('int16_t', 'offset0', '0'),
                 ('int8_t', 'offset1', '0'),
                 ('bool', 'gds', 'false')]
      elif self is Format.MTBUF:
         return [('unsigned', 'dfmt', None),
                 ('unsigned', 'nfmt', None),
                 ('unsigned', 'offset', None),
                 ('bool', 'offen', None),
                 ('bool', 'idxen', 'false'),
                 ('bool', 'disable_wqm', 'false'),
                 ('bool', 'glc', 'false'),
                 ('bool', 'dlc', 'false'),
                 ('bool', 'slc', 'false'),
                 ('bool', 'tfe', 'false')]
      elif self is Format.MUBUF:
         return [('unsigned', 'offset', None),
                 ('bool', 'offen', None),
                 ('bool', 'swizzled', 'false'),
                 ('bool', 'idxen', 'false'),
                 ('bool', 'addr64', 'false'),
                 ('bool', 'disable_wqm', 'false'),
                 ('bool', 'glc', 'false'),
                 ('bool', 'dlc', 'false'),
                 ('bool', 'slc', 'false'),
                 ('bool', 'tfe', 'false'),
                 ('bool', 'lds', 'false')]
      elif self is Format.MIMG:
         # An unreachable second return (attribute/component) used to follow
         # this one; those fields are the VINTRP ones returned further down.
         return [('unsigned', 'dmask', '0xF'),
                 ('bool', 'da', 'false'),
                 ('bool', 'unrm', 'true'),
                 ('bool', 'disable_wqm', 'false'),
                 ('bool', 'glc', 'false'),
                 ('bool', 'dlc', 'false'),
                 ('bool', 'slc', 'false'),
                 ('bool', 'tfe', 'false'),
                 ('bool', 'lwe', 'false'),
                 ('bool', 'r128_a16', 'false', 'r128'),
                 ('bool', 'd16', 'false')]
      elif self is Format.EXP:
         return [('unsigned', 'enabled_mask', None),
                 ('unsigned', 'dest', None),
                 ('bool', 'compr', 'false', 'compressed'),
                 ('bool', 'done', 'false'),
                 ('bool', 'vm', 'false', 'valid_mask')]
      elif self is Format.PSEUDO_BRANCH:
         return [('uint32_t', 'target0', '0', 'target[0]'),
                 ('uint32_t', 'target1', '0', 'target[1]')]
      elif self is Format.PSEUDO_REDUCTION:
         return [('ReduceOp', 'op', None, 'reduce_op'),
                 ('unsigned', 'cluster_size', '0')]
      elif self is Format.PSEUDO_BARRIER:
         return [('memory_sync_info', 'sync', None),
                 ('sync_scope', 'exec_scope', 'scope_invocation')]
      elif self is Format.VINTRP:
         return [('unsigned', 'attribute', None),
                 ('unsigned', 'component', None)]
      elif self is Format.DPP:
         return [('uint16_t', 'dpp_ctrl', None),
                 ('uint8_t', 'row_mask', '0xF'),
                 ('uint8_t', 'bank_mask', '0xF'),
                 ('bool', 'bound_ctrl', 'true')]
      elif self is Format.VOP3P:
         return [('uint8_t', 'opsel_lo', None),
                 ('uint8_t', 'opsel_hi', None)]
      elif self in (Format.FLAT, Format.GLOBAL, Format.SCRATCH):
         # The offset default is the int 0 rather than a string; it is
         # rendered as "0" by get_builder_field_decls() either way.
         return [('uint16_t', 'offset', 0),
                 ('memory_sync_info', 'sync', 'memory_sync_info()'),
                 ('bool', 'glc', 'false'),
                 ('bool', 'slc', 'false'),
                 ('bool', 'lds', 'false'),
                 ('bool', 'nv', 'false')]
      else:
         return []

   def get_builder_field_names(self):
      """Return the parameter name of every builder field, in order."""
      return [f[1] for f in self.get_builder_fields()]

   def get_builder_field_dests(self):
      """Return, per builder field, its dest entry or its name if it has none."""
      return [(f[3] if len(f) >= 4 else f[1]) for f in self.get_builder_fields()]

   def get_builder_field_decls(self):
      """Return one declaration string per builder field.

      The result is "type name=default" for fields with a default and
      "type name" for fields whose default is None.
      """
      decls = []
      for f in self.get_builder_fields():
         if f[2] is not None:
            decls.append('%s %s=%s' % (f[0], f[1], f[2]))
         else:
            decls.append('%s %s' % (f[0], f[1]))
      return decls

   def get_builder_initialization(self, num_operands):
      """Return extra initialization statements for an instruction of this format.

      Only SDWA produces any text: one `instr->sel[i]` assignment for each of
      the first min(num_operands, 2) operands, followed by the `dst_sel`
      assignment. Every other format yields the empty string.
      """
      if self is not Format.SDWA:
         return ''
      stmts = ['instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i)
               for i in range(min(num_operands, 2))]
      # Only the final statement is newline-terminated, as before.
      stmts.append('instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n')
      return ''.join(stmts)
1847ec681f3Smrg
1857ec681f3Smrg
class Opcode(object):
   """Class that represents all the information we have about the opcode
   NOTE: this must be kept in sync with aco_op_info
   """

   # Operand size in bits keyed by the type suffix of an opcode name:
   # one of b/i/u/f followed by 64, 32, 24 or 16.
   _OP_DTYPE_SIZES = {'{}{}'.format(prefix, size): size
                      for prefix in 'biuf' for size in [64, 32, 24, 16]}
   # inline constants are 32-bit for 16-bit integer/typeless instructions: https://reviews.llvm.org/D81841
   _OP_DTYPE_SIZES.update({'b16': 32, 'i16': 32, 'u16': 32})

   def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic, cls):
      """Parameters:

      - name is the name of the opcode (str); operand_size is derived from it
      - opcode_gfx7, opcode_gfx9 and opcode_gfx10 are ints; the tables in
        this file pass -1 where they list no encoding for a generation
      - format is a Format member
      - input_mod and output_mod are bools, stored as the strings "1" / "0"
      - is_atomic is tested for truthiness and stored as "1" / "0"
      - cls is stored unchanged (callers in this file pass an InstrClass)
      """
      assert isinstance(name, str)
      assert isinstance(opcode_gfx7, int)
      assert isinstance(opcode_gfx9, int)
      assert isinstance(opcode_gfx10, int)
      assert isinstance(format, Format)
      assert isinstance(input_mod, bool)
      assert isinstance(output_mod, bool)

      self.name = name
      self.opcode_gfx7 = opcode_gfx7
      self.opcode_gfx9 = opcode_gfx9
      self.opcode_gfx10 = opcode_gfx10
      self.input_mod = "1" if input_mod else "0"
      self.output_mod = "1" if output_mod else "0"
      self.is_atomic = "1" if is_atomic else "0"
      self.format = format
      self.cls = cls

      # The type suffix is the last '_'-separated component of the name once
      # every '_e64' has been dropped.
      op_dtype = name.replace('_e64', '').rsplit('_', 1)[-1]

      # If we can't tell the operand size, default to 32.
      self.operand_size = self._OP_DTYPE_SIZES.get(op_dtype, 32)

      # exceptions for operands (first match wins, so 'qsad_' has to be
      # tested before the more general 'sad_'):
      if 'qsad_' in name:
         self.operand_size = 0
      elif 'sad_' in name:
         self.operand_size = 32
      elif name in ['v_mad_u64_u32', 'v_mad_i64_i32']:
         self.operand_size = 0
      elif self.operand_size == 24:
         self.operand_size = 32
      elif op_dtype == 'u8' or op_dtype == 'i8':
         self.operand_size = 32
      elif name in ['v_cvt_f32_ubyte0', 'v_cvt_f32_ubyte1',
                    'v_cvt_f32_ubyte2', 'v_cvt_f32_ubyte3']:
         self.operand_size = 32
2457ec681f3Smrg
# Registry of every opcode declared in this file, keyed by opcode name.
opcodes = {}

def opcode(name,
           opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1,
           format = Format.PSEUDO, cls = InstrClass.Other,
           input_mod = False, output_mod = False, is_atomic = False):
   """Build an Opcode from the arguments and store it in `opcodes` under name.

   Asserts that no opcode of that name has been registered before.
   """
   assert name not in opcodes
   info = Opcode(name = name,
                 opcode_gfx7 = opcode_gfx7,
                 opcode_gfx9 = opcode_gfx9,
                 opcode_gfx10 = opcode_gfx10,
                 format = format,
                 input_mod = input_mod,
                 output_mod = output_mod,
                 is_atomic = is_atomic,
                 cls = cls)
   opcodes[name] = info
2527ec681f3Smrg
def default_class(opcodes, cls):
   """Yield every tuple of opcodes with an instruction class as last element.

   Tuples that already end in an InstrClass are passed through untouched;
   all others are yielded with cls appended.
   """
   for entry in opcodes:
      yield entry if isinstance(entry[-1], InstrClass) else entry + (cls,)
2597ec681f3Smrg
# The export instruction; encoding 0 is given for all three generations.
opcode("exp", 0, 0, 0, format = Format.EXP, cls = InstrClass.Export)
# Everything below in this section is a pseudo-instruction (p_*). They rely
# on the defaults of opcode(): -1 for every hardware encoding, InstrClass.Other
# and, unless a format is named explicitly, Format.PSEUDO.
opcode("p_parallelcopy")
opcode("p_startpgm")
opcode("p_phi")
opcode("p_linear_phi")
opcode("p_as_uniform")
opcode("p_unit_test")

opcode("p_create_vector")
opcode("p_extract_vector")
opcode("p_split_vector")

# start/end the parts where we can use exec based instructions
# implicitly
opcode("p_logical_start")
opcode("p_logical_end")

# e.g. subgroupMin() in SPIR-V
opcode("p_reduce", format=Format.PSEUDO_REDUCTION)
# e.g. subgroupInclusiveMin()
opcode("p_inclusive_scan", format=Format.PSEUDO_REDUCTION)
# e.g. subgroupExclusiveMin()
opcode("p_exclusive_scan", format=Format.PSEUDO_REDUCTION)

# pseudo branches use their own format (Format.PSEUDO_BRANCH)
opcode("p_branch", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch_z", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch_nz", format=Format.PSEUDO_BRANCH)

opcode("p_barrier", format=Format.PSEUDO_BARRIER)

opcode("p_spill")
opcode("p_reload")

# start/end linear vgprs
opcode("p_start_linear_vgpr")
opcode("p_end_linear_vgpr")

opcode("p_wqm")
opcode("p_discard_if")
opcode("p_demote_to_helper")
opcode("p_is_helper")
opcode("p_exit_early_if")

# simulates proper bpermute behavior when it's unsupported, eg. GFX10 wave64
opcode("p_bpermute")

# creates a lane mask where only the first active lane is selected
opcode("p_elect")

opcode("p_constaddr")

# These don't have to be pseudo-ops, but it makes optimization easier to only
# have to consider two instructions.
# (src0 >> (index * bits)) & ((1 << bits) - 1) with optional sign extension
opcode("p_extract") # src1=index, src2=bits, src3=signext
# (src0 & ((1 << bits) - 1)) << (index * bits)
opcode("p_insert") # src1=index, src2=bits
3187ec681f3Smrg
3197ec681f3Smrg
# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
# Each entry holds one encoding per hardware generation (-1 presumably means
# "not available there"), the opcode name and, optionally, an InstrClass that
# overrides the InstrClass.Salu default applied below.
SOP2 = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name
   (0x00, 0x00, 0x00, 0x00, 0x00, "s_add_u32"),
   (0x01, 0x01, 0x01, 0x01, 0x01, "s_sub_u32"),
   (0x02, 0x02, 0x02, 0x02, 0x02, "s_add_i32"),
   (0x03, 0x03, 0x03, 0x03, 0x03, "s_sub_i32"),
   (0x04, 0x04, 0x04, 0x04, 0x04, "s_addc_u32"),
   (0x05, 0x05, 0x05, 0x05, 0x05, "s_subb_u32"),
   (0x06, 0x06, 0x06, 0x06, 0x06, "s_min_i32"),
   (0x07, 0x07, 0x07, 0x07, 0x07, "s_min_u32"),
   (0x08, 0x08, 0x08, 0x08, 0x08, "s_max_i32"),
   (0x09, 0x09, 0x09, 0x09, 0x09, "s_max_u32"),
   (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cselect_b32"),
   (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cselect_b64"),
   (0x0e, 0x0e, 0x0c, 0x0c, 0x0e, "s_and_b32"),
   (0x0f, 0x0f, 0x0d, 0x0d, 0x0f, "s_and_b64"),
   (0x10, 0x10, 0x0e, 0x0e, 0x10, "s_or_b32"),
   (0x11, 0x11, 0x0f, 0x0f, 0x11, "s_or_b64"),
   (0x12, 0x12, 0x10, 0x10, 0x12, "s_xor_b32"),
   (0x13, 0x13, 0x11, 0x11, 0x13, "s_xor_b64"),
   (0x14, 0x14, 0x12, 0x12, 0x14, "s_andn2_b32"),
   (0x15, 0x15, 0x13, 0x13, 0x15, "s_andn2_b64"),
   (0x16, 0x16, 0x14, 0x14, 0x16, "s_orn2_b32"),
   (0x17, 0x17, 0x15, 0x15, 0x17, "s_orn2_b64"),
   (0x18, 0x18, 0x16, 0x16, 0x18, "s_nand_b32"),
   (0x19, 0x19, 0x17, 0x17, 0x19, "s_nand_b64"),
   (0x1a, 0x1a, 0x18, 0x18, 0x1a, "s_nor_b32"),
   (0x1b, 0x1b, 0x19, 0x19, 0x1b, "s_nor_b64"),
   (0x1c, 0x1c, 0x1a, 0x1a, 0x1c, "s_xnor_b32"),
   (0x1d, 0x1d, 0x1b, 0x1b, 0x1d, "s_xnor_b64"),
   (0x1e, 0x1e, 0x1c, 0x1c, 0x1e, "s_lshl_b32"),
   (0x1f, 0x1f, 0x1d, 0x1d, 0x1f, "s_lshl_b64"),
   (0x20, 0x20, 0x1e, 0x1e, 0x20, "s_lshr_b32"),
   (0x21, 0x21, 0x1f, 0x1f, 0x21, "s_lshr_b64"),
   (0x22, 0x22, 0x20, 0x20, 0x22, "s_ashr_i32"),
   (0x23, 0x23, 0x21, 0x21, 0x23, "s_ashr_i64"),
   (0x24, 0x24, 0x22, 0x22, 0x24, "s_bfm_b32"),
   (0x25, 0x25, 0x23, 0x23, 0x25, "s_bfm_b64"),
   (0x26, 0x26, 0x24, 0x24, 0x26, "s_mul_i32"),
   (0x27, 0x27, 0x25, 0x25, 0x27, "s_bfe_u32"),
   (0x28, 0x28, 0x26, 0x26, 0x28, "s_bfe_i32"),
   (0x29, 0x29, 0x27, 0x27, 0x29, "s_bfe_u64"),
   (0x2a, 0x2a, 0x28, 0x28, 0x2a, "s_bfe_i64"),
   (0x2b, 0x2b, 0x29, 0x29,   -1, "s_cbranch_g_fork", InstrClass.Branch),
   (0x2c, 0x2c, 0x2a, 0x2a, 0x2c, "s_absdiff_i32"),
   (  -1,   -1, 0x2b, 0x2b,   -1, "s_rfe_restore_b64", InstrClass.Branch),
   (  -1,   -1,   -1, 0x2e, 0x2e, "s_lshl1_add_u32"),
   (  -1,   -1,   -1, 0x2f, 0x2f, "s_lshl2_add_u32"),
   (  -1,   -1,   -1, 0x30, 0x30, "s_lshl3_add_u32"),
   (  -1,   -1,   -1, 0x31, 0x31, "s_lshl4_add_u32"),
   (  -1,   -1,   -1, 0x32, 0x32, "s_pack_ll_b32_b16"),
   (  -1,   -1,   -1, 0x33, 0x33, "s_pack_lh_b32_b16"),
   (  -1,   -1,   -1, 0x34, 0x34, "s_pack_hh_b32_b16"),
   (  -1,   -1,   -1, 0x2c, 0x35, "s_mul_hi_u32"),
   (  -1,   -1,   -1, 0x2d, 0x36, "s_mul_hi_i32"),
   # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP2.
   (  -1,   -1,   -1,   -1,   -1, "p_constaddr_addlo"),
}
# SOP2 is a set, so iterating it directly would make the order in which the
# opcodes are registered depend on string-hash randomization; go through the
# entries in name order instead. Only the GFX7, GFX9 and GFX10 columns are
# handed to opcode().
for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(sorted(SOP2, key=lambda op: op[5]), InstrClass.Salu):
   opcode(name, gfx7, gfx9, gfx10, Format.SOP2, cls)
3817ec681f3Smrg
3827ec681f3Smrg
# SOPK instructions: 0 input (+ imm), 1 output + optional scc
# Each entry holds one encoding per hardware generation (-1 presumably means
# "not available there"), the opcode name and, optionally, an InstrClass that
# overrides the InstrClass.Salu default applied below.
SOPK = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name
   (0x00, 0x00, 0x00, 0x00, 0x00, "s_movk_i32"),
   (  -1,   -1,   -1,   -1, 0x01, "s_version"), # GFX10+
   (0x02, 0x02, 0x01, 0x01, 0x02, "s_cmovk_i32"), # GFX8_GFX9
   (0x03, 0x03, 0x02, 0x02, 0x03, "s_cmpk_eq_i32"),
   (0x04, 0x04, 0x03, 0x03, 0x04, "s_cmpk_lg_i32"),
   (0x05, 0x05, 0x04, 0x04, 0x05, "s_cmpk_gt_i32"),
   (0x06, 0x06, 0x05, 0x05, 0x06, "s_cmpk_ge_i32"),
   (0x07, 0x07, 0x06, 0x06, 0x07, "s_cmpk_lt_i32"),
   (0x08, 0x08, 0x07, 0x07, 0x08, "s_cmpk_le_i32"),
   (0x09, 0x09, 0x08, 0x08, 0x09, "s_cmpk_eq_u32"),
   (0x0a, 0x0a, 0x09, 0x09, 0x0a, "s_cmpk_lg_u32"),
   (0x0b, 0x0b, 0x0a, 0x0a, 0x0b, "s_cmpk_gt_u32"),
   (0x0c, 0x0c, 0x0b, 0x0b, 0x0c, "s_cmpk_ge_u32"),
   (0x0d, 0x0d, 0x0c, 0x0c, 0x0d, "s_cmpk_lt_u32"),
   (0x0e, 0x0e, 0x0d, 0x0d, 0x0e, "s_cmpk_le_u32"),
   (0x0f, 0x0f, 0x0e, 0x0e, 0x0f, "s_addk_i32"),
   (0x10, 0x10, 0x0f, 0x0f, 0x10, "s_mulk_i32"),
   (0x11, 0x11, 0x10, 0x10,   -1, "s_cbranch_i_fork", InstrClass.Branch),
   (0x12, 0x12, 0x11, 0x11, 0x12, "s_getreg_b32"),
   (0x13, 0x13, 0x12, 0x12, 0x13, "s_setreg_b32"),
   (0x15, 0x15, 0x14, 0x14, 0x15, "s_setreg_imm32_b32"), # requires 32bit literal
   (  -1,   -1, 0x15, 0x15, 0x16, "s_call_b64", InstrClass.Branch),
   (  -1,   -1,   -1,   -1, 0x17, "s_waitcnt_vscnt", InstrClass.Waitcnt),
   (  -1,   -1,   -1,   -1, 0x18, "s_waitcnt_vmcnt", InstrClass.Waitcnt),
   (  -1,   -1,   -1,   -1, 0x19, "s_waitcnt_expcnt", InstrClass.Waitcnt),
   (  -1,   -1,   -1,   -1, 0x1a, "s_waitcnt_lgkmcnt", InstrClass.Waitcnt),
   (  -1,   -1,   -1,   -1, 0x1b, "s_subvector_loop_begin", InstrClass.Branch),
   (  -1,   -1,   -1,   -1, 0x1c, "s_subvector_loop_end", InstrClass.Branch),
}
# SOPK is a set, so iterating it directly would make the order in which the
# opcodes are registered depend on string-hash randomization; go through the
# entries in name order instead. Only the GFX7, GFX9 and GFX10 columns are
# handed to opcode().
for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(sorted(SOPK, key=lambda op: op[5]), InstrClass.Salu):
   opcode(name, gfx7, gfx9, gfx10, Format.SOPK, cls)
4177ec681f3Smrg
4187ec681f3Smrg
# SOP1 instructions: 1 input, 1 output (+optional SCC)
# Each entry holds one encoding per hardware generation (-1 presumably means
# "not available there"), the opcode name and, optionally, an InstrClass that
# overrides the InstrClass.Salu default applied below.
SOP1 = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name
   (0x03, 0x03, 0x00, 0x00, 0x03, "s_mov_b32"),
   (0x04, 0x04, 0x01, 0x01, 0x04, "s_mov_b64"),
   (0x05, 0x05, 0x02, 0x02, 0x05, "s_cmov_b32"),
   (0x06, 0x06, 0x03, 0x03, 0x06, "s_cmov_b64"),
   (0x07, 0x07, 0x04, 0x04, 0x07, "s_not_b32"),
   (0x08, 0x08, 0x05, 0x05, 0x08, "s_not_b64"),
   (0x09, 0x09, 0x06, 0x06, 0x09, "s_wqm_b32"),
   (0x0a, 0x0a, 0x07, 0x07, 0x0a, "s_wqm_b64"),
   (0x0b, 0x0b, 0x08, 0x08, 0x0b, "s_brev_b32"),
   (0x0c, 0x0c, 0x09, 0x09, 0x0c, "s_brev_b64"),
   (0x0d, 0x0d, 0x0a, 0x0a, 0x0d, "s_bcnt0_i32_b32"),
   (0x0e, 0x0e, 0x0b, 0x0b, 0x0e, "s_bcnt0_i32_b64"),
   (0x0f, 0x0f, 0x0c, 0x0c, 0x0f, "s_bcnt1_i32_b32"),
   (0x10, 0x10, 0x0d, 0x0d, 0x10, "s_bcnt1_i32_b64"),
   (0x11, 0x11, 0x0e, 0x0e, 0x11, "s_ff0_i32_b32"),
   (0x12, 0x12, 0x0f, 0x0f, 0x12, "s_ff0_i32_b64"),
   (0x13, 0x13, 0x10, 0x10, 0x13, "s_ff1_i32_b32"),
   (0x14, 0x14, 0x11, 0x11, 0x14, "s_ff1_i32_b64"),
   (0x15, 0x15, 0x12, 0x12, 0x15, "s_flbit_i32_b32"),
   (0x16, 0x16, 0x13, 0x13, 0x16, "s_flbit_i32_b64"),
   (0x17, 0x17, 0x14, 0x14, 0x17, "s_flbit_i32"),
   (0x18, 0x18, 0x15, 0x15, 0x18, "s_flbit_i32_i64"),
   (0x19, 0x19, 0x16, 0x16, 0x19, "s_sext_i32_i8"),
   (0x1a, 0x1a, 0x17, 0x17, 0x1a, "s_sext_i32_i16"),
   (0x1b, 0x1b, 0x18, 0x18, 0x1b, "s_bitset0_b32"),
   (0x1c, 0x1c, 0x19, 0x19, 0x1c, "s_bitset0_b64"),
   (0x1d, 0x1d, 0x1a, 0x1a, 0x1d, "s_bitset1_b32"),
   (0x1e, 0x1e, 0x1b, 0x1b, 0x1e, "s_bitset1_b64"),
   (0x1f, 0x1f, 0x1c, 0x1c, 0x1f, "s_getpc_b64"),
   (0x20, 0x20, 0x1d, 0x1d, 0x20, "s_setpc_b64", InstrClass.Branch),
   (0x21, 0x21, 0x1e, 0x1e, 0x21, "s_swappc_b64", InstrClass.Branch),
   (0x22, 0x22, 0x1f, 0x1f, 0x22, "s_rfe_b64", InstrClass.Branch),
   (0x24, 0x24, 0x20, 0x20, 0x24, "s_and_saveexec_b64"),
   (0x25, 0x25, 0x21, 0x21, 0x25, "s_or_saveexec_b64"),
   (0x26, 0x26, 0x22, 0x22, 0x26, "s_xor_saveexec_b64"),
   (0x27, 0x27, 0x23, 0x23, 0x27, "s_andn2_saveexec_b64"),
   (0x28, 0x28, 0x24, 0x24, 0x28, "s_orn2_saveexec_b64"),
   (0x29, 0x29, 0x25, 0x25, 0x29, "s_nand_saveexec_b64"),
   (0x2a, 0x2a, 0x26, 0x26, 0x2a, "s_nor_saveexec_b64"),
   (0x2b, 0x2b, 0x27, 0x27, 0x2b, "s_xnor_saveexec_b64"),
   (0x2c, 0x2c, 0x28, 0x28, 0x2c, "s_quadmask_b32"),
   (0x2d, 0x2d, 0x29, 0x29, 0x2d, "s_quadmask_b64"),
   (0x2e, 0x2e, 0x2a, 0x2a, 0x2e, "s_movrels_b32"),
   (0x2f, 0x2f, 0x2b, 0x2b, 0x2f, "s_movrels_b64"),
   (0x30, 0x30, 0x2c, 0x2c, 0x30, "s_movreld_b32"),
   (0x31, 0x31, 0x2d, 0x2d, 0x31, "s_movreld_b64"),
   (0x32, 0x32, 0x2e, 0x2e,   -1, "s_cbranch_join", InstrClass.Branch),
   (0x34, 0x34, 0x30, 0x30, 0x34, "s_abs_i32"),
   (0x35, 0x35,   -1,   -1, 0x35, "s_mov_fed_b32"),
   (  -1,   -1, 0x32, 0x32,   -1, "s_set_gpr_idx_idx"),
   (  -1,   -1,   -1, 0x33, 0x37, "s_andn1_saveexec_b64"),
   (  -1,   -1,   -1, 0x34, 0x38, "s_orn1_saveexec_b64"),
   (  -1,   -1,   -1, 0x35, 0x39, "s_andn1_wrexec_b64"),
   (  -1,   -1,   -1, 0x36, 0x3a, "s_andn2_wrexec_b64"),
   (  -1,   -1,   -1, 0x37, 0x3b, "s_bitreplicate_b64_b32"),
   (  -1,   -1,   -1,   -1, 0x3c, "s_and_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x3d, "s_or_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x3e, "s_xor_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x3f, "s_andn2_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x40, "s_orn2_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x41, "s_nand_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x42, "s_nor_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x43, "s_xnor_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x44, "s_andn1_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x45, "s_orn1_saveexec_b32"),
   (  -1,   -1,   -1,   -1, 0x46, "s_andn1_wrexec_b32"),
   (  -1,   -1,   -1,   -1, 0x47, "s_andn2_wrexec_b32"),
   (  -1,   -1,   -1,   -1, 0x49, "s_movrelsd_2_b32"),
   # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1.
   (  -1,   -1,   -1,   -1,   -1, "p_constaddr_getpc"),
}
# SOP1 is a set, so iterating it directly would make the order in which the
# opcodes are registered depend on string-hash randomization; go through the
# entries in name order instead. Only the GFX7, GFX9 and GFX10 columns are
# handed to opcode().
for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(sorted(SOP1, key=lambda op: op[5]), InstrClass.Salu):
   opcode(name, gfx7, gfx9, gfx10, Format.SOP1, cls)
4957ec681f3Smrg
4967ec681f3Smrg
# SOPC instructions: 2 inputs and 0 outputs (+SCC)
# Each entry holds one encoding per hardware generation (-1 presumably means
# "not available there") and the opcode name. No entry carries its own
# InstrClass; all of them are registered as InstrClass.Salu.
SOPC = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name
   (0x00, 0x00, 0x00, 0x00, 0x00, "s_cmp_eq_i32"),
   (0x01, 0x01, 0x01, 0x01, 0x01, "s_cmp_lg_i32"),
   (0x02, 0x02, 0x02, 0x02, 0x02, "s_cmp_gt_i32"),
   (0x03, 0x03, 0x03, 0x03, 0x03, "s_cmp_ge_i32"),
   (0x04, 0x04, 0x04, 0x04, 0x04, "s_cmp_lt_i32"),
   (0x05, 0x05, 0x05, 0x05, 0x05, "s_cmp_le_i32"),
   (0x06, 0x06, 0x06, 0x06, 0x06, "s_cmp_eq_u32"),
   (0x07, 0x07, 0x07, 0x07, 0x07, "s_cmp_lg_u32"),
   (0x08, 0x08, 0x08, 0x08, 0x08, "s_cmp_gt_u32"),
   (0x09, 0x09, 0x09, 0x09, 0x09, "s_cmp_ge_u32"),
   (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cmp_lt_u32"),
   (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cmp_le_u32"),
   (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_bitcmp0_b32"),
   (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_bitcmp1_b32"),
   (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_bitcmp0_b64"),
   (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_bitcmp1_b64"),
   (0x10, 0x10, 0x10, 0x10,   -1, "s_setvskip"),
   (  -1,   -1, 0x11, 0x11,   -1, "s_set_gpr_idx_on"),
   (  -1,   -1, 0x12, 0x12, 0x12, "s_cmp_eq_u64"),
   (  -1,   -1, 0x13, 0x13, 0x13, "s_cmp_lg_u64"),
}
# SOPC is a set, so iterating it directly would make the order in which the
# opcodes are registered depend on string-hash randomization; go through the
# entries in name order instead. Only the GFX7, GFX9 and GFX10 columns are
# handed to opcode().
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in sorted(SOPC, key=lambda op: op[5]):
   opcode(name, gfx7, gfx9, gfx10, Format.SOPC, InstrClass.Salu)
5237ec681f3Smrg
5247ec681f3Smrg
5257ec681f3Smrg# SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs
5267ec681f3SmrgSOPP = {
5277ec681f3Smrg  # GFX6, GFX7, GFX8, GFX9, GFX10, name
5287ec681f3Smrg   (0x00, 0x00, 0x00, 0x00, 0x00, "s_nop"),
5297ec681f3Smrg   (0x01, 0x01, 0x01, 0x01, 0x01, "s_endpgm"),
5307ec681f3Smrg   (0x02, 0x02, 0x02, 0x02, 0x02, "s_branch", InstrClass.Branch),
5317ec681f3Smrg   (  -1,   -1, 0x03, 0x03, 0x03, "s_wakeup"),
5327ec681f3Smrg   (0x04, 0x04, 0x04, 0x04, 0x04, "s_cbranch_scc0", InstrClass.Branch),
5337ec681f3Smrg   (0x05, 0x05, 0x05, 0x05, 0x05, "s_cbranch_scc1", InstrClass.Branch),
5347ec681f3Smrg   (0x06, 0x06, 0x06, 0x06, 0x06, "s_cbranch_vccz", InstrClass.Branch),
5357ec681f3Smrg   (0x07, 0x07, 0x07, 0x07, 0x07, "s_cbranch_vccnz", InstrClass.Branch),
5367ec681f3Smrg   (0x08, 0x08, 0x08, 0x08, 0x08, "s_cbranch_execz", InstrClass.Branch),
5377ec681f3Smrg   (0x09, 0x09, 0x09, 0x09, 0x09, "s_cbranch_execnz", InstrClass.Branch),
5387ec681f3Smrg   (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_barrier", InstrClass.Barrier),
5397ec681f3Smrg   (  -1, 0x0b, 0x0b, 0x0b, 0x0b, "s_setkill"),
5407ec681f3Smrg   (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_waitcnt", InstrClass.Waitcnt),
5417ec681f3Smrg   (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_sethalt"),
5427ec681f3Smrg   (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_sleep"),
5437ec681f3Smrg   (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_setprio"),
5447ec681f3Smrg   (0x10, 0x10, 0x10, 0x10, 0x10, "s_sendmsg", InstrClass.Sendmsg),
5457ec681f3Smrg   (0x11, 0x11, 0x11, 0x11, 0x11, "s_sendmsghalt", InstrClass.Sendmsg),
5467ec681f3Smrg   (0x12, 0x12, 0x12, 0x12, 0x12, "s_trap", InstrClass.Branch),
5477ec681f3Smrg   (0x13, 0x13, 0x13, 0x13, 0x13, "s_icache_inv"),
5487ec681f3Smrg   (0x14, 0x14, 0x14, 0x14, 0x14, "s_incperflevel"),
5497ec681f3Smrg   (0x15, 0x15, 0x15, 0x15, 0x15, "s_decperflevel"),
5507ec681f3Smrg   (0x16, 0x16, 0x16, 0x16, 0x16, "s_ttracedata"),
5517ec681f3Smrg   (  -1, 0x17, 0x17, 0x17, 0x17, "s_cbranch_cdbgsys", InstrClass.Branch),
5527ec681f3Smrg   (  -1, 0x18, 0x18, 0x18, 0x18, "s_cbranch_cdbguser", InstrClass.Branch),
5537ec681f3Smrg   (  -1, 0x19, 0x19, 0x19, 0x19, "s_cbranch_cdbgsys_or_user", InstrClass.Branch),
5547ec681f3Smrg   (  -1, 0x1a, 0x1a, 0x1a, 0x1a, "s_cbranch_cdbgsys_and_user", InstrClass.Branch),
5557ec681f3Smrg   (  -1,   -1, 0x1b, 0x1b, 0x1b, "s_endpgm_saved"),
5567ec681f3Smrg   (  -1,   -1, 0x1c, 0x1c,   -1, "s_set_gpr_idx_off"),
5577ec681f3Smrg   (  -1,   -1, 0x1d, 0x1d,   -1, "s_set_gpr_idx_mode"),
5587ec681f3Smrg   (  -1,   -1,   -1, 0x1e, 0x1e, "s_endpgm_ordered_ps_done"),
5597ec681f3Smrg   (  -1,   -1,   -1,   -1, 0x1f, "s_code_end"),
5607ec681f3Smrg   (  -1,   -1,   -1,   -1, 0x20, "s_inst_prefetch"),
5617ec681f3Smrg   (  -1,   -1,   -1,   -1, 0x21, "s_clause"),
5627ec681f3Smrg   (  -1,   -1,   -1,   -1, 0x22, "s_wait_idle"),
5637ec681f3Smrg   (  -1,   -1,   -1,   -1, 0x23, "s_waitcnt_depctr"),
5647ec681f3Smrg   (  -1,   -1,   -1,   -1, 0x24, "s_round_mode"),
5657ec681f3Smrg   (  -1,   -1,   -1,   -1, 0x25, "s_denorm_mode"),
5667ec681f3Smrg   (  -1,   -1,   -1,   -1, 0x26, "s_ttracedata_imm"),
5677ec681f3Smrg}
# Register every SOPP row through opcode(). Only the GFX7, GFX9 and GFX10
# encodings are forwarded; the GFX6 and GFX8 columns are unpacked but unused.
# NOTE(review): default_class() is defined outside this chunk - presumably it
# appends InstrClass.Salu to rows that carry no explicit class; confirm.
for sopp_row in default_class(SOPP, InstrClass.Salu):
   gfx6, gfx7, gfx8, gfx9, gfx10, name, cls = sopp_row
   opcode(name, gfx7, gfx9, gfx10, Format.SOPP, cls)
5707ec681f3Smrg
5717ec681f3Smrg
# SMEM instructions: sbase input (2 sgpr), potentially 2 offset inputs, 1 sdata input/output
# Unlike GFX10, GFX10.3 does not have SMEM store, atomic or scratch instructions
#
# Each row is (GFX6, GFX7, GFX8, GFX9, GFX10, name); -1 appears where a
# generation has no encoding for the mnemonic. The registration loop that
# follows the table forwards only the GFX7, GFX9 and GFX10 columns to opcode().
SMEM = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name
   # loads
   (0x00, 0x00, 0x00, 0x00, 0x00, "s_load_dword"),
   (0x01, 0x01, 0x01, 0x01, 0x01, "s_load_dwordx2"),
   (0x02, 0x02, 0x02, 0x02, 0x02, "s_load_dwordx4"),
   (0x03, 0x03, 0x03, 0x03, 0x03, "s_load_dwordx8"),
   (0x04, 0x04, 0x04, 0x04, 0x04, "s_load_dwordx16"),
   (  -1,   -1,   -1, 0x05, 0x05, "s_scratch_load_dword"),
   (  -1,   -1,   -1, 0x06, 0x06, "s_scratch_load_dwordx2"),
   (  -1,   -1,   -1, 0x07, 0x07, "s_scratch_load_dwordx4"),
   (0x08, 0x08, 0x08, 0x08, 0x08, "s_buffer_load_dword"),
   (0x09, 0x09, 0x09, 0x09, 0x09, "s_buffer_load_dwordx2"),
   (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_buffer_load_dwordx4"),
   (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_buffer_load_dwordx8"),
   (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_buffer_load_dwordx16"),
   # stores
   (  -1,   -1, 0x10, 0x10, 0x10, "s_store_dword"),
   (  -1,   -1, 0x11, 0x11, 0x11, "s_store_dwordx2"),
   (  -1,   -1, 0x12, 0x12, 0x12, "s_store_dwordx4"),
   (  -1,   -1,   -1, 0x15, 0x15, "s_scratch_store_dword"),
   (  -1,   -1,   -1, 0x16, 0x16, "s_scratch_store_dwordx2"),
   (  -1,   -1,   -1, 0x17, 0x17, "s_scratch_store_dwordx4"),
   (  -1,   -1, 0x18, 0x18, 0x18, "s_buffer_store_dword"),
   (  -1,   -1, 0x19, 0x19, 0x19, "s_buffer_store_dwordx2"),
   (  -1,   -1, 0x1a, 0x1a, 0x1a, "s_buffer_store_dwordx4"),
   # cache control, timers and probes
   # s_gl1_inv is GFX10-only: it must not carry a GFX8/GFX9 encoding, or it
   # would be registered as available on those generations.
   (  -1,   -1,   -1,   -1, 0x1f, "s_gl1_inv"),
   (0x1f, 0x1f, 0x20, 0x20, 0x20, "s_dcache_inv"),
   (  -1,   -1, 0x21, 0x21, 0x21, "s_dcache_wb"),
   (  -1, 0x1d, 0x22, 0x22,   -1, "s_dcache_inv_vol"),
   (  -1,   -1, 0x23, 0x23,   -1, "s_dcache_wb_vol"),
   (0x1e, 0x1e, 0x24, 0x24, 0x24, "s_memtime"), #GFX6-GFX10
   (  -1,   -1, 0x25, 0x25, 0x25, "s_memrealtime"),
   (  -1,   -1, 0x26, 0x26, 0x26, "s_atc_probe"),
   (  -1,   -1, 0x27, 0x27, 0x27, "s_atc_probe_buffer"),
   (  -1,   -1,   -1, 0x28, 0x28, "s_dcache_discard"),
   (  -1,   -1,   -1, 0x29, 0x29, "s_dcache_discard_x2"),
   (  -1,   -1,   -1,   -1, 0x2a, "s_get_waveid_in_workgroup"),
   # buffer atomics
   (  -1,   -1,   -1, 0x40, 0x40, "s_buffer_atomic_swap"),
   (  -1,   -1,   -1, 0x41, 0x41, "s_buffer_atomic_cmpswap"),
   (  -1,   -1,   -1, 0x42, 0x42, "s_buffer_atomic_add"),
   (  -1,   -1,   -1, 0x43, 0x43, "s_buffer_atomic_sub"),
   (  -1,   -1,   -1, 0x44, 0x44, "s_buffer_atomic_smin"),
   (  -1,   -1,   -1, 0x45, 0x45, "s_buffer_atomic_umin"),
   (  -1,   -1,   -1, 0x46, 0x46, "s_buffer_atomic_smax"),
   (  -1,   -1,   -1, 0x47, 0x47, "s_buffer_atomic_umax"),
   (  -1,   -1,   -1, 0x48, 0x48, "s_buffer_atomic_and"),
   (  -1,   -1,   -1, 0x49, 0x49, "s_buffer_atomic_or"),
   (  -1,   -1,   -1, 0x4a, 0x4a, "s_buffer_atomic_xor"),
   (  -1,   -1,   -1, 0x4b, 0x4b, "s_buffer_atomic_inc"),
   (  -1,   -1,   -1, 0x4c, 0x4c, "s_buffer_atomic_dec"),
   (  -1,   -1,   -1, 0x60, 0x60, "s_buffer_atomic_swap_x2"),
   (  -1,   -1,   -1, 0x61, 0x61, "s_buffer_atomic_cmpswap_x2"),
   (  -1,   -1,   -1, 0x62, 0x62, "s_buffer_atomic_add_x2"),
   (  -1,   -1,   -1, 0x63, 0x63, "s_buffer_atomic_sub_x2"),
   (  -1,   -1,   -1, 0x64, 0x64, "s_buffer_atomic_smin_x2"),
   (  -1,   -1,   -1, 0x65, 0x65, "s_buffer_atomic_umin_x2"),
   (  -1,   -1,   -1, 0x66, 0x66, "s_buffer_atomic_smax_x2"),
   (  -1,   -1,   -1, 0x67, 0x67, "s_buffer_atomic_umax_x2"),
   (  -1,   -1,   -1, 0x68, 0x68, "s_buffer_atomic_and_x2"),
   (  -1,   -1,   -1, 0x69, 0x69, "s_buffer_atomic_or_x2"),
   (  -1,   -1,   -1, 0x6a, 0x6a, "s_buffer_atomic_xor_x2"),
   (  -1,   -1,   -1, 0x6b, 0x6b, "s_buffer_atomic_inc_x2"),
   (  -1,   -1,   -1, 0x6c, 0x6c, "s_buffer_atomic_dec_x2"),
   # non-buffer atomics
   (  -1,   -1,   -1, 0x80, 0x80, "s_atomic_swap"),
   (  -1,   -1,   -1, 0x81, 0x81, "s_atomic_cmpswap"),
   (  -1,   -1,   -1, 0x82, 0x82, "s_atomic_add"),
   (  -1,   -1,   -1, 0x83, 0x83, "s_atomic_sub"),
   (  -1,   -1,   -1, 0x84, 0x84, "s_atomic_smin"),
   (  -1,   -1,   -1, 0x85, 0x85, "s_atomic_umin"),
   (  -1,   -1,   -1, 0x86, 0x86, "s_atomic_smax"),
   (  -1,   -1,   -1, 0x87, 0x87, "s_atomic_umax"),
   (  -1,   -1,   -1, 0x88, 0x88, "s_atomic_and"),
   (  -1,   -1,   -1, 0x89, 0x89, "s_atomic_or"),
   (  -1,   -1,   -1, 0x8a, 0x8a, "s_atomic_xor"),
   (  -1,   -1,   -1, 0x8b, 0x8b, "s_atomic_inc"),
   (  -1,   -1,   -1, 0x8c, 0x8c, "s_atomic_dec"),
   (  -1,   -1,   -1, 0xa0, 0xa0, "s_atomic_swap_x2"),
   (  -1,   -1,   -1, 0xa1, 0xa1, "s_atomic_cmpswap_x2"),
   (  -1,   -1,   -1, 0xa2, 0xa2, "s_atomic_add_x2"),
   (  -1,   -1,   -1, 0xa3, 0xa3, "s_atomic_sub_x2"),
   (  -1,   -1,   -1, 0xa4, 0xa4, "s_atomic_smin_x2"),
   (  -1,   -1,   -1, 0xa5, 0xa5, "s_atomic_umin_x2"),
   (  -1,   -1,   -1, 0xa6, 0xa6, "s_atomic_smax_x2"),
   (  -1,   -1,   -1, 0xa7, 0xa7, "s_atomic_umax_x2"),
   (  -1,   -1,   -1, 0xa8, 0xa8, "s_atomic_and_x2"),
   (  -1,   -1,   -1, 0xa9, 0xa9, "s_atomic_or_x2"),
   (  -1,   -1,   -1, 0xaa, 0xaa, "s_atomic_xor_x2"),
   (  -1,   -1,   -1, 0xab, 0xab, "s_atomic_inc_x2"),
   (  -1,   -1,   -1, 0xac, 0xac, "s_atomic_dec_x2"),
}
# Register every SMEM row as an InstrClass.SMem opcode. Only the GFX7, GFX9
# and GFX10 encodings are forwarded; a mnemonic containing "atomic" is
# flagged through the is_atomic keyword.
for smem_row in SMEM:
   gfx6, gfx7, gfx8, gfx9, gfx10, name = smem_row
   opcode(name, gfx7, gfx9, gfx10, Format.SMEM, InstrClass.SMem, is_atomic=("atomic" in name))
6657ec681f3Smrg
6667ec681f3Smrg
# VOP2 instructions: 2 inputs, 1 output (+ optional vcc)
# TODO: misses some GFX6_7 opcodes which were shifted to VOP3 in GFX8
#
# Each row is (GFX6, GFX7, GFX8, GFX9, GFX10, name, modifiers). The single
# boolean is passed twice by the registration loop after the table, i.e. it
# is used for both the input- and the output-modifier argument of opcode().
VOP2 = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name, input/output modifiers
   (0x01, 0x01,   -1,   -1,   -1, "v_readlane_b32", False),
   (0x02, 0x02,   -1,   -1,   -1, "v_writelane_b32", False),
   # 32-bit float arithmetic
   (0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True),
   (0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True),
   (0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True),
   (0x06, 0x06,   -1,   -1, 0x06, "v_mac_legacy_f32", True),
   (0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True),
   (0x08, 0x08, 0x05, 0x05, 0x08, "v_mul_f32", True),
   # 24-bit integer multiplies
   (0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", False),
   (0x0a, 0x0a, 0x07, 0x07, 0x0a, "v_mul_hi_i32_i24", False),
   (0x0b, 0x0b, 0x08, 0x08, 0x0b, "v_mul_u32_u24", False),
   (0x0c, 0x0c, 0x09, 0x09, 0x0c, "v_mul_hi_u32_u24", False),
   (  -1,   -1,   -1, 0x39, 0x0d, "v_dot4c_i32_i8", False),
   # min / max
   (0x0d, 0x0d,   -1,   -1,   -1, "v_min_legacy_f32", True),
   (0x0e, 0x0e,   -1,   -1,   -1, "v_max_legacy_f32", True),
   (0x0f, 0x0f, 0x0a, 0x0a, 0x0f, "v_min_f32", True),
   (0x10, 0x10, 0x0b, 0x0b, 0x10, "v_max_f32", True),
   (0x11, 0x11, 0x0c, 0x0c, 0x11, "v_min_i32", False),
   (0x12, 0x12, 0x0d, 0x0d, 0x12, "v_max_i32", False),
   (0x13, 0x13, 0x0e, 0x0e, 0x13, "v_min_u32", False),
   (0x14, 0x14, 0x0f, 0x0f, 0x14, "v_max_u32", False),
   # shifts and bitwise logic
   (0x15, 0x15,   -1,   -1,   -1, "v_lshr_b32", False),
   (0x16, 0x16, 0x10, 0x10, 0x16, "v_lshrrev_b32", False),
   (0x17, 0x17,   -1,   -1,   -1, "v_ashr_i32", False),
   (0x18, 0x18, 0x11, 0x11, 0x18, "v_ashrrev_i32", False),
   (0x19, 0x19,   -1,   -1,   -1, "v_lshl_b32", False),
   (0x1a, 0x1a, 0x12, 0x12, 0x1a, "v_lshlrev_b32", False),
   (0x1b, 0x1b, 0x13, 0x13, 0x1b, "v_and_b32", False),
   (0x1c, 0x1c, 0x14, 0x14, 0x1c, "v_or_b32", False),
   (0x1d, 0x1d, 0x15, 0x15, 0x1d, "v_xor_b32", False),
   (  -1,   -1,   -1,   -1, 0x1e, "v_xnor_b32", False),
   # multiply-accumulate
   (0x1f, 0x1f, 0x16, 0x16, 0x1f, "v_mac_f32", True),
   (0x20, 0x20, 0x17, 0x17, 0x20, "v_madmk_f32", False),
   (0x21, 0x21, 0x18, 0x18, 0x21, "v_madak_f32", False),
   (0x24, 0x24,   -1,   -1,   -1, "v_mbcnt_hi_u32_b32", False),
   # integer add/sub with carry
   (0x25, 0x25, 0x19, 0x19,   -1, "v_add_co_u32", False), # VOP3B only in RDNA
   (0x26, 0x26, 0x1a, 0x1a,   -1, "v_sub_co_u32", False), # VOP3B only in RDNA
   (0x27, 0x27, 0x1b, 0x1b,   -1, "v_subrev_co_u32", False), # VOP3B only in RDNA
   (0x28, 0x28, 0x1c, 0x1c, 0x28, "v_addc_co_u32", False), # v_add_co_ci_u32 in RDNA
   (0x29, 0x29, 0x1d, 0x1d, 0x29, "v_subb_co_u32", False), # v_sub_co_ci_u32 in RDNA
   (0x2a, 0x2a, 0x1e, 0x1e, 0x2a, "v_subbrev_co_u32", False), # v_subrev_co_ci_u32 in RDNA
   (  -1,   -1,   -1,   -1, 0x2b, "v_fmac_f32", True),
   (  -1,   -1,   -1,   -1, 0x2c, "v_fmamk_f32", True),
   (  -1,   -1,   -1,   -1, 0x2d, "v_fmaak_f32", True),
   (0x2f, 0x2f,   -1,   -1, 0x2f, "v_cvt_pkrtz_f16_f32", True),
   # 16-bit operations
   (  -1,   -1, 0x1f, 0x1f, 0x32, "v_add_f16", True),
   (  -1,   -1, 0x20, 0x20, 0x33, "v_sub_f16", True),
   (  -1,   -1, 0x21, 0x21, 0x34, "v_subrev_f16", True),
   (  -1,   -1, 0x22, 0x22, 0x35, "v_mul_f16", True),
   (  -1,   -1, 0x23, 0x23,   -1, "v_mac_f16", True),
   (  -1,   -1, 0x24, 0x24,   -1, "v_madmk_f16", False),
   (  -1,   -1, 0x25, 0x25,   -1, "v_madak_f16", False),
   (  -1,   -1, 0x26, 0x26,   -1, "v_add_u16", False),
   (  -1,   -1, 0x27, 0x27,   -1, "v_sub_u16", False),
   (  -1,   -1, 0x28, 0x28,   -1, "v_subrev_u16", False),
   (  -1,   -1, 0x29, 0x29,   -1, "v_mul_lo_u16", False),
   (  -1,   -1, 0x2a, 0x2a,   -1, "v_lshlrev_b16", False),
   (  -1,   -1, 0x2b, 0x2b,   -1, "v_lshrrev_b16", False),
   (  -1,   -1, 0x2c, 0x2c,   -1, "v_ashrrev_i16", False),
   (  -1,   -1, 0x2d, 0x2d, 0x39, "v_max_f16", True),
   (  -1,   -1, 0x2e, 0x2e, 0x3a, "v_min_f16", True),
   (  -1,   -1, 0x2f, 0x2f,   -1, "v_max_u16", False),
   (  -1,   -1, 0x30, 0x30,   -1, "v_max_i16", False),
   (  -1,   -1, 0x31, 0x31,   -1, "v_min_u16", False),
   (  -1,   -1, 0x32, 0x32,   -1, "v_min_i16", False),
   (  -1,   -1, 0x33, 0x33, 0x3b, "v_ldexp_f16", False),
   # carry-less 32-bit integer add/sub
   (  -1,   -1,   -1, 0x34, 0x25, "v_add_u32", False), # use v_add_co_u32 on GFX8, called v_add_nc_u32 in RDNA
   (  -1,   -1,   -1, 0x35, 0x26, "v_sub_u32", False), # use v_sub_co_u32 on GFX8, called v_sub_nc_u32 in RDNA
   (  -1,   -1,   -1, 0x36, 0x27, "v_subrev_u32", False), # use v_subrev_co_u32 on GFX8, called v_subrev_nc_u32 in RDNA
   (  -1,   -1,   -1,   -1, 0x36, "v_fmac_f16", False),
   (  -1,   -1,   -1,   -1, 0x37, "v_fmamk_f16", False),
   (  -1,   -1,   -1,   -1, 0x38, "v_fmaak_f16", False),
   (  -1,   -1,   -1,   -1, 0x3c, "v_pk_fmac_f16", False),
}
# Register every VOP2 row as an InstrClass.Valu32 opcode. The row's single
# modifiers flag is passed for both trailing opcode() arguments.
for vop2_row in VOP2:
   gfx6, gfx7, gfx8, gfx9, gfx10, name, modifiers = vop2_row
   opcode(name, gfx7, gfx9, gfx10, Format.VOP2, InstrClass.Valu32, modifiers, modifiers)
7477ec681f3Smrg
# v_cndmask_b32 can use input modifiers but not output modifiers, so it is
# registered on its own (True, False) instead of through the VOP2 table,
# whose rows share one flag for both.
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x00, 0x00, 0x00, 0x00, 0x01, "v_cndmask_b32")
opcode(name, gfx7, gfx9, gfx10, Format.VOP2, InstrClass.Valu32, True, False)
7527ec681f3Smrg
7537ec681f3Smrg
# VOP1 instructions: instructions with 1 input and 1 output
#
# Each row is (GFX6, GFX7, GFX8, GFX9, GFX10, name, input_modifiers,
# output_modifiers) with an optional trailing InstrClass; rows without one
# are registered with InstrClass.Valu32 by the loop after the table.
VOP1 = {
  # GFX6, GFX7, GFX8, GFX9, GFX10, name, input_modifiers, output_modifiers
   (0x00, 0x00, 0x00, 0x00, 0x00, "v_nop", False, False),
   (0x01, 0x01, 0x01, 0x01, 0x01, "v_mov_b32", False, False),
   (0x02, 0x02, 0x02, 0x02, 0x02, "v_readfirstlane_b32", False, False),
   # conversions
   (0x03, 0x03, 0x03, 0x03, 0x03, "v_cvt_i32_f64", True, False, InstrClass.ValuDoubleConvert),
   (0x04, 0x04, 0x04, 0x04, 0x04, "v_cvt_f64_i32", False, True, InstrClass.ValuDoubleConvert),
   (0x05, 0x05, 0x05, 0x05, 0x05, "v_cvt_f32_i32", False, True),
   (0x06, 0x06, 0x06, 0x06, 0x06, "v_cvt_f32_u32", False, True),
   (0x07, 0x07, 0x07, 0x07, 0x07, "v_cvt_u32_f32", True, False),
   (0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False),
   (0x09, 0x09,   -1,   -1, 0x09, "v_mov_fed_b32", True, False), # LLVM mentions it for GFX8_9
   (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True),
   (  -1,   -1,   -1,   -1,   -1, "p_cvt_f16_f32_rtne", True, True),
   (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True),
   (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False),
   (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),
   (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "v_cvt_off_f32_i4", False, True),
   (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "v_cvt_f32_f64", True, True, InstrClass.ValuDoubleConvert),
   (0x10, 0x10, 0x10, 0x10, 0x10, "v_cvt_f64_f32", True, True, InstrClass.ValuDoubleConvert),
   (0x11, 0x11, 0x11, 0x11, 0x11, "v_cvt_f32_ubyte0", False, True),
   (0x12, 0x12, 0x12, 0x12, 0x12, "v_cvt_f32_ubyte1", False, True),
   (0x13, 0x13, 0x13, 0x13, 0x13, "v_cvt_f32_ubyte2", False, True),
   (0x14, 0x14, 0x14, 0x14, 0x14, "v_cvt_f32_ubyte3", False, True),
   (0x15, 0x15, 0x15, 0x15, 0x15, "v_cvt_u32_f64", True, False, InstrClass.ValuDoubleConvert),
   (0x16, 0x16, 0x16, 0x16, 0x16, "v_cvt_f64_u32", False, True, InstrClass.ValuDoubleConvert),
   # rounding
   (  -1, 0x17, 0x17, 0x17, 0x17, "v_trunc_f64", True, True, InstrClass.ValuDouble),
   (  -1, 0x18, 0x18, 0x18, 0x18, "v_ceil_f64", True, True, InstrClass.ValuDouble),
   (  -1, 0x19, 0x19, 0x19, 0x19, "v_rndne_f64", True, True, InstrClass.ValuDouble),
   (  -1, 0x1a, 0x1a, 0x1a, 0x1a, "v_floor_f64", True, True, InstrClass.ValuDouble),
   (  -1,   -1,   -1,   -1, 0x1b, "v_pipeflush", False, False),
   (0x20, 0x20, 0x1b, 0x1b, 0x20, "v_fract_f32", True, True),
   (0x21, 0x21, 0x1c, 0x1c, 0x21, "v_trunc_f32", True, True),
   (0x22, 0x22, 0x1d, 0x1d, 0x22, "v_ceil_f32", True, True),
   (0x23, 0x23, 0x1e, 0x1e, 0x23, "v_rndne_f32", True, True),
   (0x24, 0x24, 0x1f, 0x1f, 0x24, "v_floor_f32", True, True),
   # transcendentals
   (0x25, 0x25, 0x20, 0x20, 0x25, "v_exp_f32", True, True, InstrClass.ValuTranscendental32),
   (0x26, 0x26,   -1,   -1,   -1, "v_log_clamp_f32", True, True, InstrClass.ValuTranscendental32),
   (0x27, 0x27, 0x21, 0x21, 0x27, "v_log_f32", True, True, InstrClass.ValuTranscendental32),
   (0x28, 0x28,   -1,   -1,   -1, "v_rcp_clamp_f32", True, True, InstrClass.ValuTranscendental32),
   (0x29, 0x29,   -1,   -1,   -1, "v_rcp_legacy_f32", True, True, InstrClass.ValuTranscendental32),
   (0x2a, 0x2a, 0x22, 0x22, 0x2a, "v_rcp_f32", True, True, InstrClass.ValuTranscendental32),
   (0x2b, 0x2b, 0x23, 0x23, 0x2b, "v_rcp_iflag_f32", True, True, InstrClass.ValuTranscendental32),
   (0x2c, 0x2c,   -1,   -1,   -1, "v_rsq_clamp_f32", True, True, InstrClass.ValuTranscendental32),
   (0x2d, 0x2d,   -1,   -1,   -1, "v_rsq_legacy_f32", True, True, InstrClass.ValuTranscendental32),
   (0x2e, 0x2e, 0x24, 0x24, 0x2e, "v_rsq_f32", True, True, InstrClass.ValuTranscendental32),
   (0x2f, 0x2f, 0x25, 0x25, 0x2f, "v_rcp_f64", True, True, InstrClass.ValuDoubleTranscendental),
   (0x30, 0x30,   -1,   -1,   -1, "v_rcp_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental),
   (0x31, 0x31, 0x26, 0x26, 0x31, "v_rsq_f64", True, True, InstrClass.ValuDoubleTranscendental),
   (0x32, 0x32,   -1,   -1,   -1, "v_rsq_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental),
   (0x33, 0x33, 0x27, 0x27, 0x33, "v_sqrt_f32", True, True, InstrClass.ValuTranscendental32),
   (0x34, 0x34, 0x28, 0x28, 0x34, "v_sqrt_f64", True, True, InstrClass.ValuDoubleTranscendental),
   (0x35, 0x35, 0x29, 0x29, 0x35, "v_sin_f32", True, True, InstrClass.ValuTranscendental32),
   (0x36, 0x36, 0x2a, 0x2a, 0x36, "v_cos_f32", True, True, InstrClass.ValuTranscendental32),
   # bit manipulation
   (0x37, 0x37, 0x2b, 0x2b, 0x37, "v_not_b32", False, False),
   (0x38, 0x38, 0x2c, 0x2c, 0x38, "v_bfrev_b32", False, False),
   (0x39, 0x39, 0x2d, 0x2d, 0x39, "v_ffbh_u32", False, False),
   (0x3a, 0x3a, 0x2e, 0x2e, 0x3a, "v_ffbl_b32", False, False),
   (0x3b, 0x3b, 0x2f, 0x2f, 0x3b, "v_ffbh_i32", False, False),
   (0x3c, 0x3c, 0x30, 0x30, 0x3c, "v_frexp_exp_i32_f64", True, False, InstrClass.ValuDouble),
   (0x3d, 0x3d, 0x31, 0x31, 0x3d, "v_frexp_mant_f64", True, False, InstrClass.ValuDouble),
   (0x3e, 0x3e, 0x32, 0x32, 0x3e, "v_fract_f64", True, True, InstrClass.ValuDouble),
   (0x3f, 0x3f, 0x33, 0x33, 0x3f, "v_frexp_exp_i32_f32", True, False),
   (0x40, 0x40, 0x34, 0x34, 0x40, "v_frexp_mant_f32", True, False),
   (0x41, 0x41, 0x35, 0x35, 0x41, "v_clrexcp", False, False),
   # relative moves
   (0x42, 0x42, 0x36,   -1, 0x42, "v_movreld_b32", False, False),
   (0x43, 0x43, 0x37,   -1, 0x43, "v_movrels_b32", False, False),
   (0x44, 0x44, 0x38,   -1, 0x44, "v_movrelsd_b32", False, False),
   (  -1,   -1,   -1,   -1, 0x48, "v_movrelsd_2_b32", False, False),
   (  -1,   -1,   -1, 0x37,   -1, "v_screen_partition_4se_b32", False, False),
   # 16-bit operations
   (  -1,   -1, 0x39, 0x39, 0x50, "v_cvt_f16_u16", False, True),
   (  -1,   -1, 0x3a, 0x3a, 0x51, "v_cvt_f16_i16", False, True),
   (  -1,   -1, 0x3b, 0x3b, 0x52, "v_cvt_u16_f16", True, False),
   (  -1,   -1, 0x3c, 0x3c, 0x53, "v_cvt_i16_f16", True, False),
   (  -1,   -1, 0x3d, 0x3d, 0x54, "v_rcp_f16", True, True, InstrClass.ValuTranscendental32),
   (  -1,   -1, 0x3e, 0x3e, 0x55, "v_sqrt_f16", True, True, InstrClass.ValuTranscendental32),
   (  -1,   -1, 0x3f, 0x3f, 0x56, "v_rsq_f16", True, True, InstrClass.ValuTranscendental32),
   (  -1,   -1, 0x40, 0x40, 0x57, "v_log_f16", True, True, InstrClass.ValuTranscendental32),
   (  -1,   -1, 0x41, 0x41, 0x58, "v_exp_f16", True, True, InstrClass.ValuTranscendental32),
   (  -1,   -1, 0x42, 0x42, 0x59, "v_frexp_mant_f16", True, False),
   (  -1,   -1, 0x43, 0x43, 0x5a, "v_frexp_exp_i16_f16", True, False),
   (  -1,   -1, 0x44, 0x44, 0x5b, "v_floor_f16", True, True),
   (  -1,   -1, 0x45, 0x45, 0x5c, "v_ceil_f16", True, True),
   (  -1,   -1, 0x46, 0x46, 0x5d, "v_trunc_f16", True, True),
   (  -1,   -1, 0x47, 0x47, 0x5e, "v_rndne_f16", True, True),
   (  -1,   -1, 0x48, 0x48, 0x5f, "v_fract_f16", True, True),
   (  -1,   -1, 0x49, 0x49, 0x60, "v_sin_f16", True, True, InstrClass.ValuTranscendental32),
   (  -1,   -1, 0x4a, 0x4a, 0x61, "v_cos_f16", True, True, InstrClass.ValuTranscendental32),
   (  -1, 0x46, 0x4b, 0x4b,   -1, "v_exp_legacy_f32", True, True, InstrClass.ValuTranscendental32),
   (  -1, 0x45, 0x4c, 0x4c,   -1, "v_log_legacy_f32", True, True, InstrClass.ValuTranscendental32),
   (  -1,   -1,   -1, 0x4f, 0x62, "v_sat_pk_u8_i16", False, False),
   (  -1,   -1,   -1, 0x4d, 0x63, "v_cvt_norm_i16_f16", True, False),
   (  -1,   -1,   -1, 0x4e, 0x64, "v_cvt_norm_u16_f16", True, False),
   (  -1,   -1,   -1, 0x51, 0x65, "v_swap_b32", False, False),
   (  -1,   -1,   -1,   -1, 0x68, "v_swaprel_b32", False, False),
}
# Register every VOP1 row, forwarding the GFX7, GFX9 and GFX10 encodings plus
# the per-row input/output modifier flags and instruction class.
# NOTE(review): default_class() is defined outside this chunk - presumably it
# appends InstrClass.Valu32 to rows that carry no explicit class; confirm.
for vop1_row in default_class(VOP1, InstrClass.Valu32):
   gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod, cls = vop1_row
   opcode(name, gfx7, gfx9, gfx10, Format.VOP1, cls, in_mod, out_mod)
8537ec681f3Smrg
8547ec681f3Smrg
# VOPC instructions:

# Class-test compares. Rows are (GFX6, GFX7, GFX8, GFX9, GFX10, name) with an
# optional trailing InstrClass; the f64 variants use InstrClass.ValuDouble.
VOPC_CLASS = {
   (0x88, 0x88, 0x10, 0x10, 0x88, "v_cmp_class_f32"),
   (  -1,   -1, 0x14, 0x14, 0x8f, "v_cmp_class_f16"),
   (0x98, 0x98, 0x11, 0x11, 0x98, "v_cmpx_class_f32"),
   (  -1,   -1, 0x15, 0x15, 0x9f, "v_cmpx_class_f16"),
   (0xa8, 0xa8, 0x12, 0x12, 0xa8, "v_cmp_class_f64", InstrClass.ValuDouble),
   (0xb8, 0xb8, 0x13, 0x13, 0xb8, "v_cmpx_class_f64", InstrClass.ValuDouble),
}
# Registered with (True, False) as the two trailing opcode() arguments.
# NOTE(review): default_class() is defined outside this chunk - presumably it
# appends InstrClass.Valu32 to rows that carry no explicit class; confirm.
for class_row in default_class(VOPC_CLASS, InstrClass.Valu32):
   gfx6, gfx7, gfx8, gfx9, gfx10, name, cls = class_row
   opcode(name, gfx7, gfx9, gfx10, Format.VOPC, cls, True, False)
8677ec681f3Smrg
# Condition suffixes of the float compares; the list index is added to each
# base opcode below.
COMPF = ["f", "lt", "eq", "le", "gt", "lg", "ge", "o", "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru"]

# f16 compares: the first and second halves of COMPF use separate base
# opcodes, so each iteration registers v_cmp/v_cmpx for COMPF[i] and COMPF[i+8].
for i in range(8):
   for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in [
      (-1, -1, 0x20+i, 0x20+i, 0xc8+i, "v_cmp_"+COMPF[i]+"_f16"),
      (-1, -1, 0x30+i, 0x30+i, 0xd8+i, "v_cmpx_"+COMPF[i]+"_f16"),
      (-1, -1, 0x28+i, 0x28+i, 0xe8+i, "v_cmp_"+COMPF[i+8]+"_f16"),
      (-1, -1, 0x38+i, 0x38+i, 0xf8+i, "v_cmpx_"+COMPF[i+8]+"_f16"),
   ]:
      opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)

# f32 / f64 compares: all 16 conditions are contiguous from each base opcode.
for i in range(16):
   for (gfx6, gfx7, gfx8, gfx9, gfx10, name, compare_cls) in [
      (0x00+i, 0x00+i, 0x40+i, 0x40+i, 0x00+i, "v_cmp_"+COMPF[i]+"_f32", InstrClass.Valu32),
      (0x10+i, 0x10+i, 0x50+i, 0x50+i, 0x10+i, "v_cmpx_"+COMPF[i]+"_f32", InstrClass.Valu32),
      (0x20+i, 0x20+i, 0x60+i, 0x60+i, 0x20+i, "v_cmp_"+COMPF[i]+"_f64", InstrClass.ValuDouble),
      (0x30+i, 0x30+i, 0x70+i, 0x70+i, 0x30+i, "v_cmpx_"+COMPF[i]+"_f64", InstrClass.ValuDouble),
   ]:
      opcode(name, gfx7, gfx9, gfx10, Format.VOPC, compare_cls, True, False)
   # GFX_6_7
   # The v_cmps_*/v_cmpsx_* encodings below are only assigned: no opcode()
   # call follows them, so these mnemonics are not registered.
   (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x40+i, 0x40+i, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f32")
   (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x50+i, 0x50+i, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f32")
   (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x60+i, 0x60+i, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f64")
   (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x70+i, 0x70+i, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f64")
8947ec681f3Smrg
# Condition suffixes of the integer compares; the list index is added to each
# base opcode below.
COMPI = ["f", "lt", "eq", "le", "gt", "lg", "ge", "tru"]

# GFX_8_9
# 16-bit "f" and "tru" compares: GFX10 slot is -1 for these two conditions.
for i in (0, 7): # only 0 and 7
   for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in [
      (-1, -1, 0xa0+i, 0xa0+i, -1, "v_cmp_"+COMPI[i]+"_i16"),
      (-1, -1, 0xb0+i, 0xb0+i, -1, "v_cmpx_"+COMPI[i]+"_i16"),
      (-1, -1, 0xa8+i, 0xa8+i, -1, "v_cmp_"+COMPI[i]+"_u16"),
      (-1, -1, 0xb8+i, 0xb8+i, -1, "v_cmpx_"+COMPI[i]+"_u16"),
   ]:
      opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)

# Remaining 16-bit conditions, which also carry a GFX10 encoding.
for i in range(1, 7): # [1..6]
   for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in [
      (-1, -1, 0xa0+i, 0xa0+i, 0x88+i, "v_cmp_"+COMPI[i]+"_i16"),
      (-1, -1, 0xb0+i, 0xb0+i, 0x98+i, "v_cmpx_"+COMPI[i]+"_i16"),
      (-1, -1, 0xa8+i, 0xa8+i, 0xa8+i, "v_cmp_"+COMPI[i]+"_u16"),
      (-1, -1, 0xb8+i, 0xb8+i, 0xb8+i, "v_cmpx_"+COMPI[i]+"_u16"),
   ]:
      opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)

# 32-bit compares are registered as InstrClass.Valu32, 64-bit ones as
# InstrClass.Valu64.
for i in range(8):
   for (gfx6, gfx7, gfx8, gfx9, gfx10, name, compare_cls) in [
      (0x80+i, 0x80+i, 0xc0+i, 0xc0+i, 0x80+i, "v_cmp_"+COMPI[i]+"_i32", InstrClass.Valu32),
      (0x90+i, 0x90+i, 0xd0+i, 0xd0+i, 0x90+i, "v_cmpx_"+COMPI[i]+"_i32", InstrClass.Valu32),
      (0xa0+i, 0xa0+i, 0xe0+i, 0xe0+i, 0xa0+i, "v_cmp_"+COMPI[i]+"_i64", InstrClass.Valu64),
      (0xb0+i, 0xb0+i, 0xf0+i, 0xf0+i, 0xb0+i, "v_cmpx_"+COMPI[i]+"_i64", InstrClass.Valu64),
      (0xc0+i, 0xc0+i, 0xc8+i, 0xc8+i, 0xc0+i, "v_cmp_"+COMPI[i]+"_u32", InstrClass.Valu32),
      (0xd0+i, 0xd0+i, 0xd8+i, 0xd8+i, 0xd0+i, "v_cmpx_"+COMPI[i]+"_u32", InstrClass.Valu32),
      (0xe0+i, 0xe0+i, 0xe8+i, 0xe8+i, 0xe0+i, "v_cmp_"+COMPI[i]+"_u64", InstrClass.Valu64),
      (0xf0+i, 0xf0+i, 0xf8+i, 0xf8+i, 0xf0+i, "v_cmpx_"+COMPI[i]+"_u64", InstrClass.Valu64),
   ]:
      opcode(name, gfx7, gfx9, gfx10, Format.VOPC, compare_cls)
9357ec681f3Smrg
9367ec681f3Smrg
# VOPP instructions: packed 16bit instructions - 1 or 2 inputs and 1 output
#
# Each row is (opcode, name, modifiers); the same opcode value is used for
# both the GFX9 and the GFX10 encoding by the registration loop after the table.
VOPP = {
   # opcode, name, input/output modifiers
   # packed integer
   (0x00, "v_pk_mad_i16", False),
   (0x01, "v_pk_mul_lo_u16", False),
   (0x02, "v_pk_add_i16", False),
   (0x03, "v_pk_sub_i16", False),
   (0x04, "v_pk_lshlrev_b16", False),
   (0x05, "v_pk_lshrrev_b16", False),
   (0x06, "v_pk_ashrrev_i16", False),
   (0x07, "v_pk_max_i16", False),
   (0x08, "v_pk_min_i16", False),
   (0x09, "v_pk_mad_u16", False),
   (0x0a, "v_pk_add_u16", False),
   (0x0b, "v_pk_sub_u16", False),
   (0x0c, "v_pk_max_u16", False),
   (0x0d, "v_pk_min_u16", False),
   # packed float
   (0x0e, "v_pk_fma_f16", True),
   (0x0f, "v_pk_add_f16", True),
   (0x10, "v_pk_mul_f16", True),
   (0x11, "v_pk_min_f16", True),
   (0x12, "v_pk_max_f16", True),
   # mixed precision
   (0x20, "v_fma_mix_f32", True), # v_mad_mix_f32 in VEGA ISA, v_fma_mix_f32 in RDNA ISA
   (0x21, "v_fma_mixlo_f16", True), # v_mad_mixlo_f16 in VEGA ISA, v_fma_mixlo_f16 in RDNA ISA
   (0x22, "v_fma_mixhi_f16", True), # v_mad_mixhi_f16 in VEGA ISA, v_fma_mixhi_f16 in RDNA ISA
}
# note that these are only supported on gfx9+ so we'll need to distinguish between gfx8 and gfx9 here
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, -1, code, code, name)
for vopp_row in VOPP:
   code, name, modifiers = vopp_row
   opcode(name, -1, code, code, Format.VOP3P, InstrClass.Valu32, modifiers, modifiers)

# Dot products: GFX9 and GFX10 encodings differ, so they are listed
# separately from the VOPP table as (name, GFX9 opcode, GFX10 opcode).
for (dot_name, dot_gfx9, dot_gfx10) in [
   ("v_dot2_i32_i16", 0x26, 0x14),
   ("v_dot2_u32_u16", 0x27, 0x15),
   ("v_dot4_i32_i8", 0x28, 0x16),
   ("v_dot4_u32_u8", 0x29, 0x17),
]:
   opcode(dot_name, -1, dot_gfx9, dot_gfx10, Format.VOP3P, InstrClass.Valu32)
9717ec681f3Smrg
9727ec681f3Smrg
# VINTERP instructions:
# Each row is (opcode, name); the one opcode value is passed for all three
# encoding arguments of opcode().
VINTRP = {
   (0x00, "v_interp_p1_f32"),
   (0x01, "v_interp_p2_f32"),
   (0x02, "v_interp_mov_f32"),
}
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
for vintrp_row in VINTRP:
   code, name = vintrp_row
   opcode(name, code, code, code, Format.VINTRP, InstrClass.Valu32)
9827ec681f3Smrg
# VOP3 instructions: 3 inputs, 1 output
# VOP3b instructions: have a unique scalar output, e.g. VOP2 with vcc out
#
# Row layout: (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod[, cls]).
# Only the gfx7, gfx9 and gfx10 columns are forwarded to opcode() below; the
# gfx6 and gfx8 columns are kept in the table but not used by the loop.
# -1 presumably marks a generation without an encoding for the instruction
# (TODO confirm against opcode()).
VOP3 = {
   (0x140, 0x140, 0x1c0, 0x1c0, 0x140, "v_mad_legacy_f32", True, True), # GFX6-GFX10
   (0x141, 0x141, 0x1c1, 0x1c1, 0x141, "v_mad_f32", True, True),
   (0x142, 0x142, 0x1c2, 0x1c2, 0x142, "v_mad_i32_i24", False, False),
   (0x143, 0x143, 0x1c3, 0x1c3, 0x143, "v_mad_u32_u24", False, False),
   (0x144, 0x144, 0x1c4, 0x1c4, 0x144, "v_cubeid_f32", True, True),
   (0x145, 0x145, 0x1c5, 0x1c5, 0x145, "v_cubesc_f32", True, True),
   (0x146, 0x146, 0x1c6, 0x1c6, 0x146, "v_cubetc_f32", True, True),
   (0x147, 0x147, 0x1c7, 0x1c7, 0x147, "v_cubema_f32", True, True),
   (0x148, 0x148, 0x1c8, 0x1c8, 0x148, "v_bfe_u32", False, False),
   (0x149, 0x149, 0x1c9, 0x1c9, 0x149, "v_bfe_i32", False, False),
   (0x14a, 0x14a, 0x1ca, 0x1ca, 0x14a, "v_bfi_b32", False, False),
   (0x14b, 0x14b, 0x1cb, 0x1cb, 0x14b, "v_fma_f32", True, True, InstrClass.ValuFma),
   (0x14c, 0x14c, 0x1cc, 0x1cc, 0x14c, "v_fma_f64", True, True, InstrClass.ValuDouble),
   (0x14d, 0x14d, 0x1cd, 0x1cd, 0x14d, "v_lerp_u8", False, False),
   (0x14e, 0x14e, 0x1ce, 0x1ce, 0x14e, "v_alignbit_b32", False, False),
   (0x14f, 0x14f, 0x1cf, 0x1cf, 0x14f, "v_alignbyte_b32", False, False),
   (0x150, 0x150,    -1,    -1, 0x150, "v_mullit_f32", True, True),
   (0x151, 0x151, 0x1d0, 0x1d0, 0x151, "v_min3_f32", True, True),
   (0x152, 0x152, 0x1d1, 0x1d1, 0x152, "v_min3_i32", False, False),
   (0x153, 0x153, 0x1d2, 0x1d2, 0x153, "v_min3_u32", False, False),
   (0x154, 0x154, 0x1d3, 0x1d3, 0x154, "v_max3_f32", True, True),
   (0x155, 0x155, 0x1d4, 0x1d4, 0x155, "v_max3_i32", False, False),
   (0x156, 0x156, 0x1d5, 0x1d5, 0x156, "v_max3_u32", False, False),
   (0x157, 0x157, 0x1d6, 0x1d6, 0x157, "v_med3_f32", True, True),
   (0x158, 0x158, 0x1d7, 0x1d7, 0x158, "v_med3_i32", False, False),
   (0x159, 0x159, 0x1d8, 0x1d8, 0x159, "v_med3_u32", False, False),
   (0x15a, 0x15a, 0x1d9, 0x1d9, 0x15a, "v_sad_u8", False, False),
   (0x15b, 0x15b, 0x1da, 0x1da, 0x15b, "v_sad_hi_u8", False, False),
   (0x15c, 0x15c, 0x1db, 0x1db, 0x15c, "v_sad_u16", False, False),
   (0x15d, 0x15d, 0x1dc, 0x1dc, 0x15d, "v_sad_u32", False, False),
   (0x15e, 0x15e, 0x1dd, 0x1dd, 0x15e, "v_cvt_pk_u8_f32", True, False),
   (0x15f, 0x15f, 0x1de, 0x1de, 0x15f, "v_div_fixup_f32", True, True),
   (0x160, 0x160, 0x1df, 0x1df, 0x160, "v_div_fixup_f64", True, True),
   (0x161, 0x161,    -1,    -1,    -1, "v_lshl_b64", False, False, InstrClass.Valu64),
   (0x162, 0x162,    -1,    -1,    -1, "v_lshr_b64", False, False, InstrClass.Valu64),
   (0x163, 0x163,    -1,    -1,    -1, "v_ashr_i64", False, False, InstrClass.Valu64),
   (0x164, 0x164, 0x280, 0x280, 0x164, "v_add_f64", True, True, InstrClass.ValuDoubleAdd),
   (0x165, 0x165, 0x281, 0x281, 0x165, "v_mul_f64", True, True, InstrClass.ValuDouble),
   (0x166, 0x166, 0x282, 0x282, 0x166, "v_min_f64", True, True, InstrClass.ValuDouble),
   (0x167, 0x167, 0x283, 0x283, 0x167, "v_max_f64", True, True, InstrClass.ValuDouble),
   (0x168, 0x168, 0x284, 0x284, 0x168, "v_ldexp_f64", False, True, InstrClass.ValuDouble), # src1 can take input modifiers
   (0x169, 0x169, 0x285, 0x285, 0x169, "v_mul_lo_u32", False, False, InstrClass.ValuQuarterRate32),
   (0x16a, 0x16a, 0x286, 0x286, 0x16a, "v_mul_hi_u32", False, False, InstrClass.ValuQuarterRate32),
   (0x16b, 0x16b, 0x285, 0x285, 0x16b, "v_mul_lo_i32", False, False, InstrClass.ValuQuarterRate32), # identical to v_mul_lo_u32
   (0x16c, 0x16c, 0x287, 0x287, 0x16c, "v_mul_hi_i32", False, False, InstrClass.ValuQuarterRate32),
   (0x16d, 0x16d, 0x1e0, 0x1e0, 0x16d, "v_div_scale_f32", True, True), # writes to VCC
   (0x16e, 0x16e, 0x1e1, 0x1e1, 0x16e, "v_div_scale_f64", True, True, InstrClass.ValuDouble), # writes to VCC
   (0x16f, 0x16f, 0x1e2, 0x1e2, 0x16f, "v_div_fmas_f32", True, True), # takes VCC input
   (0x170, 0x170, 0x1e3, 0x1e3, 0x170, "v_div_fmas_f64", True, True, InstrClass.ValuDouble), # takes VCC input
   (0x171, 0x171, 0x1e4, 0x1e4, 0x171, "v_msad_u8", False, False),
   (0x172, 0x172, 0x1e5, 0x1e5, 0x172, "v_qsad_pk_u16_u8", False, False),
   (0x172,    -1,    -1,    -1,    -1, "v_qsad_u8", False, False), # what's the difference?
   (0x173, 0x173, 0x1e6, 0x1e6, 0x173, "v_mqsad_pk_u16_u8", False, False),
   (0x173,    -1,    -1,    -1,    -1, "v_mqsad_u8", False, False), # what's the difference?
   (0x174, 0x174, 0x292, 0x292, 0x174, "v_trig_preop_f64", False, False, InstrClass.ValuDouble),
   (   -1, 0x175, 0x1e7, 0x1e7, 0x175, "v_mqsad_u32_u8", False, False),
   (   -1, 0x176, 0x1e8, 0x1e8, 0x176, "v_mad_u64_u32", False, False, InstrClass.Valu64),
   (   -1, 0x177, 0x1e9, 0x1e9, 0x177, "v_mad_i64_i32", False, False, InstrClass.Valu64),
   (   -1,    -1, 0x1ea, 0x1ea,    -1, "v_mad_legacy_f16", True, True),
   (   -1,    -1, 0x1eb, 0x1eb,    -1, "v_mad_legacy_u16", False, False),
   (   -1,    -1, 0x1ec, 0x1ec,    -1, "v_mad_legacy_i16", False, False),
   (   -1,    -1, 0x1ed, 0x1ed, 0x344, "v_perm_b32", False, False),
   (   -1,    -1, 0x1ee, 0x1ee,    -1, "v_fma_legacy_f16", True, True, InstrClass.ValuFma),
   (   -1,    -1, 0x1ef, 0x1ef,    -1, "v_div_fixup_legacy_f16", True, True),
   (0x12c, 0x12c, 0x1f0, 0x1f0,    -1, "v_cvt_pkaccum_u8_f32", True, False),
   (   -1,    -1,    -1, 0x1f1, 0x373, "v_mad_u32_u16", False, False),
   (   -1,    -1,    -1, 0x1f2, 0x375, "v_mad_i32_i16", False, False),
   (   -1,    -1,    -1, 0x1f3, 0x345, "v_xad_u32", False, False),
   (   -1,    -1,    -1, 0x1f4, 0x351, "v_min3_f16", True, True),
   (   -1,    -1,    -1, 0x1f5, 0x352, "v_min3_i16", False, False),
   (   -1,    -1,    -1, 0x1f6, 0x353, "v_min3_u16", False, False),
   (   -1,    -1,    -1, 0x1f7, 0x354, "v_max3_f16", True, True),
   (   -1,    -1,    -1, 0x1f8, 0x355, "v_max3_i16", False, False),
   (   -1,    -1,    -1, 0x1f9, 0x356, "v_max3_u16", False, False),
   (   -1,    -1,    -1, 0x1fa, 0x357, "v_med3_f16", True, True),
   (   -1,    -1,    -1, 0x1fb, 0x358, "v_med3_i16", False, False),
   (   -1,    -1,    -1, 0x1fc, 0x359, "v_med3_u16", False, False),
   (   -1,    -1,    -1, 0x1fd, 0x346, "v_lshl_add_u32", False, False),
   (   -1,    -1,    -1, 0x1fe, 0x347, "v_add_lshl_u32", False, False),
   (   -1,    -1,    -1, 0x1ff, 0x36d, "v_add3_u32", False, False),
   (   -1,    -1,    -1, 0x200, 0x36f, "v_lshl_or_b32", False, False),
   (   -1,    -1,    -1, 0x201, 0x371, "v_and_or_b32", False, False),
   (   -1,    -1,    -1, 0x202, 0x372, "v_or3_b32", False, False),
   (   -1,    -1,    -1, 0x203,    -1, "v_mad_f16", True, True),
   (   -1,    -1,    -1, 0x204, 0x340, "v_mad_u16", False, False),
   (   -1,    -1,    -1, 0x205, 0x35e, "v_mad_i16", False, False),
   (   -1,    -1,    -1, 0x206, 0x34b, "v_fma_f16", True, True),
   (   -1,    -1,    -1, 0x207, 0x35f, "v_div_fixup_f16", True, True),
   (   -1,    -1, 0x274, 0x274, 0x342, "v_interp_p1ll_f16", True, True),
   (   -1,    -1, 0x275, 0x275, 0x343, "v_interp_p1lv_f16", True, True),
   (   -1,    -1, 0x276, 0x276,    -1, "v_interp_p2_legacy_f16", True, True),
   (   -1,    -1,    -1, 0x277, 0x35a, "v_interp_p2_f16", True, True),
   (0x12b, 0x12b, 0x288, 0x288, 0x362, "v_ldexp_f32", False, True),
   (   -1,    -1, 0x289, 0x289, 0x360, "v_readlane_b32_e64", False, False),
   (   -1,    -1, 0x28a, 0x28a, 0x361, "v_writelane_b32_e64", False, False),
   (0x122, 0x122, 0x28b, 0x28b, 0x364, "v_bcnt_u32_b32", False, False),
   (0x123, 0x123, 0x28c, 0x28c, 0x365, "v_mbcnt_lo_u32_b32", False, False),
   (   -1,    -1, 0x28d, 0x28d, 0x366, "v_mbcnt_hi_u32_b32_e64", False, False),
   (   -1,    -1, 0x28f, 0x28f, 0x2ff, "v_lshlrev_b64", False, False, InstrClass.Valu64),
   (   -1,    -1, 0x290, 0x290, 0x300, "v_lshrrev_b64", False, False, InstrClass.Valu64),
   (   -1,    -1, 0x291, 0x291, 0x301, "v_ashrrev_i64", False, False, InstrClass.Valu64),
   (0x11e, 0x11e, 0x293, 0x293, 0x363, "v_bfm_b32", False, False),
   (0x12d, 0x12d, 0x294, 0x294, 0x368, "v_cvt_pknorm_i16_f32", True, False),
   (0x12e, 0x12e, 0x295, 0x295, 0x369, "v_cvt_pknorm_u16_f32", True, False),
   (0x12f, 0x12f, 0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32_e64", True, False), # GFX6_7_10 is VOP2 with opcode 0x02f
   (0x130, 0x130, 0x297, 0x297, 0x36a, "v_cvt_pk_u16_u32", False, False),
   (0x131, 0x131, 0x298, 0x298, 0x36b, "v_cvt_pk_i16_i32", False, False),
   (   -1,    -1,    -1, 0x299, 0x312, "v_cvt_pknorm_i16_f16", True, False),
   (   -1,    -1,    -1, 0x29a, 0x313, "v_cvt_pknorm_u16_f16", True, False),
   (   -1,    -1,    -1, 0x29c, 0x37f, "v_add_i32", False, False),
   (   -1,    -1,    -1, 0x29d, 0x376, "v_sub_i32", False, False),
   (   -1,    -1,    -1, 0x29e, 0x30d, "v_add_i16", False, False),
   (   -1,    -1,    -1, 0x29f, 0x30e, "v_sub_i16", False, False),
   (   -1,    -1,    -1, 0x2a0, 0x311, "v_pack_b32_f16", True, False),
   (   -1,    -1,    -1,    -1, 0x178, "v_xor3_b32", False, False),
   (   -1,    -1,    -1,    -1, 0x377, "v_permlane16_b32", False, False),
   (   -1,    -1,    -1,    -1, 0x378, "v_permlanex16_b32", False, False),
   (   -1,    -1,    -1,    -1, 0x30f, "v_add_co_u32_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x310, "v_sub_co_u32_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x319, "v_subrev_co_u32_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x303, "v_add_u16_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x304, "v_sub_u16_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x305, "v_mul_lo_u16_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x309, "v_max_u16_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x30a, "v_max_i16_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x30b, "v_min_u16_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x30c, "v_min_i16_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x307, "v_lshrrev_b16_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x308, "v_ashrrev_i16_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x314, "v_lshlrev_b16_e64", False, False),
   (   -1,    -1,    -1,    -1, 0x140, "v_fma_legacy_f32", True, True, InstrClass.ValuFma), #GFX10.3+
}
# Rows without a trailing InstrClass go through default_class() with
# InstrClass.Valu32; presumably it fills in that class for them (verify
# against its definition). The loop expects nine fields per row either way.
# The rows are sorted first so registration order is reproducible: VOP3 is a
# set, and set iteration order follows string hashing, which differs between
# interpreter runs. Names are unique, so tuple comparison never reaches the
# optional InstrClass element.
for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod, cls) in default_class(sorted(VOP3), InstrClass.Valu32):
   opcode(name, gfx7, gfx9, gfx10, Format.VOP3, cls, in_mod, out_mod)
11207ec681f3Smrg
11217ec681f3Smrg
# DS instructions: 3 inputs (1 addr, 2 data), 1 output
#
# Row layout: (gfx6, gfx7, gfx8, gfx9, gfx10, name).
# Only the gfx7, gfx9 and gfx10 columns are forwarded to opcode() below.
# -1 presumably marks a generation without an encoding for the instruction
# (TODO confirm against opcode()).
DS = {
   (0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"),
   (0x01, 0x01, 0x01, 0x01, 0x01, "ds_sub_u32"),
   (0x02, 0x02, 0x02, 0x02, 0x02, "ds_rsub_u32"),
   (0x03, 0x03, 0x03, 0x03, 0x03, "ds_inc_u32"),
   (0x04, 0x04, 0x04, 0x04, 0x04, "ds_dec_u32"),
   (0x05, 0x05, 0x05, 0x05, 0x05, "ds_min_i32"),
   (0x06, 0x06, 0x06, 0x06, 0x06, "ds_max_i32"),
   (0x07, 0x07, 0x07, 0x07, 0x07, "ds_min_u32"),
   (0x08, 0x08, 0x08, 0x08, 0x08, "ds_max_u32"),
   (0x09, 0x09, 0x09, 0x09, 0x09, "ds_and_b32"),
   (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "ds_or_b32"),
   (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "ds_xor_b32"),
   (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "ds_mskor_b32"),
   (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "ds_write_b32"),
   (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "ds_write2_b32"),
   (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "ds_write2st64_b32"),
   (0x10, 0x10, 0x10, 0x10, 0x10, "ds_cmpst_b32"),
   (0x11, 0x11, 0x11, 0x11, 0x11, "ds_cmpst_f32"),
   (0x12, 0x12, 0x12, 0x12, 0x12, "ds_min_f32"),
   (0x13, 0x13, 0x13, 0x13, 0x13, "ds_max_f32"),
   (  -1, 0x14, 0x14, 0x14, 0x14, "ds_nop"),
   (  -1,   -1, 0x15, 0x15, 0x15, "ds_add_f32"),
   (  -1,   -1, 0x1d, 0x1d, 0xb0, "ds_write_addtid_b32"),
   (0x1e, 0x1e, 0x1e, 0x1e, 0x1e, "ds_write_b8"),
   (0x1f, 0x1f, 0x1f, 0x1f, 0x1f, "ds_write_b16"),
   (0x20, 0x20, 0x20, 0x20, 0x20, "ds_add_rtn_u32"),
   (0x21, 0x21, 0x21, 0x21, 0x21, "ds_sub_rtn_u32"),
   (0x22, 0x22, 0x22, 0x22, 0x22, "ds_rsub_rtn_u32"),
   (0x23, 0x23, 0x23, 0x23, 0x23, "ds_inc_rtn_u32"),
   (0x24, 0x24, 0x24, 0x24, 0x24, "ds_dec_rtn_u32"),
   (0x25, 0x25, 0x25, 0x25, 0x25, "ds_min_rtn_i32"),
   (0x26, 0x26, 0x26, 0x26, 0x26, "ds_max_rtn_i32"),
   (0x27, 0x27, 0x27, 0x27, 0x27, "ds_min_rtn_u32"),
   (0x28, 0x28, 0x28, 0x28, 0x28, "ds_max_rtn_u32"),
   (0x29, 0x29, 0x29, 0x29, 0x29, "ds_and_rtn_b32"),
   (0x2a, 0x2a, 0x2a, 0x2a, 0x2a, "ds_or_rtn_b32"),
   (0x2b, 0x2b, 0x2b, 0x2b, 0x2b, "ds_xor_rtn_b32"),
   (0x2c, 0x2c, 0x2c, 0x2c, 0x2c, "ds_mskor_rtn_b32"),
   (0x2d, 0x2d, 0x2d, 0x2d, 0x2d, "ds_wrxchg_rtn_b32"),
   (0x2e, 0x2e, 0x2e, 0x2e, 0x2e, "ds_wrxchg2_rtn_b32"),
   (0x2f, 0x2f, 0x2f, 0x2f, 0x2f, "ds_wrxchg2st64_rtn_b32"),
   (0x30, 0x30, 0x30, 0x30, 0x30, "ds_cmpst_rtn_b32"),
   (0x31, 0x31, 0x31, 0x31, 0x31, "ds_cmpst_rtn_f32"),
   (0x32, 0x32, 0x32, 0x32, 0x32, "ds_min_rtn_f32"),
   (0x33, 0x33, 0x33, 0x33, 0x33, "ds_max_rtn_f32"),
   (  -1, 0x34, 0x34, 0x34, 0x34, "ds_wrap_rtn_b32"),
   (  -1,   -1, 0x35, 0x35, 0x55, "ds_add_rtn_f32"),
   (0x36, 0x36, 0x36, 0x36, 0x36, "ds_read_b32"),
   (0x37, 0x37, 0x37, 0x37, 0x37, "ds_read2_b32"),
   (0x38, 0x38, 0x38, 0x38, 0x38, "ds_read2st64_b32"),
   (0x39, 0x39, 0x39, 0x39, 0x39, "ds_read_i8"),
   (0x3a, 0x3a, 0x3a, 0x3a, 0x3a, "ds_read_u8"),
   (0x3b, 0x3b, 0x3b, 0x3b, 0x3b, "ds_read_i16"),
   (0x3c, 0x3c, 0x3c, 0x3c, 0x3c, "ds_read_u16"),
   (0x35, 0x35, 0x3d, 0x3d, 0x35, "ds_swizzle_b32"), #data1 & offset, no addr/data2
   (  -1,   -1, 0x3e, 0x3e, 0xb2, "ds_permute_b32"),
   (  -1,   -1, 0x3f, 0x3f, 0xb3, "ds_bpermute_b32"),
   (0x40, 0x40, 0x40, 0x40, 0x40, "ds_add_u64"),
   (0x41, 0x41, 0x41, 0x41, 0x41, "ds_sub_u64"),
   (0x42, 0x42, 0x42, 0x42, 0x42, "ds_rsub_u64"),
   (0x43, 0x43, 0x43, 0x43, 0x43, "ds_inc_u64"),
   (0x44, 0x44, 0x44, 0x44, 0x44, "ds_dec_u64"),
   (0x45, 0x45, 0x45, 0x45, 0x45, "ds_min_i64"),
   (0x46, 0x46, 0x46, 0x46, 0x46, "ds_max_i64"),
   (0x47, 0x47, 0x47, 0x47, 0x47, "ds_min_u64"),
   (0x48, 0x48, 0x48, 0x48, 0x48, "ds_max_u64"),
   (0x49, 0x49, 0x49, 0x49, 0x49, "ds_and_b64"),
   (0x4a, 0x4a, 0x4a, 0x4a, 0x4a, "ds_or_b64"),
   (0x4b, 0x4b, 0x4b, 0x4b, 0x4b, "ds_xor_b64"),
   (0x4c, 0x4c, 0x4c, 0x4c, 0x4c, "ds_mskor_b64"),
   (0x4d, 0x4d, 0x4d, 0x4d, 0x4d, "ds_write_b64"),
   (0x4e, 0x4e, 0x4e, 0x4e, 0x4e, "ds_write2_b64"),
   (0x4f, 0x4f, 0x4f, 0x4f, 0x4f, "ds_write2st64_b64"),
   (0x50, 0x50, 0x50, 0x50, 0x50, "ds_cmpst_b64"),
   (0x51, 0x51, 0x51, 0x51, 0x51, "ds_cmpst_f64"),
   (0x52, 0x52, 0x52, 0x52, 0x52, "ds_min_f64"),
   (0x53, 0x53, 0x53, 0x53, 0x53, "ds_max_f64"),
   (  -1,   -1,   -1, 0x54, 0xa0, "ds_write_b8_d16_hi"),
   (  -1,   -1,   -1, 0x55, 0xa1, "ds_write_b16_d16_hi"),
   (  -1,   -1,   -1, 0x56, 0xa2, "ds_read_u8_d16"),
   (  -1,   -1,   -1, 0x57, 0xa3, "ds_read_u8_d16_hi"),
   (  -1,   -1,   -1, 0x58, 0xa4, "ds_read_i8_d16"),
   (  -1,   -1,   -1, 0x59, 0xa5, "ds_read_i8_d16_hi"),
   (  -1,   -1,   -1, 0x5a, 0xa6, "ds_read_u16_d16"),
   (  -1,   -1,   -1, 0x5b, 0xa7, "ds_read_u16_d16_hi"),
   (0x60, 0x60, 0x60, 0x60, 0x60, "ds_add_rtn_u64"),
   (0x61, 0x61, 0x61, 0x61, 0x61, "ds_sub_rtn_u64"),
   (0x62, 0x62, 0x62, 0x62, 0x62, "ds_rsub_rtn_u64"),
   (0x63, 0x63, 0x63, 0x63, 0x63, "ds_inc_rtn_u64"),
   (0x64, 0x64, 0x64, 0x64, 0x64, "ds_dec_rtn_u64"),
   (0x65, 0x65, 0x65, 0x65, 0x65, "ds_min_rtn_i64"),
   (0x66, 0x66, 0x66, 0x66, 0x66, "ds_max_rtn_i64"),
   (0x67, 0x67, 0x67, 0x67, 0x67, "ds_min_rtn_u64"),
   (0x68, 0x68, 0x68, 0x68, 0x68, "ds_max_rtn_u64"),
   (0x69, 0x69, 0x69, 0x69, 0x69, "ds_and_rtn_b64"),
   (0x6a, 0x6a, 0x6a, 0x6a, 0x6a, "ds_or_rtn_b64"),
   (0x6b, 0x6b, 0x6b, 0x6b, 0x6b, "ds_xor_rtn_b64"),
   (0x6c, 0x6c, 0x6c, 0x6c, 0x6c, "ds_mskor_rtn_b64"),
   (0x6d, 0x6d, 0x6d, 0x6d, 0x6d, "ds_wrxchg_rtn_b64"),
   (0x6e, 0x6e, 0x6e, 0x6e, 0x6e, "ds_wrxchg2_rtn_b64"),
   (0x6f, 0x6f, 0x6f, 0x6f, 0x6f, "ds_wrxchg2st64_rtn_b64"),
   (0x70, 0x70, 0x70, 0x70, 0x70, "ds_cmpst_rtn_b64"),
   (0x71, 0x71, 0x71, 0x71, 0x71, "ds_cmpst_rtn_f64"),
   (0x72, 0x72, 0x72, 0x72, 0x72, "ds_min_rtn_f64"),
   (0x73, 0x73, 0x73, 0x73, 0x73, "ds_max_rtn_f64"),
   (0x76, 0x76, 0x76, 0x76, 0x76, "ds_read_b64"),
   (0x77, 0x77, 0x77, 0x77, 0x77, "ds_read2_b64"),
   (0x78, 0x78, 0x78, 0x78, 0x78, "ds_read2st64_b64"),
   (  -1, 0x7e, 0x7e, 0x7e, 0x7e, "ds_condxchg32_rtn_b64"),
   (0x80, 0x80, 0x80, 0x80, 0x80, "ds_add_src2_u32"),
   (0x81, 0x81, 0x81, 0x81, 0x81, "ds_sub_src2_u32"),
   (0x82, 0x82, 0x82, 0x82, 0x82, "ds_rsub_src2_u32"),
   (0x83, 0x83, 0x83, 0x83, 0x83, "ds_inc_src2_u32"),
   (0x84, 0x84, 0x84, 0x84, 0x84, "ds_dec_src2_u32"),
   (0x85, 0x85, 0x85, 0x85, 0x85, "ds_min_src2_i32"),
   (0x86, 0x86, 0x86, 0x86, 0x86, "ds_max_src2_i32"),
   (0x87, 0x87, 0x87, 0x87, 0x87, "ds_min_src2_u32"),
   (0x88, 0x88, 0x88, 0x88, 0x88, "ds_max_src2_u32"),
   (0x89, 0x89, 0x89, 0x89, 0x89, "ds_and_src2_b32"),
   (0x8a, 0x8a, 0x8a, 0x8a, 0x8a, "ds_or_src2_b32"),
   (0x8b, 0x8b, 0x8b, 0x8b, 0x8b, "ds_xor_src2_b32"),
   (0x8d, 0x8d, 0x8d, 0x8d, 0x8d, "ds_write_src2_b32"),
   (0x92, 0x92, 0x92, 0x92, 0x92, "ds_min_src2_f32"),
   (0x93, 0x93, 0x93, 0x93, 0x93, "ds_max_src2_f32"),
   (  -1,   -1, 0x95, 0x95, 0x95, "ds_add_src2_f32"),
   (  -1, 0x18, 0x98, 0x98, 0x18, "ds_gws_sema_release_all"),
   (0x19, 0x19, 0x99, 0x99, 0x19, "ds_gws_init"),
   (0x1a, 0x1a, 0x9a, 0x9a, 0x1a, "ds_gws_sema_v"),
   (0x1b, 0x1b, 0x9b, 0x9b, 0x1b, "ds_gws_sema_br"),
   (0x1c, 0x1c, 0x9c, 0x9c, 0x1c, "ds_gws_sema_p"),
   (0x1d, 0x1d, 0x9d, 0x9d, 0x1d, "ds_gws_barrier"),
   (  -1,   -1, 0xb6, 0xb6, 0xb1, "ds_read_addtid_b32"),
   (0x3d, 0x3d, 0xbd, 0xbd, 0x3d, "ds_consume"),
   (0x3e, 0x3e, 0xbe, 0xbe, 0x3e, "ds_append"),
   (0x3f, 0x3f, 0xbf, 0xbf, 0x3f, "ds_ordered_count"),
   (0xc0, 0xc0, 0xc0, 0xc0, 0xc0, "ds_add_src2_u64"),
   (0xc1, 0xc1, 0xc1, 0xc1, 0xc1, "ds_sub_src2_u64"),
   (0xc2, 0xc2, 0xc2, 0xc2, 0xc2, "ds_rsub_src2_u64"),
   (0xc3, 0xc3, 0xc3, 0xc3, 0xc3, "ds_inc_src2_u64"),
   (0xc4, 0xc4, 0xc4, 0xc4, 0xc4, "ds_dec_src2_u64"),
   (0xc5, 0xc5, 0xc5, 0xc5, 0xc5, "ds_min_src2_i64"),
   (0xc6, 0xc6, 0xc6, 0xc6, 0xc6, "ds_max_src2_i64"),
   (0xc7, 0xc7, 0xc7, 0xc7, 0xc7, "ds_min_src2_u64"),
   (0xc8, 0xc8, 0xc8, 0xc8, 0xc8, "ds_max_src2_u64"),
   (0xc9, 0xc9, 0xc9, 0xc9, 0xc9, "ds_and_src2_b64"),
   (0xca, 0xca, 0xca, 0xca, 0xca, "ds_or_src2_b64"),
   (0xcb, 0xcb, 0xcb, 0xcb, 0xcb, "ds_xor_src2_b64"),
   (0xcd, 0xcd, 0xcd, 0xcd, 0xcd, "ds_write_src2_b64"),
   (0xd2, 0xd2, 0xd2, 0xd2, 0xd2, "ds_min_src2_f64"),
   (0xd3, 0xd3, 0xd3, 0xd3, 0xd3, "ds_max_src2_f64"),
   (  -1, 0xde, 0xde, 0xde, 0xde, "ds_write_b96"),
   (  -1, 0xdf, 0xdf, 0xdf, 0xdf, "ds_write_b128"),
   (  -1, 0xfd, 0xfd,   -1,   -1, "ds_condxchg32_rtn_b128"),
   (  -1, 0xfe, 0xfe, 0xfe, 0xfe, "ds_read_b96"),
   (  -1, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"),
}
# sorted() makes the registration order reproducible: DS is a set, and set
# iteration order follows string hashing, which differs between runs.
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in sorted(DS):
   opcode(name, gfx7, gfx9, gfx10, Format.DS, InstrClass.DS)
12827ec681f3Smrg
# MUBUF instructions:
#
# Row layout: (gfx6, gfx7, gfx8, gfx9, gfx10, name).
# Only the gfx7, gfx9 and gfx10 columns are forwarded to opcode() below.
# -1 presumably marks a generation without an encoding for the instruction
# (TODO confirm against opcode()).
MUBUF = {
   (0x00, 0x00, 0x00, 0x00, 0x00, "buffer_load_format_x"),
   (0x01, 0x01, 0x01, 0x01, 0x01, "buffer_load_format_xy"),
   (0x02, 0x02, 0x02, 0x02, 0x02, "buffer_load_format_xyz"),
   (0x03, 0x03, 0x03, 0x03, 0x03, "buffer_load_format_xyzw"),
   (0x04, 0x04, 0x04, 0x04, 0x04, "buffer_store_format_x"),
   (0x05, 0x05, 0x05, 0x05, 0x05, "buffer_store_format_xy"),
   (0x06, 0x06, 0x06, 0x06, 0x06, "buffer_store_format_xyz"),
   (0x07, 0x07, 0x07, 0x07, 0x07, "buffer_store_format_xyzw"),
   (  -1,   -1, 0x08, 0x08, 0x80, "buffer_load_format_d16_x"),
   (  -1,   -1, 0x09, 0x09, 0x81, "buffer_load_format_d16_xy"),
   (  -1,   -1, 0x0a, 0x0a, 0x82, "buffer_load_format_d16_xyz"),
   (  -1,   -1, 0x0b, 0x0b, 0x83, "buffer_load_format_d16_xyzw"),
   (  -1,   -1, 0x0c, 0x0c, 0x84, "buffer_store_format_d16_x"),
   (  -1,   -1, 0x0d, 0x0d, 0x85, "buffer_store_format_d16_xy"),
   (  -1,   -1, 0x0e, 0x0e, 0x86, "buffer_store_format_d16_xyz"),
   (  -1,   -1, 0x0f, 0x0f, 0x87, "buffer_store_format_d16_xyzw"),
   (0x08, 0x08, 0x10, 0x10, 0x08, "buffer_load_ubyte"),
   (0x09, 0x09, 0x11, 0x11, 0x09, "buffer_load_sbyte"),
   (0x0a, 0x0a, 0x12, 0x12, 0x0a, "buffer_load_ushort"),
   (0x0b, 0x0b, 0x13, 0x13, 0x0b, "buffer_load_sshort"),
   (0x0c, 0x0c, 0x14, 0x14, 0x0c, "buffer_load_dword"),
   (0x0d, 0x0d, 0x15, 0x15, 0x0d, "buffer_load_dwordx2"),
   (  -1, 0x0f, 0x16, 0x16, 0x0f, "buffer_load_dwordx3"),
   (0x0f, 0x0e, 0x17, 0x17, 0x0e, "buffer_load_dwordx4"),
   (0x18, 0x18, 0x18, 0x18, 0x18, "buffer_store_byte"),
   (  -1,   -1,   -1, 0x19, 0x19, "buffer_store_byte_d16_hi"),
   (0x1a, 0x1a, 0x1a, 0x1a, 0x1a, "buffer_store_short"),
   (  -1,   -1,   -1, 0x1b, 0x1b, "buffer_store_short_d16_hi"),
   (0x1c, 0x1c, 0x1c, 0x1c, 0x1c, "buffer_store_dword"),
   (0x1d, 0x1d, 0x1d, 0x1d, 0x1d, "buffer_store_dwordx2"),
   (  -1, 0x1f, 0x1e, 0x1e, 0x1f, "buffer_store_dwordx3"),
   (0x1e, 0x1e, 0x1f, 0x1f, 0x1e, "buffer_store_dwordx4"),
   (  -1,   -1,   -1, 0x20, 0x20, "buffer_load_ubyte_d16"),
   (  -1,   -1,   -1, 0x21, 0x21, "buffer_load_ubyte_d16_hi"),
   (  -1,   -1,   -1, 0x22, 0x22, "buffer_load_sbyte_d16"),
   (  -1,   -1,   -1, 0x23, 0x23, "buffer_load_sbyte_d16_hi"),
   (  -1,   -1,   -1, 0x24, 0x24, "buffer_load_short_d16"),
   (  -1,   -1,   -1, 0x25, 0x25, "buffer_load_short_d16_hi"),
   (  -1,   -1,   -1, 0x26, 0x26, "buffer_load_format_d16_hi_x"),
   (  -1,   -1,   -1, 0x27, 0x27, "buffer_store_format_d16_hi_x"),
   (  -1,   -1, 0x3d, 0x3d,   -1, "buffer_store_lds_dword"),
   (0x71, 0x71, 0x3e, 0x3e,   -1, "buffer_wbinvl1"),
   (0x70, 0x70, 0x3f, 0x3f,   -1, "buffer_wbinvl1_vol"),
   (0x30, 0x30, 0x40, 0x40, 0x30, "buffer_atomic_swap"),
   (0x31, 0x31, 0x41, 0x41, 0x31, "buffer_atomic_cmpswap"),
   (0x32, 0x32, 0x42, 0x42, 0x32, "buffer_atomic_add"),
   (0x33, 0x33, 0x43, 0x43, 0x33, "buffer_atomic_sub"),
   (0x34,   -1,   -1,   -1,   -1, "buffer_atomic_rsub"),
   (0x35, 0x35, 0x44, 0x44, 0x35, "buffer_atomic_smin"),
   (0x36, 0x36, 0x45, 0x45, 0x36, "buffer_atomic_umin"),
   (0x37, 0x37, 0x46, 0x46, 0x37, "buffer_atomic_smax"),
   (0x38, 0x38, 0x47, 0x47, 0x38, "buffer_atomic_umax"),
   (0x39, 0x39, 0x48, 0x48, 0x39, "buffer_atomic_and"),
   (0x3a, 0x3a, 0x49, 0x49, 0x3a, "buffer_atomic_or"),
   (0x3b, 0x3b, 0x4a, 0x4a, 0x3b, "buffer_atomic_xor"),
   (0x3c, 0x3c, 0x4b, 0x4b, 0x3c, "buffer_atomic_inc"),
   (0x3d, 0x3d, 0x4c, 0x4c, 0x3d, "buffer_atomic_dec"),
   (0x3e, 0x3e,   -1,   -1, 0x3e, "buffer_atomic_fcmpswap"),
   (0x3f, 0x3f,   -1,   -1, 0x3f, "buffer_atomic_fmin"),
   (0x40, 0x40,   -1,   -1, 0x40, "buffer_atomic_fmax"),
   (0x50, 0x50, 0x60, 0x60, 0x50, "buffer_atomic_swap_x2"),
   (0x51, 0x51, 0x61, 0x61, 0x51, "buffer_atomic_cmpswap_x2"),
   (0x52, 0x52, 0x62, 0x62, 0x52, "buffer_atomic_add_x2"),
   (0x53, 0x53, 0x63, 0x63, 0x53, "buffer_atomic_sub_x2"),
   (0x54,   -1,   -1,   -1,   -1, "buffer_atomic_rsub_x2"),
   (0x55, 0x55, 0x64, 0x64, 0x55, "buffer_atomic_smin_x2"),
   (0x56, 0x56, 0x65, 0x65, 0x56, "buffer_atomic_umin_x2"),
   (0x57, 0x57, 0x66, 0x66, 0x57, "buffer_atomic_smax_x2"),
   (0x58, 0x58, 0x67, 0x67, 0x58, "buffer_atomic_umax_x2"),
   (0x59, 0x59, 0x68, 0x68, 0x59, "buffer_atomic_and_x2"),
   (0x5a, 0x5a, 0x69, 0x69, 0x5a, "buffer_atomic_or_x2"),
   (0x5b, 0x5b, 0x6a, 0x6a, 0x5b, "buffer_atomic_xor_x2"),
   (0x5c, 0x5c, 0x6b, 0x6b, 0x5c, "buffer_atomic_inc_x2"),
   (0x5d, 0x5d, 0x6c, 0x6c, 0x5d, "buffer_atomic_dec_x2"),
   (0x5e, 0x5e,   -1,   -1, 0x5e, "buffer_atomic_fcmpswap_x2"),
   (0x5f, 0x5f,   -1,   -1, 0x5f, "buffer_atomic_fmin_x2"),
   (0x60, 0x60,   -1,   -1, 0x60, "buffer_atomic_fmax_x2"),
   (  -1,   -1,   -1,   -1, 0x71, "buffer_gl0_inv"),
   (  -1,   -1,   -1,   -1, 0x72, "buffer_gl1_inv"),
   (  -1,   -1,   -1,   -1, 0x34, "buffer_atomic_csub"), #GFX10.3+. seems glc must be set
}
# An entry is flagged atomic purely by its name containing "atomic".
# sorted() makes the registration order reproducible: MUBUF is a set, and set
# iteration order follows string hashing, which differs between runs.
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in sorted(MUBUF):
   opcode(name, gfx7, gfx9, gfx10, Format.MUBUF, InstrClass.VMem, is_atomic = "atomic" in name)
13687ec681f3Smrg
# MTBUF instructions:
#
# Row layout: (gfx6, gfx7, gfx8, gfx9, gfx10, name).
# Only the gfx7, gfx9 and gfx10 columns are forwarded to opcode() below.
# -1 presumably marks a generation without an encoding for the instruction
# (TODO confirm against opcode()).
MTBUF = {
   (0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"),
   (0x01, 0x01, 0x01, 0x01, 0x01, "tbuffer_load_format_xy"),
   (0x02, 0x02, 0x02, 0x02, 0x02, "tbuffer_load_format_xyz"),
   (0x03, 0x03, 0x03, 0x03, 0x03, "tbuffer_load_format_xyzw"),
   (0x04, 0x04, 0x04, 0x04, 0x04, "tbuffer_store_format_x"),
   (0x05, 0x05, 0x05, 0x05, 0x05, "tbuffer_store_format_xy"),
   (0x06, 0x06, 0x06, 0x06, 0x06, "tbuffer_store_format_xyz"),
   (0x07, 0x07, 0x07, 0x07, 0x07, "tbuffer_store_format_xyzw"),
   (  -1,   -1, 0x08, 0x08, 0x08, "tbuffer_load_format_d16_x"),
   (  -1,   -1, 0x09, 0x09, 0x09, "tbuffer_load_format_d16_xy"),
   (  -1,   -1, 0x0a, 0x0a, 0x0a, "tbuffer_load_format_d16_xyz"),
   (  -1,   -1, 0x0b, 0x0b, 0x0b, "tbuffer_load_format_d16_xyzw"),
   (  -1,   -1, 0x0c, 0x0c, 0x0c, "tbuffer_store_format_d16_x"),
   (  -1,   -1, 0x0d, 0x0d, 0x0d, "tbuffer_store_format_d16_xy"),
   (  -1,   -1, 0x0e, 0x0e, 0x0e, "tbuffer_store_format_d16_xyz"),
   (  -1,   -1, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"),
}
# sorted() makes the registration order reproducible: MTBUF is a set, and set
# iteration order follows string hashing, which differs between runs.
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in sorted(MTBUF):
   opcode(name, gfx7, gfx9, gfx10, Format.MTBUF, InstrClass.VMem)
13897ec681f3Smrg
13907ec681f3Smrg
# MIMG load/store/query instructions.
# Each row is (code, name); the same code is used for every encoding column.
IMAGE = {
   (0x00, "image_load"),
   (0x01, "image_load_mip"),
   (0x02, "image_load_pck"),
   (0x03, "image_load_pck_sgn"),
   (0x04, "image_load_mip_pck"),
   (0x05, "image_load_mip_pck_sgn"),
   (0x08, "image_store"),
   (0x09, "image_store_mip"),
   (0x0a, "image_store_pck"),
   (0x0b, "image_store_mip_pck"),
   (0x0e, "image_get_resinfo"),
   (0x60, "image_get_lod"),
}
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
# sorted() makes the registration order reproducible: IMAGE is a set, and set
# iteration order follows string hashing, which differs between runs.
for (code, name) in sorted(IMAGE):
   opcode(name, code, code, code, Format.MIMG, InstrClass.VMem)

# Registered separately because only the last encoding column is populated.
opcode("image_msaa_load", -1, -1, 0x80, Format.MIMG, InstrClass.VMem) #GFX10.3+
14107ec681f3Smrg
# MIMG atomic opcode table.
# Columns: (gfx6, gfx7, gfx89, name), where gfx89 is shared by gfx8 and gfx9.
# -1 presumably marks an instruction that does not exist on that
# generation -- confirm against opcode().
# Stored as a set, so the registration order below is unspecified.
IMAGE_ATOMIC = {
   (0x0f, 0x0f, 0x10, "image_atomic_swap"),
   (0x10, 0x10, 0x11, "image_atomic_cmpswap"),
   (0x11, 0x11, 0x12, "image_atomic_add"),
   (0x12, 0x12, 0x13, "image_atomic_sub"),
   (0x13,   -1,   -1, "image_atomic_rsub"),
   (0x14, 0x14, 0x14, "image_atomic_smin"),
   (0x15, 0x15, 0x15, "image_atomic_umin"),
   (0x16, 0x16, 0x16, "image_atomic_smax"),
   (0x17, 0x17, 0x17, "image_atomic_umax"),
   (0x18, 0x18, 0x18, "image_atomic_and"),
   (0x19, 0x19, 0x19, "image_atomic_or"),
   (0x1a, 0x1a, 0x1a, "image_atomic_xor"),
   (0x1b, 0x1b, 0x1b, "image_atomic_inc"),
   (0x1c, 0x1c, 0x1c, "image_atomic_dec"),
   (0x1d, 0x1d,   -1, "image_atomic_fcmpswap"),
   (0x1e, 0x1e,   -1, "image_atomic_fmin"),
   (0x1f, 0x1f,   -1, "image_atomic_fmax"),
}
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (gfx6, gfx7, gfx89, gfx89, ???, name)
# The gfx7 and gfx10 opcode numbers are the same here, so the gfx7 column is
# reused for the gfx10 slot of opcode().  The gfx6 column is not passed on.
for (_gfx6, gfx7, gfx89, name) in IMAGE_ATOMIC:
   opcode(
      name, gfx7, gfx89, gfx7,
      Format.MIMG, InstrClass.VMem,
      is_atomic=True,
   )
14347ec681f3Smrg
# MIMG sample opcode table.
# Columns: (code, name).
# Stored as a set, so the registration order below is unspecified.
IMAGE_SAMPLE = {
   (0x20, "image_sample"),
   (0x21, "image_sample_cl"),
   (0x22, "image_sample_d"),
   (0x23, "image_sample_d_cl"),
   (0x24, "image_sample_l"),
   (0x25, "image_sample_b"),
   (0x26, "image_sample_b_cl"),
   (0x27, "image_sample_lz"),
   (0x28, "image_sample_c"),
   (0x29, "image_sample_c_cl"),
   (0x2a, "image_sample_c_d"),
   (0x2b, "image_sample_c_d_cl"),
   (0x2c, "image_sample_c_l"),
   (0x2d, "image_sample_c_b"),
   (0x2e, "image_sample_c_b_cl"),
   (0x2f, "image_sample_c_lz"),
   (0x30, "image_sample_o"),
   (0x31, "image_sample_cl_o"),
   (0x32, "image_sample_d_o"),
   (0x33, "image_sample_d_cl_o"),
   (0x34, "image_sample_l_o"),
   (0x35, "image_sample_b_o"),
   (0x36, "image_sample_b_cl_o"),
   (0x37, "image_sample_lz_o"),
   (0x38, "image_sample_c_o"),
   (0x39, "image_sample_c_cl_o"),
   (0x3a, "image_sample_c_d_o"),
   (0x3b, "image_sample_c_d_cl_o"),
   (0x3c, "image_sample_c_l_o"),
   (0x3d, "image_sample_c_b_o"),
   (0x3e, "image_sample_c_b_cl_o"),
   (0x3f, "image_sample_c_lz_o"),
   (0x68, "image_sample_cd"),
   (0x69, "image_sample_cd_cl"),
   (0x6a, "image_sample_c_cd"),
   (0x6b, "image_sample_c_cd_cl"),
   (0x6c, "image_sample_cd_o"),
   (0x6d, "image_sample_cd_cl_o"),
   (0x6e, "image_sample_c_cd_o"),
   (0x6f, "image_sample_c_cd_cl_o"),
}
# The same opcode number is used for every generation, i.e.
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name),
# so `code` fills all three opcode-number slots of opcode().
for code, name in IMAGE_SAMPLE:
   opcode(
      name, code, code, code,
      Format.MIMG, InstrClass.VMem,
   )
14807ec681f3Smrg
# MIMG gather4 opcode table.
# Columns: (code, name).
# Stored as a set, so the registration order below is unspecified.
IMAGE_GATHER4 = {
   (0x40, "image_gather4"),
   (0x41, "image_gather4_cl"),
   #(0x42, "image_gather4h"), VEGA only?
   (0x44, "image_gather4_l"), # following instructions have different opcodes according to ISA sheet.
   (0x45, "image_gather4_b"),
   (0x46, "image_gather4_b_cl"),
   (0x47, "image_gather4_lz"),
   (0x48, "image_gather4_c"),
   (0x49, "image_gather4_c_cl"), # previous instructions have different opcodes according to ISA sheet.
   #(0x4a, "image_gather4h_pck"), VEGA only?
   #(0x4b, "image_gather8h_pck"), VEGA only?
   (0x4c, "image_gather4_c_l"),
   (0x4d, "image_gather4_c_b"),
   (0x4e, "image_gather4_c_b_cl"),
   (0x4f, "image_gather4_c_lz"),
   (0x50, "image_gather4_o"),
   (0x51, "image_gather4_cl_o"),
   (0x54, "image_gather4_l_o"),
   (0x55, "image_gather4_b_o"),
   (0x56, "image_gather4_b_cl_o"),
   (0x57, "image_gather4_lz_o"),
   (0x58, "image_gather4_c_o"),
   (0x59, "image_gather4_c_cl_o"),
   (0x5c, "image_gather4_c_l_o"),
   (0x5d, "image_gather4_c_b_o"),
   (0x5e, "image_gather4_c_b_cl_o"),
   (0x5f, "image_gather4_c_lz_o"),
}
# The same opcode number is used for every generation, i.e.
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name),
# so `code` fills all three opcode-number slots of opcode().
for code, name in IMAGE_GATHER4:
   opcode(
      name, code, code, code,
      Format.MIMG, InstrClass.VMem,
   )

# Registered only in the last opcode-number slot (0xe7 == 231); the first two
# slots are -1.
opcode(
   "image_bvh64_intersect_ray", -1, -1, 0xe7,
   Format.MIMG, InstrClass.VMem,
)
15157ec681f3Smrg
# FLAT opcode table.
# -1 presumably marks an instruction that does not exist on that
# generation -- confirm against opcode().
# Stored as a set, so the registration order below is unspecified.
FLAT = {
   #GFX7, GFX8_9, GFX10
   (0x08, 0x10, 0x08, "flat_load_ubyte"),
   (0x09, 0x11, 0x09, "flat_load_sbyte"),
   (0x0a, 0x12, 0x0a, "flat_load_ushort"),
   (0x0b, 0x13, 0x0b, "flat_load_sshort"),
   (0x0c, 0x14, 0x0c, "flat_load_dword"),
   (0x0d, 0x15, 0x0d, "flat_load_dwordx2"),
   (0x0f, 0x16, 0x0f, "flat_load_dwordx3"),
   (0x0e, 0x17, 0x0e, "flat_load_dwordx4"),
   (0x18, 0x18, 0x18, "flat_store_byte"),
   (  -1, 0x19, 0x19, "flat_store_byte_d16_hi"),
   (0x1a, 0x1a, 0x1a, "flat_store_short"),
   (  -1, 0x1b, 0x1b, "flat_store_short_d16_hi"),
   (0x1c, 0x1c, 0x1c, "flat_store_dword"),
   (0x1d, 0x1d, 0x1d, "flat_store_dwordx2"),
   (0x1f, 0x1e, 0x1f, "flat_store_dwordx3"),
   (0x1e, 0x1f, 0x1e, "flat_store_dwordx4"),
   (  -1, 0x20, 0x20, "flat_load_ubyte_d16"),
   (  -1, 0x21, 0x21, "flat_load_ubyte_d16_hi"),
   (  -1, 0x22, 0x22, "flat_load_sbyte_d16"),
   (  -1, 0x23, 0x23, "flat_load_sbyte_d16_hi"),
   (  -1, 0x24, 0x24, "flat_load_short_d16"),
   (  -1, 0x25, 0x25, "flat_load_short_d16_hi"),
   (0x30, 0x40, 0x30, "flat_atomic_swap"),
   (0x31, 0x41, 0x31, "flat_atomic_cmpswap"),
   (0x32, 0x42, 0x32, "flat_atomic_add"),
   (0x33, 0x43, 0x33, "flat_atomic_sub"),
   (0x35, 0x44, 0x35, "flat_atomic_smin"),
   (0x36, 0x45, 0x36, "flat_atomic_umin"),
   (0x37, 0x46, 0x37, "flat_atomic_smax"),
   (0x38, 0x47, 0x38, "flat_atomic_umax"),
   (0x39, 0x48, 0x39, "flat_atomic_and"),
   (0x3a, 0x49, 0x3a, "flat_atomic_or"),
   (0x3b, 0x4a, 0x3b, "flat_atomic_xor"),
   (0x3c, 0x4b, 0x3c, "flat_atomic_inc"),
   (0x3d, 0x4c, 0x3d, "flat_atomic_dec"),
   (0x3e,   -1, 0x3e, "flat_atomic_fcmpswap"),
   (0x3f,   -1, 0x3f, "flat_atomic_fmin"),
   (0x40,   -1, 0x40, "flat_atomic_fmax"),
   (0x50, 0x60, 0x50, "flat_atomic_swap_x2"),
   (0x51, 0x61, 0x51, "flat_atomic_cmpswap_x2"),
   (0x52, 0x62, 0x52, "flat_atomic_add_x2"),
   (0x53, 0x63, 0x53, "flat_atomic_sub_x2"),
   (0x55, 0x64, 0x55, "flat_atomic_smin_x2"),
   (0x56, 0x65, 0x56, "flat_atomic_umin_x2"),
   (0x57, 0x66, 0x57, "flat_atomic_smax_x2"),
   (0x58, 0x67, 0x58, "flat_atomic_umax_x2"),
   (0x59, 0x68, 0x59, "flat_atomic_and_x2"),
   (0x5a, 0x69, 0x5a, "flat_atomic_or_x2"),
   (0x5b, 0x6a, 0x5b, "flat_atomic_xor_x2"),
   (0x5c, 0x6b, 0x5c, "flat_atomic_inc_x2"),
   (0x5d, 0x6c, 0x5d, "flat_atomic_dec_x2"),
   (0x5e,   -1, 0x5e, "flat_atomic_fcmpswap_x2"),
   (0x5f,   -1, 0x5f, "flat_atomic_fmin_x2"),
   (0x60,   -1, 0x60, "flat_atomic_fmax_x2"),
}
# The middle column is shared by gfx8 and gfx9 (see the table header).
# An entry is flagged atomic when its name contains "atomic".
for (gfx7, gfx89, gfx10, name) in FLAT:
    opcode(
        name, gfx7, gfx89, gfx10, Format.FLAT, InstrClass.VMem,
        is_atomic=("atomic" in name),
    ) #TODO: also LDS?
15757ec681f3Smrg
# GLOBAL opcode table.
# -1 presumably marks an instruction that does not exist on that
# generation -- confirm against opcode().
# Stored as a set, so the registration order below is unspecified.
GLOBAL = {
   #GFX8_9, GFX10
   (0x10, 0x08, "global_load_ubyte"),
   (0x11, 0x09, "global_load_sbyte"),
   (0x12, 0x0a, "global_load_ushort"),
   (0x13, 0x0b, "global_load_sshort"),
   (0x14, 0x0c, "global_load_dword"),
   (0x15, 0x0d, "global_load_dwordx2"),
   (0x16, 0x0f, "global_load_dwordx3"),
   (0x17, 0x0e, "global_load_dwordx4"),
   (0x18, 0x18, "global_store_byte"),
   (0x19, 0x19, "global_store_byte_d16_hi"),
   (0x1a, 0x1a, "global_store_short"),
   (0x1b, 0x1b, "global_store_short_d16_hi"),
   (0x1c, 0x1c, "global_store_dword"),
   (0x1d, 0x1d, "global_store_dwordx2"),
   (0x1e, 0x1f, "global_store_dwordx3"),
   (0x1f, 0x1e, "global_store_dwordx4"),
   (0x20, 0x20, "global_load_ubyte_d16"),
   (0x21, 0x21, "global_load_ubyte_d16_hi"),
   (0x22, 0x22, "global_load_sbyte_d16"),
   (0x23, 0x23, "global_load_sbyte_d16_hi"),
   (0x24, 0x24, "global_load_short_d16"),
   (0x25, 0x25, "global_load_short_d16_hi"),
   (0x40, 0x30, "global_atomic_swap"),
   (0x41, 0x31, "global_atomic_cmpswap"),
   (0x42, 0x32, "global_atomic_add"),
   (0x43, 0x33, "global_atomic_sub"),
   (0x44, 0x35, "global_atomic_smin"),
   (0x45, 0x36, "global_atomic_umin"),
   (0x46, 0x37, "global_atomic_smax"),
   (0x47, 0x38, "global_atomic_umax"),
   (0x48, 0x39, "global_atomic_and"),
   (0x49, 0x3a, "global_atomic_or"),
   (0x4a, 0x3b, "global_atomic_xor"),
   (0x4b, 0x3c, "global_atomic_inc"),
   (0x4c, 0x3d, "global_atomic_dec"),
   (  -1, 0x3e, "global_atomic_fcmpswap"),
   (  -1, 0x3f, "global_atomic_fmin"),
   (  -1, 0x40, "global_atomic_fmax"),
   (0x60, 0x50, "global_atomic_swap_x2"),
   (0x61, 0x51, "global_atomic_cmpswap_x2"),
   (0x62, 0x52, "global_atomic_add_x2"),
   (0x63, 0x53, "global_atomic_sub_x2"),
   (0x64, 0x55, "global_atomic_smin_x2"),
   (0x65, 0x56, "global_atomic_umin_x2"),
   (0x66, 0x57, "global_atomic_smax_x2"),
   (0x67, 0x58, "global_atomic_umax_x2"),
   (0x68, 0x59, "global_atomic_and_x2"),
   (0x69, 0x5a, "global_atomic_or_x2"),
   (0x6a, 0x5b, "global_atomic_xor_x2"),
   (0x6b, 0x5c, "global_atomic_inc_x2"),
   (0x6c, 0x5d, "global_atomic_dec_x2"),
   (  -1, 0x5e, "global_atomic_fcmpswap_x2"),
   (  -1, 0x5f, "global_atomic_fmin_x2"),
   (  -1, 0x60, "global_atomic_fmax_x2"),
   (  -1, 0x16, "global_load_dword_addtid"), #GFX10.3+
   (  -1, 0x17, "global_store_dword_addtid"), #GFX10.3+
   (  -1, 0x34, "global_atomic_csub"), #GFX10.3+. seems glc must be set
}
# The table has no gfx7 column, so -1 is passed for the first opcode-number
# slot.  An entry is flagged atomic when its name contains "atomic".
for (gfx89, gfx10, name) in GLOBAL:
    opcode(
        name, -1, gfx89, gfx10, Format.GLOBAL, InstrClass.VMem,
        is_atomic=("atomic" in name),
    )
16387ec681f3Smrg
# SCRATCH opcode table.
# Stored as a set, so the registration order below is unspecified.
SCRATCH = {
   #GFX8_9, GFX10
   (0x10, 0x08, "scratch_load_ubyte"),
   (0x11, 0x09, "scratch_load_sbyte"),
   (0x12, 0x0a, "scratch_load_ushort"),
   (0x13, 0x0b, "scratch_load_sshort"),
   (0x14, 0x0c, "scratch_load_dword"),
   (0x15, 0x0d, "scratch_load_dwordx2"),
   (0x16, 0x0f, "scratch_load_dwordx3"),
   (0x17, 0x0e, "scratch_load_dwordx4"),
   (0x18, 0x18, "scratch_store_byte"),
   (0x19, 0x19, "scratch_store_byte_d16_hi"),
   (0x1a, 0x1a, "scratch_store_short"),
   (0x1b, 0x1b, "scratch_store_short_d16_hi"),
   (0x1c, 0x1c, "scratch_store_dword"),
   (0x1d, 0x1d, "scratch_store_dwordx2"),
   (0x1e, 0x1f, "scratch_store_dwordx3"),
   (0x1f, 0x1e, "scratch_store_dwordx4"),
   (0x20, 0x20, "scratch_load_ubyte_d16"),
   (0x21, 0x21, "scratch_load_ubyte_d16_hi"),
   (0x22, 0x22, "scratch_load_sbyte_d16"),
   (0x23, 0x23, "scratch_load_sbyte_d16_hi"),
   (0x24, 0x24, "scratch_load_short_d16"),
   (0x25, 0x25, "scratch_load_short_d16_hi"),
}
# The table has no gfx7 column, so -1 is passed for the first opcode-number
# slot.
for (gfx89, gfx10, name) in SCRATCH:
    opcode(
        name, -1, gfx89, gfx10,
        Format.SCRATCH, InstrClass.VMem,
    )
16667ec681f3Smrg
# Check for duplicate opcode numbers.
#
# For each generation listed below, every registered opcode (pseudo formats
# excluded) must have a unique (format, opcode number) pair, apart from the
# explicitly accepted exceptions.  On the first offending pair the script
# terminates with exit status 1.
for ver in ['gfx9', 'gfx10']:
    # Maps (format, opcode number) -> name of the first opcode seen with it.
    op_to_name = {}
    for op in opcodes.values():
        if op.format in (Format.PSEUDO, Format.PSEUDO_BRANCH, Format.PSEUDO_BARRIER, Format.PSEUDO_REDUCTION):
            continue

        num = getattr(op, 'opcode_' + ver)
        # -1 entries carry no opcode number to compare for this generation.
        if num == -1:
            continue

        key = (op.format, num)
        if key not in op_to_name:
            op_to_name[key] = op.name
            continue

        # Known, accepted collisions.
        names = {op_to_name[key], op.name}
        # NOTE: 'gfx8' is not iterated by the loop above; it is kept here so
        # the exception still applies if that generation is ever added.
        if ver in ('gfx8', 'gfx9') and names == {'v_mul_lo_i32', 'v_mul_lo_u32'}:
            continue
        # v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.3
        if ver == 'gfx10' and names == {'v_mad_legacy_f32', 'v_fma_legacy_f32'}:
            continue

        # sys.exit() with a string writes it to stderr and exits with
        # status 1, so the diagnostic is not mixed into anything this
        # process writes to stdout.
        sys.exit('%s and %s share the same opcode number (%s)' % (op_to_name[key], op.name, ver))
16937ec681f3Smrg
1694