17ec681f3Smrg"""
27ec681f3SmrgCopyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
37ec681f3Smrg
47ec681f3SmrgPermission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrgcopy of this software and associated documentation files (the "Software"),
67ec681f3Smrgto deal in the Software without restriction, including without limitation
77ec681f3Smrgthe rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrgand/or sell copies of the Software, and to permit persons to whom the
97ec681f3SmrgSoftware is furnished to do so, subject to the following conditions:
107ec681f3Smrg
117ec681f3SmrgThe above copyright notice and this permission notice (including the next
127ec681f3Smrgparagraph) shall be included in all copies or substantial portions of the
137ec681f3SmrgSoftware.
147ec681f3Smrg
157ec681f3SmrgTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3SmrgIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3SmrgFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3SmrgTHE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3SmrgLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3SmrgOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3SmrgSOFTWARE.
227ec681f3Smrg"""
237ec681f3Smrg
# Module-level registries, keyed by name. They start out empty and are filled
# in as a side effect of the op(), immediate() and enum() helpers below.
opcodes, immediates, enums = {}, {}, {}
277ec681f3Smrg
class Opcode(object):
   """Plain record holding everything declared about one opcode.

   The constructor stores each argument, unmodified, on an attribute of the
   same name: ``name``, ``dests``, ``srcs``, ``imms``, ``is_float``,
   ``can_eliminate``, ``encoding_16`` and ``encoding_32``.
   """

   def __init__(self, name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32):
      # What the opcode is called and how many operands it takes
      self.name, self.dests, self.srcs = name, dests, srcs

      # Immediates attached to the instruction
      self.imms = imms

      # Boolean properties
      self.is_float, self.can_eliminate = is_float, can_eliminate

      # Encodings (either may be None)
      self.encoding_16, self.encoding_32 = encoding_16, encoding_32
387ec681f3Smrg
class Immediate(object):
   """Plain record pairing an immediate's ``name`` with its ``ctype`` string.

   Both arguments are stored as given. Judging by the values used in this
   file ("uint32_t", "bool", "enum agx_format", ...), ``ctype`` is presumably
   the C type used for the immediate -- confirm against the consumers.
   """

   def __init__(self, name, ctype):
      self.name, self.ctype = name, ctype
437ec681f3Smrg
class Encoding(object):
   """Encoding of an instruction, built from a 4-tuple description.

   ``description`` is ``(exact, mask, length_short, length_long)``. A
   ``length_long`` of None is shorthand for "same as length_short".

   Attributes set: ``exact``, ``mask``, ``length_short`` and ``extensible``
   (True when the long length differs from the short one). The long length
   itself is validated but not stored.
   """

   def __init__(self, description):
      exact, mask, length_short, length_long = description

      # Convenience: an omitted long length falls back to the short length
      effective_long = length_short if length_long is None else length_long

      self.exact = exact
      self.mask = mask
      self.length_short = length_short
      self.extensible = (effective_long != length_short)

      if not self.extensible:
         return

      # An extended form must be exactly 2 longer than the short form, or 4
      # longer when the short form is itself longer than 8.
      extension = 4 if length_short > 8 else 2
      assert effective_long == length_short + extension
597ec681f3Smrg
def op(name, encoding_32, dests = 1, srcs = 0, imms = None, is_float = False, can_eliminate = True, encoding_16 = None):
   """Build an Opcode and store it in the global ``opcodes`` table under *name*.

   name:          key in ``opcodes`` and the Opcode's ``name``.
   encoding_32:   4-tuple description accepted by Encoding, or None for an
                  opcode without this encoding.
   dests, srcs:   stored on the Opcode as given (defaults 1 and 0).
   imms:          list of immediates for the opcode; omitted or None means a
                  fresh empty list.
   is_float, can_eliminate: flags stored on the Opcode as given.
   encoding_16:   like encoding_32; defaults to None.

   Registering the same name twice replaces the earlier entry. Returns None.
   """
   # A literal [] default would be one list object shared by every opcode
   # declared without immediates, so mutating one opcode's imms would silently
   # change all of them. Allocate a new list per opcode instead.
   if imms is None:
      imms = []

   # Descriptions are wrapped in Encoding objects; None stays None
   encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None
   encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None

   opcodes[name] = Opcode(name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32)
657ec681f3Smrg
def immediate(name, ctype = "uint32_t"):
   """Create an Immediate, record it in the global ``immediates`` table under
   *name* (replacing any earlier entry), and return it.

   ctype defaults to "uint32_t".
   """
   immediates[name] = Immediate(name, ctype)
   return immediates[name]
707ec681f3Smrg
def enum(name, value_dict):
   """Declare an enumerated immediate.

   Stores *value_dict* in the global ``enums`` table under *name*, then
   registers and returns an immediate of the same name whose ctype is
   "enum agx_" followed by the name.
   """
   enums[name] = value_dict
   ctype = "enum agx_" + name
   return immediate(name, ctype)
747ec681f3Smrg
# Bit 15. OR'd into the exact/mask values of many encodings below.
L = 1 << 15

# Short spelling of None, used below for an absent encoding and for an absent
# long length inside an encoding description.
_ = None
777ec681f3Smrg
# Immediates available to opcodes. Each call registers the immediate in the
# global table and binds the returned object to a module-level name; the
# ctype is "uint32_t" unless given explicitly.
FORMAT = immediate("format", "enum agx_format")
IMM = immediate("imm")
WRITEOUT = immediate("writeout")
INDEX = immediate("index")
COMPONENT = immediate("component")
CHANNELS = immediate("channels")
TRUTH_TABLE = immediate("truth_table")
ROUND = immediate("round")
SHIFT = immediate("shift")
MASK = immediate("mask")
BFI_MASK = immediate("bfi_mask")
LOD_MODE = immediate("lod_mode", "enum agx_lod_mode")
DIM = immediate("dim", "enum agx_dim")
SCOREBOARD = immediate("scoreboard")
ICOND = immediate("icond")
FCOND = immediate("fcond")
NEST = immediate("nest")
INVERT_COND = immediate("invert_cond")
TARGET = immediate("target", "agx_block *")
PERSPECTIVE = immediate("perspective", "bool")
# Enumerated immediate "sr": maps each numeric value to its name. The table
# is sparse; values that are not listed have no name here.
SR = enum("sr", {
   # Three-component entries occupy consecutive values for .x/.y/.z
   0:  "threadgroup_position_in_grid.x",
   1:  "threadgroup_position_in_grid.y",
   2:  "threadgroup_position_in_grid.z",

   4:  "threads_per_threadgroup.x",
   5:  "threads_per_threadgroup.y",
   6:  "threads_per_threadgroup.z",

   8:  "dispatch_threads_per_threadgroup.x",
   9:  "dispatch_threads_per_threadgroup.y",
   10: "dispatch_threads_per_threadgroup.z",

   48: "thread_position_in_threadgroup.x",
   49: "thread_position_in_threadgroup.y",
   50: "thread_position_in_threadgroup.z",

   # Single-valued entries
   51: "thread_index_in_threadgroup",
   52: "thread_index_in_subgroup",
   53: "subgroup_index_in_threadgroup",
   56: "active_thread_index_in_quad",
   58: "active_thread_index_in_subgroup",
   62: "backfacing",

   80: "thread_position_in_grid.x",
   81: "thread_position_in_grid.y",
   82: "thread_position_in_grid.z",
})
1227ec681f3Smrg
def FUNOP(x):
   """Return *x* shifted left by 28 bits."""
   return x << 28

# Fourteen set bits starting at bit 28 (bits 28..41).
FUNOP_MASK = FUNOP((1 << 14) - 1)
1257ec681f3Smrg
def funop(name, opcode):
   """Register *name* as a one-source float opcode.

   All of these opcodes share the low bits 0x0A with L set; *opcode* is the
   value placed at bit 28 by FUNOP() that tells them apart. The mask covers
   the low six bits, L, and the whole FUNOP_MASK field. The encoding has
   length 6 with no long form, and is registered as the 32-bit encoding.
   """
   # Use the shared FUNOP/FUNOP_MASK definitions rather than repeating the
   # shift amount and field width by hand.
   op(name, (0x0A | L | FUNOP(opcode), 0x3F | L | FUNOP_MASK, 6, _),
      srcs = 1, is_float = True)
1307ec681f3Smrg
# Listing of opcodes
#
# First the one-source float operations declared through funop(); the second
# argument is the value that selects the operation.
funop("floor", 0b000000)
funop("srsqrt", 0b000001)
funop("dfdx", 0b000100)
funop("dfdy", 0b000110)
funop("rcp", 0b001000)
funop("rsqrt", 0b001001)
funop("sin_pt_1", 0b001010)
funop("log2", 0b001100)
funop("exp2", 0b001101)
funop("sin_pt_2", 0b001110)
funop("ceil", 0b010000)
funop("trunc", 0b100000)
funop("roundeven", 0b110000)
# Float arithmetic. Each of these registers both an encoding_16 and an
# encoding_32. fadd and fmul match on L and have no long form; fma does not
# involve L and extends from length 6 to 8.
op("fadd", srcs = 2, is_float = True,
      encoding_16 = (0x26 | L, 0x3F | L, 6, _),
      encoding_32 = (0x2A | L, 0x3F | L, 6, _))

op("fma", srcs = 3, is_float = True,
      encoding_16 = (0x36, 0x3F, 6, 8),
      encoding_32 = (0x3A, 0x3F, 6, 8))

op("fmul", srcs = 2, is_float = True,
      encoding_16 = (0x16 | L, 0x3F | L, 6, _),
      encoding_32 = (0x1A | L, 0x3F | L, 6, _))
1607ec681f3Smrg
# Immediate move: the only opcode in this group with an encoding_16.
op("mov_imm", imms = [IMM],
      encoding_16 = (0x62, 0xFF, 4, 6),
      encoding_32 = (0x62, 0xFF, 6, 8))

# Integer add / multiply-add, each carrying a SHIFT immediate. Their masks
# include L while their exact values do not, so L must be clear to match.
op("iadd", srcs = 2, imms = [SHIFT],
      encoding_32 = (0x0E, 0x3F | L, 8, _))

op("imad", srcs = 3, imms = [SHIFT],
      encoding_32 = (0x1E, 0x3F | L, 8, _))

# bfi, bfeil and asr share the low bits 0x2E and all match on bits 26-27.
# bfeil and asr additionally match on L being set; asr also sets bit 26.
op("bfi", srcs = 3, imms = [BFI_MASK],
      encoding_32 = (0x2E, 0x7F | (0x3 << 26), 8, _))

op("bfeil", srcs = 3, imms = [BFI_MASK],
      encoding_32 = (0x2E | L, 0x7F | L | (0x3 << 26), 8, _))

op("asr", srcs = 2,
      encoding_32 = (0x2E | L | (0x1 << 26), 0x7F | L | (0x3 << 26), 8, _))
1857ec681f3Smrg
# Compare-and-select: four sources plus a condition immediate (ICOND for the
# integer form, FCOND for the float form). Length 8, extensible to 10.
op("icmpsel", srcs = 4, imms = [ICOND],
      encoding_32 = (0x12, 0x7F, 8, 10))

op("fcmpsel", srcs = 4, imms = [FCOND],
      encoding_32 = (0x02, 0x7F, 8, 10))
1937ec681f3Smrg
# texture_sample sources, in order: coordinates, LOD, texture, sampler, offset
# TODO: anything else?
op("texture_sample", srcs = 5,
      imms = [DIM, LOD_MODE, MASK, SCOREBOARD],
      encoding_32 = (0x32, 0x7F, 8, 10)) # XXX WRONG SIZE

# device_load sources, in order: base, index
op("device_load", srcs = 2,
      imms = [FORMAT, MASK, SCOREBOARD],
      encoding_32 = (0x05, 0x7F, 6, 8))
2047ec681f3Smrg
# wait takes a SCOREBOARD immediate and produces no destination; it is marked
# as not eliminable.
op("wait", encoding_32 = (0x38, 0xFF, 2, _),
      dests = 0, imms = [SCOREBOARD], can_eliminate = False)

# get_sr: one destination, selected by the SR enum immediate.
op("get_sr", encoding_32 = (0x72, 0x7F | L, 4, _), imms = [SR])

# Essentially same encoding: the exact values of ld_tile and st_tile differ
# only in bit 6 (0x49 vs 0x09).
op("ld_tile", encoding_32 = (0x49, 0x7F, 8, _),
      imms = [FORMAT], can_eliminate = False)

op("st_tile", encoding_32 = (0x09, 0x7F, 8, _),
      dests = 0, srcs = 1, imms = [FORMAT], can_eliminate = False)
2167ec681f3Smrg
# jmp_exec_any / jmp_exec_none: matched on their full low 16 bits, with a
# TARGET immediate and neither sources nor destinations.
for (name, exact) in (("any", 0xC000), ("none", 0xC200)):
   op("jmp_exec_" + name, encoding_32 = (exact, 0xFFFF, 6, _),
         dests = 0, imms = [TARGET], can_eliminate = False)
2207ec681f3Smrg
# TODO: model implicit r0l destinations
#
# The mask covers all 48 bits except bits 7-8 and 11-12.
op("pop_exec",
      encoding_32 = (0x52 | (0x3 << 9),
                     ((1 << 48) - 1) & ~((0x3 << 7) | (0x3 << 11)),
                     6, _),
      dests = 0, imms = [NEST], can_eliminate = False)
2247ec681f3Smrg
# Registers if_icmp, else_icmp, while_icmp, then if_fcmp, else_fcmp,
# while_fcmp. Each takes two sources and the immediates NEST, a condition
# (ICOND or FCOND) and INVERT_COND.
for is_float in (False, True):
   # Only the integer forms additionally match on bits 26-27 and 38-39
   if is_float:
      mod_mask = 0
   else:
      mod_mask = (0x3 << 26) | (0x3 << 38)

   # The position in this tuple is the value placed at bit 9
   for (cf_op, cf) in enumerate(("if", "else", "while")):
      name = "{}_{}cmp".format(cf, "f" if is_float else "i")

      # Integer forms set bit 4 on top of the common 0x42
      exact = 0x42 | (0x0 if is_float else 0x10) | (cf_op << 9)
      mask = 0x7F | (0x3 << 9) | mod_mask | (0x3 << 44)

      # Built inside the loop so every opcode owns a distinct list
      imms = [NEST, FCOND if is_float else ICOND, INVERT_COND]

      op(name, encoding_32 = (exact, mask, 6, _), dests = 0, srcs = 2,
            imms = imms, is_float = is_float, can_eliminate = False)
2367ec681f3Smrg
# Remaining opcodes with a 32-bit encoding
op("bitop", encoding_32 = (0x7E, 0x7F, 6, _),
      srcs = 2, imms = [TRUTH_TABLE])
op("convert", encoding_32 = (0x3E | L, 0x7F | L | (0x3 << 38), 6, _),
      srcs = 2, imms = [ROUND])
op("ld_vary", encoding_32 = (0x21, 0xBF, 8, _),
      srcs = 1, imms = [CHANNELS, PERSPECTIVE])
op("ld_vary_flat", encoding_32 = (0xA1, 0xBF, 8, _),
      srcs = 1, imms = [CHANNELS])

# st_vary is registered without any encoding
op("st_vary", encoding_32 = None, dests = 0, srcs = 2, can_eliminate = False)

op("stop", encoding_32 = (0x88, 0xFFFF, 2, _),
      dests = 0, can_eliminate = False)
op("trap", encoding_32 = (0x08, 0xFFFF, 2, _),
      dests = 0, can_eliminate = False)
op("writeout", encoding_32 = (0x48, 0xFF, 4, _),
      dests = 0, imms = [WRITEOUT], can_eliminate = False)

# p_combine and p_extract are likewise registered without an encoding
# (presumably pseudo-instructions, going by the p_ prefix -- TODO confirm)
op("p_combine", encoding_32 = _, srcs = 4)
op("p_extract", encoding_32 = _, srcs = 1, imms = [COMPONENT])
248