"""
Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

# Module-level registries, filled in as a side effect of importing this file:
#   opcodes:    opcode name    -> Opcode
#   immediates: immediate name -> Immediate
#   enums:      enum name      -> {integer value: symbolic name}
opcodes = {}
immediates = {}
enums = {}


class Opcode(object):
    """Plain record describing one opcode registered through op()."""

    def __init__(self, name, dests, srcs, imms, is_float, can_eliminate,
                 encoding_16, encoding_32):
        self.name = name
        self.dests = dests                  # number of destinations
        self.srcs = srcs                    # number of sources
        self.imms = imms                    # list of Immediate objects
        self.is_float = is_float
        self.can_eliminate = can_eliminate
        self.encoding_16 = encoding_16      # Encoding or None
        self.encoding_32 = encoding_32      # Encoding or None


class Immediate(object):
    """Named immediate field together with the C type used to hold it."""

    def __init__(self, name, ctype):
        self.name = name
        self.ctype = ctype


class Encoding(object):
    """Bit pattern and length information for one form of an opcode.

    `description` is a 4-tuple (exact, mask, length_short, length_long).
    A `length_long` of None means the encoding has a single length.  The
    units of the lengths are not stated in this file (presumably bytes --
    TODO confirm).
    """

    def __init__(self, description):
        (exact, mask, length_short, length_long) = description

        # Convenience
        if length_long is None:
            length_long = length_short

        self.exact = exact
        self.mask = mask
        self.length_short = length_short
        self.extensible = length_short != length_long

        if self.extensible:
            # The long form must be exactly one extension longer than the
            # short form: +4 when the short form exceeds 8, otherwise +2.
            assert length_long == length_short + (4 if length_short > 8 else 2)


def op(name, encoding_32, dests = 1, srcs = 0, imms = None, is_float = False,
       can_eliminate = True, encoding_16 = None):
    """Register an opcode in the global `opcodes` table.

    `encoding_32` and `encoding_16` are either None or description tuples
    accepted by Encoding.  `imms` is the list of Immediate objects the
    opcode carries; omitting it (or passing None) gives the opcode its own
    empty list, so opcodes never share one mutable default list.
    """
    imms = [] if imms is None else imms

    encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None
    encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None

    opcodes[name] = Opcode(name, dests, srcs, imms, is_float, can_eliminate,
                           encoding_16, encoding_32)


def immediate(name, ctype = "uint32_t"):
    """Create an Immediate, record it in `immediates`, and return it."""
    imm = Immediate(name, ctype)
    immediates[name] = imm
    return imm


def enum(name, value_dict):
    """Record `value_dict` in `enums` and return an immediate for it.

    The immediate's C type is "enum agx_" followed by `name`.
    """
    enums[name] = value_dict
    return immediate(name, "enum agx_" + name)


# Bit 15 is set in the exact/mask words of many encodings below
# (presumably a length-select bit -- TODO confirm against the ISA notes).
L = (1 << 15)

# Shorthand for "no long form" in encoding description tuples.
_ = None

FORMAT = immediate("format", "enum agx_format")
IMM = immediate("imm")
WRITEOUT = immediate("writeout")
INDEX = immediate("index")
COMPONENT = immediate("component")
CHANNELS = immediate("channels")
TRUTH_TABLE = immediate("truth_table")
ROUND = immediate("round")
SHIFT = immediate("shift")
MASK = immediate("mask")
BFI_MASK = immediate("bfi_mask")
LOD_MODE = immediate("lod_mode", "enum agx_lod_mode")
DIM = immediate("dim", "enum agx_dim")
SCOREBOARD = immediate("scoreboard")
ICOND = immediate("icond")
FCOND = immediate("fcond")
NEST = immediate("nest")
INVERT_COND = immediate("invert_cond")
TARGET = immediate("target", "agx_block *")
PERSPECTIVE = immediate("perspective", "bool")

# Values of the "sr" immediate used by get_sr, keyed by integer value.
SR = enum("sr", {
    0:  'threadgroup_position_in_grid.x',
    1:  'threadgroup_position_in_grid.y',
    2:  'threadgroup_position_in_grid.z',
    4:  'threads_per_threadgroup.x',
    5:  'threads_per_threadgroup.y',
    6:  'threads_per_threadgroup.z',
    8:  'dispatch_threads_per_threadgroup.x',
    9:  'dispatch_threads_per_threadgroup.y',
    10: 'dispatch_threads_per_threadgroup.z',
    48: 'thread_position_in_threadgroup.x',
    49: 'thread_position_in_threadgroup.y',
    50: 'thread_position_in_threadgroup.z',
    51: 'thread_index_in_threadgroup',
    52: 'thread_index_in_subgroup',
    53: 'subgroup_index_in_threadgroup',
    56: 'active_thread_index_in_quad',
    58: 'active_thread_index_in_subgroup',
    62: 'backfacing',
    80: 'thread_position_in_grid.x',
    81: 'thread_position_in_grid.y',
    82: 'thread_position_in_grid.z',
})


def FUNOP(x):
    """Shift a funop sub-opcode into its position (bit 28 upwards)."""
    return (x << 28)


# Mask covering the 14-bit field that FUNOP() positions.
FUNOP_MASK = FUNOP((1 << 14) - 1)


def funop(name, opcode):
    """Register a single-source float opcode selected by `opcode`.

    All funops share the base pattern 0x0A | L and differ only in the field
    placed by FUNOP().
    """
    op(name, (0x0A | L | FUNOP(opcode),
              0x3F | L | FUNOP_MASK, 6, _),
       srcs = 1, is_float = True)


# Listing of opcodes
funop("floor",     0b000000)
funop("srsqrt",    0b000001)
funop("dfdx",      0b000100)
funop("dfdy",      0b000110)
funop("rcp",       0b001000)
funop("rsqrt",     0b001001)
funop("sin_pt_1",  0b001010)
funop("log2",      0b001100)
funop("exp2",      0b001101)
funop("sin_pt_2",  0b001110)
funop("ceil",      0b010000)
funop("trunc",     0b100000)
funop("roundeven", 0b110000)

op("fadd",
   encoding_16 = (0x26 | L, 0x3F | L, 6, _),
   encoding_32 = (0x2A | L, 0x3F | L, 6, _),
   srcs = 2, is_float = True)

op("fma",
   encoding_16 = (0x36, 0x3F, 6, 8),
   encoding_32 = (0x3A, 0x3F, 6, 8),
   srcs = 3, is_float = True)

op("fmul",
   encoding_16 = ((0x16 | L), (0x3F | L), 6, _),
   encoding_32 = ((0x1A | L), (0x3F | L), 6, _),
   srcs = 2, is_float = True)

op("mov_imm",
   encoding_32 = (0x62, 0xFF, 6, 8),
   encoding_16 = (0x62, 0xFF, 4, 6),
   imms = [IMM])

op("iadd",
   encoding_32 = (0x0E, 0x3F | L, 8, _),
   srcs = 2, imms = [SHIFT])

op("imad",
   encoding_32 = (0x1E, 0x3F | L, 8, _),
   srcs = 3, imms = [SHIFT])

op("bfi",
   encoding_32 = (0x2E, 0x7F | (0x3 << 26), 8, _),
   srcs = 3, imms = [BFI_MASK])

op("bfeil",
   encoding_32 = (0x2E | L, 0x7F | L | (0x3 << 26), 8, _),
   srcs = 3, imms = [BFI_MASK])

op("asr",
   encoding_32 = (0x2E | L | (0x1 << 26), 0x7F | L | (0x3 << 26), 8, _),
   srcs = 2)

op("icmpsel",
   encoding_32 = (0x12, 0x7F, 8, 10),
   srcs = 4, imms = [ICOND])

op("fcmpsel",
   encoding_32 = (0x02, 0x7F, 8, 10),
   srcs = 4, imms = [FCOND])

# sources are coordinates, LOD, texture, sampler, offset
# TODO: anything else?
op("texture_sample",
   encoding_32 = (0x32, 0x7F, 8, 10), # XXX WRONG SIZE
   srcs = 5, imms = [DIM, LOD_MODE, MASK, SCOREBOARD])

# sources are base, index
op("device_load",
   encoding_32 = (0x05, 0x7F, 6, 8),
   srcs = 2, imms = [FORMAT, MASK, SCOREBOARD])

op("wait", (0x38, 0xFF, 2, _), dests = 0,
   can_eliminate = False, imms = [SCOREBOARD])

op("get_sr", (0x72, 0x7F | L, 4, _), dests = 1, imms = [SR])

# Essentially same encoding
op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 0,
   can_eliminate = False, imms = [FORMAT])

op("st_tile", (0x09, 0x7F, 8, _), dests = 0, srcs = 1,
   can_eliminate = False, imms = [FORMAT])

for (name, exact) in [("any", 0xC000), ("none", 0xC200)]:
    op("jmp_exec_" + name, (exact, (1 << 16) - 1, 6, _), dests = 0, srcs = 0,
       can_eliminate = False, imms = [TARGET])

# TODO: model implicit r0l destinations
op("pop_exec", (0x52 | (0x3 << 9), ((1 << 48) - 1) ^ (0x3 << 7) ^ (0x3 << 11), 6, _),
   dests = 0, srcs = 0, can_eliminate = False, imms = [NEST])

# Integer and float variants of the if/else/while compare opcodes.
for is_float in [False, True]:
    # Only the integer variants include bits 26-27 and 38-39 in their mask.
    mod_mask = 0 if is_float else ((0x3 << 26) | (0x3 << 38))

    for (cf, cf_op) in [("if", 0), ("else", 1), ("while", 2)]:
        name = "{}_{}cmp".format(cf, "f" if is_float else "i")
        exact = 0x42 | (0x0 if is_float else 0x10) | (cf_op << 9)
        mask = 0x7F | (0x3 << 9) | mod_mask | (0x3 << 44)
        imms = [NEST, FCOND if is_float else ICOND, INVERT_COND]

        op(name, (exact, mask, 6, _), dests = 0, srcs = 2, can_eliminate = False,
           imms = imms, is_float = is_float)

op("bitop", (0x7E, 0x7F, 6, _), srcs = 2, imms = [TRUTH_TABLE])
op("convert", (0x3E | L, 0x7F | L | (0x3 << 38), 6, _), srcs = 2, imms = [ROUND])
op("ld_vary", (0x21, 0xBF, 8, _), srcs = 1, imms = [CHANNELS, PERSPECTIVE])
op("ld_vary_flat", (0xA1, 0xBF, 8, _), srcs = 1, imms = [CHANNELS])
op("st_vary", None, dests = 0, srcs = 2, can_eliminate = False)
op("stop", (0x88, 0xFFFF, 2, _), dests = 0, can_eliminate = False)
op("trap", (0x08, 0xFFFF, 2, _), dests = 0, can_eliminate = False)
op("writeout", (0x48, 0xFF, 4, _), dests = 0, imms = [WRITEOUT], can_eliminate = False)

# Opcodes registered without an encoding.
op("p_combine", _, srcs = 4)
op("p_extract", _, srcs = 1, imms = [COMPONENT])