17ec681f3Smrg# 27ec681f3Smrg# Copyright (C) 2020 Collabora, Ltd. 37ec681f3Smrg# 47ec681f3Smrg# Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg# copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg# to deal in the Software without restriction, including without limitation 77ec681f3Smrg# the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg# and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg# Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg# 117ec681f3Smrg# The above copyright notice and this permission notice (including the next 127ec681f3Smrg# paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg# Software. 147ec681f3Smrg# 157ec681f3Smrg# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg# IN THE SOFTWARE. 227ec681f3Smrg 237ec681f3Smrgimport sys 247ec681f3Smrgimport itertools 257ec681f3Smrgfrom bifrost_isa import parse_instructions, opname_to_c, expand_states 267ec681f3Smrgfrom mako.template import Template 277ec681f3Smrg 287ec681f3Smrginstructions = parse_instructions(sys.argv[1], include_unused = True) 297ec681f3Smrg 307ec681f3Smrg# Constructs a reserved mask for a derived to cull impossible encodings 317ec681f3Smrg 327ec681f3Smrgdef reserved_mask(derived): 337ec681f3Smrg ((pos, width), opts) = derived 347ec681f3Smrg reserved = [x is None for x in opts] 357ec681f3Smrg mask = sum([(y << x) for x, y in enumerate(reserved)]) 367ec681f3Smrg return (pos, width, mask) 377ec681f3Smrg 387ec681f3Smrgdef reserved_masks(op): 397ec681f3Smrg masks = [reserved_mask(m) for m in op[2].get("derived", [])] 407ec681f3Smrg return [m for m in masks if m[2] != 0] 417ec681f3Smrg 427ec681f3Smrg# To decode instructions, pattern match based on the rules: 437ec681f3Smrg# 447ec681f3Smrg# 1. Execution unit (FMA or ADD) must line up. 457ec681f3Smrg# 2. All exact bits must match. 467ec681f3Smrg# 3. No fields should be reserved in a legal encoding. 477ec681f3Smrg# 4. Tiebreaker: Longer exact masks (greater unsigned bitwise inverses) win. 487ec681f3Smrg# 497ec681f3Smrg# To implement, filter the execution unit and check for exact bits in 507ec681f3Smrg# descending order of exact mask length. Check for reserved fields per 517ec681f3Smrg# candidate and succeed if it matches. 527ec681f3Smrg# found. 537ec681f3Smrg 547ec681f3Smrgdef decode_op(instructions, is_fma): 557ec681f3Smrg # Filter out the desired execution unit 567ec681f3Smrg options = [n for n in instructions.keys() if (n[0] == '*') == is_fma] 577ec681f3Smrg 587ec681f3Smrg # Sort by exact masks, descending 597ec681f3Smrg MAX_MASK = (1 << (23 if is_fma else 20)) - 1 607ec681f3Smrg options.sort(key = lambda n: (MAX_MASK ^ instructions[n][2]["exact"][0])) 617ec681f3Smrg 627ec681f3Smrg # Map to what we need to template 637ec681f3Smrg mapped = [(opname_to_c(op), instructions[op][2]["exact"], reserved_masks(instructions[op])) for op in options] 647ec681f3Smrg 657ec681f3Smrg # Generate checks in order 667ec681f3Smrg template = """void 677ec681f3Smrgbi_disasm_${unit}(FILE *fp, unsigned bits, struct bifrost_regs *srcs, struct bifrost_regs *next_regs, unsigned staging_register, unsigned branch_offset, struct bi_constants *consts, bool last) 687ec681f3Smrg{ 697ec681f3Smrg fputs(" ", fp); 707ec681f3Smrg 717ec681f3Smrg% for (i, (name, (emask, ebits), derived)) in enumerate(options): 727ec681f3Smrg% if len(derived) > 0: 737ec681f3Smrg ${"else " if i > 0 else ""}if (unlikely(((bits & ${hex(emask)}) == ${hex(ebits)}) 747ec681f3Smrg% for (pos, width, reserved) in derived: 757ec681f3Smrg && !(${hex(reserved)} & (1 << _BITS(bits, ${pos}, ${width}))) 767ec681f3Smrg% endfor 777ec681f3Smrg )) 787ec681f3Smrg% else: 797ec681f3Smrg ${"else " if i > 0 else ""}if (unlikely(((bits & ${hex(emask)}) == ${hex(ebits)}))) 807ec681f3Smrg% endif 817ec681f3Smrg bi_disasm_${name}(fp, bits, srcs, next_regs, staging_register, branch_offset, consts, last); 827ec681f3Smrg% endfor 837ec681f3Smrg else 847ec681f3Smrg fprintf(fp, "INSTR_INVALID_ENC ${unit} %X", bits); 857ec681f3Smrg 867ec681f3Smrg fputs("\\n", fp); 877ec681f3Smrg}""" 887ec681f3Smrg 897ec681f3Smrg return Template(template).render(options = mapped, unit = "fma" if is_fma else "add") 907ec681f3Smrg 917ec681f3Smrg# Decoding emits a series of function calls to e.g. `fma_fadd_v2f16`. We need to 927ec681f3Smrg# emit functions to disassemble a single decoded instruction in a particular 937ec681f3Smrg# state. Sync prototypes to avoid moves when calling. 947ec681f3Smrg 957ec681f3Smrgdisasm_op_template = Template("""static void 967ec681f3Smrgbi_disasm_${c_name}(FILE *fp, unsigned bits, struct bifrost_regs *srcs, struct bifrost_regs *next_regs, unsigned staging_register, unsigned branch_offset, struct bi_constants *consts, bool last) 977ec681f3Smrg{ 987ec681f3Smrg ${body.strip()} 997ec681f3Smrg} 1007ec681f3Smrg""") 1017ec681f3Smrg 1027ec681f3Smrglut_template_only = Template(""" static const char *${field}[] = { 1037ec681f3Smrg ${", ".join(['"' + x + '"' for x in table])} 1047ec681f3Smrg }; 1057ec681f3Smrg""") 1067ec681f3Smrg 1077ec681f3Smrg# Given a lookup table written logically, generate an accessor 1087ec681f3Smrglut_template = Template(""" static const char *${field}_table[] = { 1097ec681f3Smrg ${", ".join(['"' + x + '"' for x in table])} 1107ec681f3Smrg }; 1117ec681f3Smrg 1127ec681f3Smrg const char *${field} = ${field}_table[_BITS(bits, ${pos}, ${width})]; 1137ec681f3Smrg""") 1147ec681f3Smrg 1157ec681f3Smrg# Helpers for decoding follow. pretty_mods applies dot syntax 1167ec681f3Smrg 1177ec681f3Smrgdef pretty_mods(opts, default): 1187ec681f3Smrg return [('.' + (opt or 'reserved') if opt != default else '') for opt in opts] 1197ec681f3Smrg 1207ec681f3Smrg# Recursively searches for the set of free variables required by an expression 1217ec681f3Smrg 1227ec681f3Smrgdef find_context_keys_expr(expr): 1237ec681f3Smrg if isinstance(expr, list): 1247ec681f3Smrg return set.union(*[find_context_keys_expr(x) for x in expr[1:]]) 1257ec681f3Smrg elif expr[0] == '#': 1267ec681f3Smrg return set() 1277ec681f3Smrg else: 1287ec681f3Smrg return set([expr]) 1297ec681f3Smrg 1307ec681f3Smrgdef find_context_keys(desc, test): 1317ec681f3Smrg keys = set() 1327ec681f3Smrg 1337ec681f3Smrg if len(test) > 0: 1347ec681f3Smrg keys |= find_context_keys_expr(test) 1357ec681f3Smrg 1367ec681f3Smrg for i, (_, vals) in enumerate(desc.get('derived', [])): 1377ec681f3Smrg for j, val in enumerate(vals): 1387ec681f3Smrg if val is not None: 1397ec681f3Smrg keys |= find_context_keys_expr(val) 1407ec681f3Smrg 1417ec681f3Smrg return keys 1427ec681f3Smrg 1437ec681f3Smrg# Compiles a logic expression to Python expression, ctx -> { T, F } 1447ec681f3Smrg 1457ec681f3SmrgEVALUATORS = { 1467ec681f3Smrg 'and': ' and ', 1477ec681f3Smrg 'or': ' or ', 1487ec681f3Smrg 'eq': ' == ', 1497ec681f3Smrg 'neq': ' != ', 1507ec681f3Smrg} 1517ec681f3Smrg 1527ec681f3Smrgdef compile_derived_inner(expr, keys): 1537ec681f3Smrg if expr == []: 1547ec681f3Smrg return 'True' 1557ec681f3Smrg elif expr is None or expr[0] == 'alias': 1567ec681f3Smrg return 'False' 1577ec681f3Smrg elif isinstance(expr, list): 1587ec681f3Smrg args = [compile_derived_inner(arg, keys) for arg in expr[1:]] 1597ec681f3Smrg return '(' + EVALUATORS[expr[0]].join(args) + ')' 1607ec681f3Smrg elif expr[0] == '#': 1617ec681f3Smrg return "'{}'".format(expr[1:]) 1627ec681f3Smrg elif expr == 'ordering': 1637ec681f3Smrg return expr 1647ec681f3Smrg else: 1657ec681f3Smrg return "ctx[{}]".format(keys.index(expr)) 1667ec681f3Smrg 1677ec681f3Smrgdef compile_derived(expr, keys): 1687ec681f3Smrg return eval('lambda ctx, ordering: ' + compile_derived_inner(expr, keys)) 1697ec681f3Smrg 1707ec681f3Smrg# Generate all possible combinations of values and evaluate the derived values 1717ec681f3Smrg# by bruteforce evaluation to generate a forward mapping (values -> deriveds) 1727ec681f3Smrg 1737ec681f3Smrgdef evaluate_forward_derived(vals, ctx, ordering): 1747ec681f3Smrg for j, expr in enumerate(vals): 1757ec681f3Smrg if expr(ctx, ordering): 1767ec681f3Smrg return j 1777ec681f3Smrg 1787ec681f3Smrg return None 1797ec681f3Smrg 1807ec681f3Smrgdef evaluate_forward(keys, derivf, testf, ctx, ordering): 1817ec681f3Smrg if not testf(ctx, ordering): 1827ec681f3Smrg return None 1837ec681f3Smrg 1847ec681f3Smrg deriv = [] 1857ec681f3Smrg 1867ec681f3Smrg for vals in derivf: 1877ec681f3Smrg evaled = evaluate_forward_derived(vals, ctx, ordering) 1887ec681f3Smrg 1897ec681f3Smrg if evaled is None: 1907ec681f3Smrg return None 1917ec681f3Smrg 1927ec681f3Smrg deriv.append(evaled) 1937ec681f3Smrg 1947ec681f3Smrg return deriv 1957ec681f3Smrg 1967ec681f3Smrgdef evaluate_forwards(keys, derivf, testf, mod_vals, ordered): 1977ec681f3Smrg orderings = ["lt", "gt"] if ordered else [None] 1987ec681f3Smrg return [[evaluate_forward(keys, derivf, testf, i, order) for i in itertools.product(*mod_vals)] for order in orderings] 1997ec681f3Smrg 2007ec681f3Smrg# Invert the forward mapping (values -> deriveds) of finite sets to produce a 2017ec681f3Smrg# backwards mapping (deriveds -> values), suitable for disassembly. This is 2027ec681f3Smrg# possible since the encoding is unambiguous, so this mapping is a bijection 2037ec681f3Smrg# (after reserved/impossible encodings) 2047ec681f3Smrg 2057ec681f3Smrgdef invert_lut(value_size, forward, derived, mod_map, keys, mod_vals): 2067ec681f3Smrg backwards = [None] * (1 << value_size) 2077ec681f3Smrg for (i, deriveds), ctx in zip(enumerate(forward), itertools.product(*mod_vals)): 2087ec681f3Smrg # Skip reserved 2097ec681f3Smrg if deriveds == None: 2107ec681f3Smrg continue 2117ec681f3Smrg 2127ec681f3Smrg shift = 0 2137ec681f3Smrg param = 0 2147ec681f3Smrg for j, ((x, width), y) in enumerate(derived): 2157ec681f3Smrg param += (deriveds[j] << shift) 2167ec681f3Smrg shift += width 2177ec681f3Smrg 2187ec681f3Smrg assert(param not in backwards) 2197ec681f3Smrg backwards[param] = ctx 2207ec681f3Smrg 2217ec681f3Smrg return backwards 2227ec681f3Smrg 2237ec681f3Smrg# Compute the value of all indirectly specified modifiers by using the 2247ec681f3Smrg# backwards mapping (deriveds -> values) as a run-time lookup table. 2257ec681f3Smrg 2267ec681f3Smrgdef build_lut(mnemonic, desc, test): 2277ec681f3Smrg # Construct the system 2287ec681f3Smrg facts = [] 2297ec681f3Smrg 2307ec681f3Smrg mod_map = {} 2317ec681f3Smrg 2327ec681f3Smrg for ((name, pos, width), default, values) in desc.get('modifiers', []): 2337ec681f3Smrg mod_map[name] = (width, values, pos, default) 2347ec681f3Smrg 2357ec681f3Smrg derived = desc.get('derived', []) 2367ec681f3Smrg 2377ec681f3Smrg # Find the keys and impose an order 2387ec681f3Smrg key_set = find_context_keys(desc, test) 2397ec681f3Smrg ordered = 'ordering' in key_set 2407ec681f3Smrg key_set.discard('ordering') 2417ec681f3Smrg keys = list(key_set) 2427ec681f3Smrg 2437ec681f3Smrg # Evaluate the deriveds for every possible state, forming a (state -> deriveds) map 2447ec681f3Smrg testf = compile_derived(test, keys) 2457ec681f3Smrg derivf = [[compile_derived(expr, keys) for expr in v] for (_, v) in derived] 2467ec681f3Smrg mod_vals = [mod_map[k][1] for k in keys] 2477ec681f3Smrg forward = evaluate_forwards(keys, derivf, testf, mod_vals, ordered) 2487ec681f3Smrg 2497ec681f3Smrg # Now invert that map to get a (deriveds -> state) map 2507ec681f3Smrg value_size = sum([width for ((x, width), y) in derived]) 2517ec681f3Smrg backwards = [invert_lut(value_size, f, derived, mod_map, keys, mod_vals) for f in forward] 2527ec681f3Smrg 2537ec681f3Smrg # From that map, we can generate LUTs 2547ec681f3Smrg output = "" 2557ec681f3Smrg 2567ec681f3Smrg if ordered: 2577ec681f3Smrg output += "bool ordering = (_BITS(bits, {}, 3) > _BITS(bits, {}, 3));\n".format(desc["srcs"][0][0], desc["srcs"][1][0]) 2587ec681f3Smrg 2597ec681f3Smrg for j, key in enumerate(keys): 2607ec681f3Smrg # Only generate tables for indirect specifiers 2617ec681f3Smrg if mod_map[key][2] is not None: 2627ec681f3Smrg continue 2637ec681f3Smrg 2647ec681f3Smrg idx_parts = [] 2657ec681f3Smrg shift = 0 2667ec681f3Smrg 2677ec681f3Smrg for ((pos, width), _) in derived: 2687ec681f3Smrg idx_parts.append("(_BITS(bits, {}, {}) << {})".format(pos, width, shift)) 2697ec681f3Smrg shift += width 2707ec681f3Smrg 2717ec681f3Smrg built_idx = (" | ".join(idx_parts)) if len(idx_parts) > 0 else "0" 2727ec681f3Smrg 2737ec681f3Smrg default = mod_map[key][3] 2747ec681f3Smrg 2757ec681f3Smrg if ordered: 2767ec681f3Smrg for i, order in enumerate(backwards): 2777ec681f3Smrg options = [ctx[j] if ctx is not None and ctx[j] is not None else "reserved" for ctx in order] 2787ec681f3Smrg output += lut_template_only.render(field = key + "_" + str(i), table = pretty_mods(options, default)) 2797ec681f3Smrg 2807ec681f3Smrg output += " const char *{} = ordering ? {}_1[{}] : {}_0[{}];\n".format(key, key, built_idx, key, built_idx) 2817ec681f3Smrg else: 2827ec681f3Smrg options = [ctx[j] if ctx is not None and ctx[j] is not None else "reserved" for ctx in backwards[0]] 2837ec681f3Smrg output += lut_template_only.render(field = key + "_table", table = pretty_mods(options, default)) 2847ec681f3Smrg output += " const char *{} = {}_table[{}];\n".format(key, key, built_idx) 2857ec681f3Smrg 2867ec681f3Smrg return output 2877ec681f3Smrg 2887ec681f3Smrgdef disasm_mod(mod, skip_mods): 2897ec681f3Smrg if mod[0][0] in skip_mods: 2907ec681f3Smrg return '' 2917ec681f3Smrg else: 2927ec681f3Smrg return ' fputs({}, fp);\n'.format(mod[0][0]) 2937ec681f3Smrg 2947ec681f3Smrgdef disasm_op(name, op): 2957ec681f3Smrg (mnemonic, test, desc) = op 2967ec681f3Smrg is_fma = mnemonic[0] == '*' 2977ec681f3Smrg 2987ec681f3Smrg # Modifiers may be either direct (pos is not None) or indirect (pos is 2997ec681f3Smrg # None). If direct, we just do the bit lookup. If indirect, we use a LUT. 3007ec681f3Smrg 3017ec681f3Smrg body = "" 3027ec681f3Smrg skip_mods = [] 3037ec681f3Smrg 3047ec681f3Smrg body += build_lut(mnemonic, desc, test) 3057ec681f3Smrg 3067ec681f3Smrg for ((mod, pos, width), default, opts) in desc.get('modifiers', []): 3077ec681f3Smrg if pos is not None: 3087ec681f3Smrg body += lut_template.render(field = mod, table = pretty_mods(opts, default), pos = pos, width = width) + "\n" 3097ec681f3Smrg 3107ec681f3Smrg # Mnemonic, followed by modifiers 3117ec681f3Smrg body += ' fputs("{}", fp);\n'.format(mnemonic) 3127ec681f3Smrg 3137ec681f3Smrg srcs = desc.get('srcs', []) 3147ec681f3Smrg 3157ec681f3Smrg for mod in desc.get('modifiers', []): 3167ec681f3Smrg # Skip per-source until next block 3177ec681f3Smrg if mod[0][0][-1] in "0123" and int(mod[0][0][-1]) < len(srcs): 3187ec681f3Smrg continue 3197ec681f3Smrg 3207ec681f3Smrg body += disasm_mod(mod, skip_mods) 3217ec681f3Smrg 3227ec681f3Smrg body += ' fputs(" ", fp);\n' 3237ec681f3Smrg body += ' bi_disasm_dest_{}(fp, next_regs, last);\n'.format('fma' if is_fma else 'add') 3247ec681f3Smrg 3257ec681f3Smrg # Next up, each source. Source modifiers are inserterd here 3267ec681f3Smrg 3277ec681f3Smrg for i, (pos, mask) in enumerate(srcs): 3287ec681f3Smrg body += ' fputs(", ", fp);\n' 3297ec681f3Smrg body += ' dump_src(fp, _BITS(bits, {}, 3), *srcs, branch_offset, consts, {});\n'.format(pos, "true" if is_fma else "false") 3307ec681f3Smrg 3317ec681f3Smrg # Error check if needed 3327ec681f3Smrg if (mask != 0xFF): 3337ec681f3Smrg body += ' if (!({} & (1 << _BITS(bits, {}, 3)))) fputs("(INVALID)", fp);\n'.format(hex(mask), pos, 3) 3347ec681f3Smrg 3357ec681f3Smrg # Print modifiers suffixed with this src number (e.g. abs0 for src0) 3367ec681f3Smrg for mod in desc.get('modifiers', []): 3377ec681f3Smrg if mod[0][0][-1] == str(i): 3387ec681f3Smrg body += disasm_mod(mod, skip_mods) 3397ec681f3Smrg 3407ec681f3Smrg # And each immediate 3417ec681f3Smrg for (imm, pos, width) in desc.get('immediates', []): 3427ec681f3Smrg body += ' fprintf(fp, ", {}:%u", _BITS(bits, {}, {}));\n'.format(imm, pos, width) 3437ec681f3Smrg 3447ec681f3Smrg # Attach a staging register if one is used 3457ec681f3Smrg if desc.get('staging'): 3467ec681f3Smrg body += ' fprintf(fp, ", @r%u", staging_register);\n' 3477ec681f3Smrg 3487ec681f3Smrg return disasm_op_template.render(c_name = opname_to_c(name), body = body) 3497ec681f3Smrg 3507ec681f3Smrgprint('#include "util/macros.h"') 3517ec681f3Smrgprint('#include "disassemble.h"') 3527ec681f3Smrg 3537ec681f3Smrgstates = expand_states(instructions) 3547ec681f3Smrgprint('#define _BITS(bits, pos, width) (((bits) >> (pos)) & ((1 << (width)) - 1))') 3557ec681f3Smrg 3567ec681f3Smrgfor st in states: 3577ec681f3Smrg print(disasm_op(st, states[st])) 3587ec681f3Smrg 3597ec681f3Smrgprint(decode_op(states, True)) 3607ec681f3Smrgprint(decode_op(states, False)) 361