17ec681f3Smrg#
27ec681f3Smrg# Copyright (C) 2020 Collabora, Ltd.
37ec681f3Smrg#
47ec681f3Smrg# Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg# copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg# to deal in the Software without restriction, including without limitation
77ec681f3Smrg# the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg# and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg# Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg#
117ec681f3Smrg# The above copyright notice and this permission notice (including the next
127ec681f3Smrg# paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg# Software.
147ec681f3Smrg#
157ec681f3Smrg# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg# IN THE SOFTWARE.
227ec681f3Smrg
237ec681f3Smrgimport sys
247ec681f3Smrgimport itertools
257ec681f3Smrgfrom bifrost_isa import parse_instructions, opname_to_c, expand_states
267ec681f3Smrgfrom mako.template import Template
277ec681f3Smrg
287ec681f3Smrginstructions = parse_instructions(sys.argv[1], include_unused = True)
297ec681f3Smrg
307ec681f3Smrg# Constructs a reserved mask for a derived to cull impossible encodings
317ec681f3Smrg
327ec681f3Smrgdef reserved_mask(derived):
337ec681f3Smrg    ((pos, width), opts) = derived
347ec681f3Smrg    reserved = [x is None for x in opts]
357ec681f3Smrg    mask = sum([(y << x) for x, y in enumerate(reserved)])
367ec681f3Smrg    return (pos, width, mask)
377ec681f3Smrg
387ec681f3Smrgdef reserved_masks(op):
397ec681f3Smrg    masks = [reserved_mask(m) for m in op[2].get("derived", [])]
407ec681f3Smrg    return [m for m in masks if m[2] != 0]
417ec681f3Smrg
427ec681f3Smrg# To decode instructions, pattern match based on the rules:
437ec681f3Smrg#
447ec681f3Smrg# 1. Execution unit (FMA or ADD) must line up.
457ec681f3Smrg# 2. All exact bits must match.
467ec681f3Smrg# 3. No fields should be reserved in a legal encoding.
477ec681f3Smrg# 4. Tiebreaker: Longer exact masks (greater unsigned bitwise inverses) win.
487ec681f3Smrg#
497ec681f3Smrg# To implement, filter the execution unit and check for exact bits in
507ec681f3Smrg# descending order of exact mask length.  Check for reserved fields per
517ec681f3Smrg# candidate and succeed if it matches.
527ec681f3Smrg# found.
537ec681f3Smrg
547ec681f3Smrgdef decode_op(instructions, is_fma):
557ec681f3Smrg    # Filter out the desired execution unit
567ec681f3Smrg    options = [n for n in instructions.keys() if (n[0] == '*') == is_fma]
577ec681f3Smrg
587ec681f3Smrg    # Sort by exact masks, descending
597ec681f3Smrg    MAX_MASK = (1 << (23 if is_fma else 20)) - 1
607ec681f3Smrg    options.sort(key = lambda n: (MAX_MASK ^ instructions[n][2]["exact"][0]))
617ec681f3Smrg
627ec681f3Smrg    # Map to what we need to template
637ec681f3Smrg    mapped = [(opname_to_c(op), instructions[op][2]["exact"], reserved_masks(instructions[op])) for op in options]
647ec681f3Smrg
657ec681f3Smrg    # Generate checks in order
667ec681f3Smrg    template = """void
677ec681f3Smrgbi_disasm_${unit}(FILE *fp, unsigned bits, struct bifrost_regs *srcs, struct bifrost_regs *next_regs, unsigned staging_register, unsigned branch_offset, struct bi_constants *consts, bool last)
687ec681f3Smrg{
697ec681f3Smrg    fputs("    ", fp);
707ec681f3Smrg
717ec681f3Smrg% for (i, (name, (emask, ebits), derived)) in enumerate(options):
727ec681f3Smrg% if len(derived) > 0:
737ec681f3Smrg    ${"else " if i > 0 else ""}if (unlikely(((bits & ${hex(emask)}) == ${hex(ebits)})
747ec681f3Smrg% for (pos, width, reserved) in derived:
757ec681f3Smrg        && !(${hex(reserved)} & (1 << _BITS(bits, ${pos}, ${width})))
767ec681f3Smrg% endfor
777ec681f3Smrg    ))
787ec681f3Smrg% else:
797ec681f3Smrg    ${"else " if i > 0 else ""}if (unlikely(((bits & ${hex(emask)}) == ${hex(ebits)})))
807ec681f3Smrg% endif
817ec681f3Smrg        bi_disasm_${name}(fp, bits, srcs, next_regs, staging_register, branch_offset, consts, last);
827ec681f3Smrg% endfor
837ec681f3Smrg    else
847ec681f3Smrg        fprintf(fp, "INSTR_INVALID_ENC ${unit} %X", bits);
857ec681f3Smrg
867ec681f3Smrg    fputs("\\n", fp);
877ec681f3Smrg}"""
887ec681f3Smrg
897ec681f3Smrg    return Template(template).render(options = mapped, unit = "fma" if is_fma else "add")
907ec681f3Smrg
917ec681f3Smrg# Decoding emits a series of function calls to e.g. `fma_fadd_v2f16`. We need to
927ec681f3Smrg# emit functions to disassemble a single decoded instruction in a particular
937ec681f3Smrg# state. Sync prototypes to avoid moves when calling.
947ec681f3Smrg
957ec681f3Smrgdisasm_op_template = Template("""static void
967ec681f3Smrgbi_disasm_${c_name}(FILE *fp, unsigned bits, struct bifrost_regs *srcs, struct bifrost_regs *next_regs, unsigned staging_register, unsigned branch_offset, struct bi_constants *consts, bool last)
977ec681f3Smrg{
987ec681f3Smrg    ${body.strip()}
997ec681f3Smrg}
1007ec681f3Smrg""")
1017ec681f3Smrg
1027ec681f3Smrglut_template_only = Template("""    static const char *${field}[] = {
1037ec681f3Smrg        ${", ".join(['"' + x + '"' for x in table])}
1047ec681f3Smrg    };
1057ec681f3Smrg""")
1067ec681f3Smrg
1077ec681f3Smrg# Given a lookup table written logically, generate an accessor
1087ec681f3Smrglut_template = Template("""    static const char *${field}_table[] = {
1097ec681f3Smrg        ${", ".join(['"' + x + '"' for x in table])}
1107ec681f3Smrg    };
1117ec681f3Smrg
1127ec681f3Smrg    const char *${field} = ${field}_table[_BITS(bits, ${pos}, ${width})];
1137ec681f3Smrg""")
1147ec681f3Smrg
1157ec681f3Smrg# Helpers for decoding follow. pretty_mods applies dot syntax
1167ec681f3Smrg
1177ec681f3Smrgdef pretty_mods(opts, default):
1187ec681f3Smrg    return [('.' + (opt or 'reserved') if opt != default else '') for opt in opts]
1197ec681f3Smrg
1207ec681f3Smrg# Recursively searches for the set of free variables required by an expression
1217ec681f3Smrg
1227ec681f3Smrgdef find_context_keys_expr(expr):
1237ec681f3Smrg    if isinstance(expr, list):
1247ec681f3Smrg        return set.union(*[find_context_keys_expr(x) for x in expr[1:]])
1257ec681f3Smrg    elif expr[0] == '#':
1267ec681f3Smrg        return set()
1277ec681f3Smrg    else:
1287ec681f3Smrg        return set([expr])
1297ec681f3Smrg
1307ec681f3Smrgdef find_context_keys(desc, test):
1317ec681f3Smrg    keys = set()
1327ec681f3Smrg
1337ec681f3Smrg    if len(test) > 0:
1347ec681f3Smrg        keys |= find_context_keys_expr(test)
1357ec681f3Smrg
1367ec681f3Smrg    for i, (_, vals) in enumerate(desc.get('derived', [])):
1377ec681f3Smrg        for j, val in enumerate(vals):
1387ec681f3Smrg            if val is not None:
1397ec681f3Smrg                keys |= find_context_keys_expr(val)
1407ec681f3Smrg
1417ec681f3Smrg    return keys
1427ec681f3Smrg
1437ec681f3Smrg# Compiles a logic expression to Python expression, ctx -> { T, F }
1447ec681f3Smrg
1457ec681f3SmrgEVALUATORS = {
1467ec681f3Smrg        'and': ' and ',
1477ec681f3Smrg        'or': ' or ',
1487ec681f3Smrg        'eq': ' == ',
1497ec681f3Smrg        'neq': ' != ',
1507ec681f3Smrg}
1517ec681f3Smrg
1527ec681f3Smrgdef compile_derived_inner(expr, keys):
1537ec681f3Smrg    if expr == []:
1547ec681f3Smrg        return 'True'
1557ec681f3Smrg    elif expr is None or expr[0] == 'alias':
1567ec681f3Smrg        return 'False'
1577ec681f3Smrg    elif isinstance(expr, list):
1587ec681f3Smrg        args = [compile_derived_inner(arg, keys) for arg in expr[1:]]
1597ec681f3Smrg        return '(' + EVALUATORS[expr[0]].join(args) + ')'
1607ec681f3Smrg    elif expr[0] == '#':
1617ec681f3Smrg        return "'{}'".format(expr[1:])
1627ec681f3Smrg    elif expr == 'ordering':
1637ec681f3Smrg        return expr
1647ec681f3Smrg    else:
1657ec681f3Smrg        return "ctx[{}]".format(keys.index(expr))
1667ec681f3Smrg
1677ec681f3Smrgdef compile_derived(expr, keys):
1687ec681f3Smrg    return eval('lambda ctx, ordering: ' + compile_derived_inner(expr, keys))
1697ec681f3Smrg
1707ec681f3Smrg# Generate all possible combinations of values and evaluate the derived values
1717ec681f3Smrg# by bruteforce evaluation to generate a forward mapping (values -> deriveds)
1727ec681f3Smrg
1737ec681f3Smrgdef evaluate_forward_derived(vals, ctx, ordering):
1747ec681f3Smrg    for j, expr in enumerate(vals):
1757ec681f3Smrg        if expr(ctx, ordering):
1767ec681f3Smrg            return j
1777ec681f3Smrg
1787ec681f3Smrg    return None
1797ec681f3Smrg
1807ec681f3Smrgdef evaluate_forward(keys, derivf, testf, ctx, ordering):
1817ec681f3Smrg    if not testf(ctx, ordering):
1827ec681f3Smrg        return None
1837ec681f3Smrg
1847ec681f3Smrg    deriv = []
1857ec681f3Smrg
1867ec681f3Smrg    for vals in derivf:
1877ec681f3Smrg        evaled = evaluate_forward_derived(vals, ctx, ordering)
1887ec681f3Smrg
1897ec681f3Smrg        if evaled is None:
1907ec681f3Smrg            return None
1917ec681f3Smrg
1927ec681f3Smrg        deriv.append(evaled)
1937ec681f3Smrg
1947ec681f3Smrg    return deriv
1957ec681f3Smrg
1967ec681f3Smrgdef evaluate_forwards(keys, derivf, testf, mod_vals, ordered):
1977ec681f3Smrg    orderings = ["lt", "gt"] if ordered else [None]
1987ec681f3Smrg    return [[evaluate_forward(keys, derivf, testf, i, order) for i in itertools.product(*mod_vals)] for order in orderings]
1997ec681f3Smrg
2007ec681f3Smrg# Invert the forward mapping (values -> deriveds) of finite sets to produce a
2017ec681f3Smrg# backwards mapping (deriveds -> values), suitable for disassembly. This is
2027ec681f3Smrg# possible since the encoding is unambiguous, so this mapping is a bijection
2037ec681f3Smrg# (after reserved/impossible encodings)
2047ec681f3Smrg
2057ec681f3Smrgdef invert_lut(value_size, forward, derived, mod_map, keys, mod_vals):
2067ec681f3Smrg    backwards = [None] * (1 << value_size)
2077ec681f3Smrg    for (i, deriveds), ctx in zip(enumerate(forward), itertools.product(*mod_vals)):
2087ec681f3Smrg        # Skip reserved
2097ec681f3Smrg        if deriveds == None:
2107ec681f3Smrg            continue
2117ec681f3Smrg
2127ec681f3Smrg        shift = 0
2137ec681f3Smrg        param = 0
2147ec681f3Smrg        for j, ((x, width), y) in enumerate(derived):
2157ec681f3Smrg            param += (deriveds[j] << shift)
2167ec681f3Smrg            shift += width
2177ec681f3Smrg
2187ec681f3Smrg        assert(param not in backwards)
2197ec681f3Smrg        backwards[param] = ctx
2207ec681f3Smrg
2217ec681f3Smrg    return backwards
2227ec681f3Smrg
2237ec681f3Smrg# Compute the value of all indirectly specified modifiers by using the
2247ec681f3Smrg# backwards mapping (deriveds -> values) as a run-time lookup table.
2257ec681f3Smrg
2267ec681f3Smrgdef build_lut(mnemonic, desc, test):
2277ec681f3Smrg    # Construct the system
2287ec681f3Smrg    facts = []
2297ec681f3Smrg
2307ec681f3Smrg    mod_map = {}
2317ec681f3Smrg
2327ec681f3Smrg    for ((name, pos, width), default, values) in desc.get('modifiers', []):
2337ec681f3Smrg        mod_map[name] = (width, values, pos, default)
2347ec681f3Smrg
2357ec681f3Smrg    derived = desc.get('derived', [])
2367ec681f3Smrg
2377ec681f3Smrg    # Find the keys and impose an order
2387ec681f3Smrg    key_set = find_context_keys(desc, test)
2397ec681f3Smrg    ordered = 'ordering' in key_set
2407ec681f3Smrg    key_set.discard('ordering')
2417ec681f3Smrg    keys = list(key_set)
2427ec681f3Smrg
2437ec681f3Smrg    # Evaluate the deriveds for every possible state, forming a (state -> deriveds) map
2447ec681f3Smrg    testf = compile_derived(test, keys)
2457ec681f3Smrg    derivf = [[compile_derived(expr, keys) for expr in v] for (_, v) in derived]
2467ec681f3Smrg    mod_vals = [mod_map[k][1] for k in keys]
2477ec681f3Smrg    forward = evaluate_forwards(keys, derivf, testf, mod_vals, ordered)
2487ec681f3Smrg
2497ec681f3Smrg    # Now invert that map to get a (deriveds -> state) map
2507ec681f3Smrg    value_size = sum([width for ((x, width), y) in derived])
2517ec681f3Smrg    backwards = [invert_lut(value_size, f, derived, mod_map, keys, mod_vals) for f in forward]
2527ec681f3Smrg
2537ec681f3Smrg    # From that map, we can generate LUTs
2547ec681f3Smrg    output = ""
2557ec681f3Smrg
2567ec681f3Smrg    if ordered:
2577ec681f3Smrg        output += "bool ordering = (_BITS(bits, {}, 3) > _BITS(bits, {}, 3));\n".format(desc["srcs"][0][0], desc["srcs"][1][0])
2587ec681f3Smrg
2597ec681f3Smrg    for j, key in enumerate(keys):
2607ec681f3Smrg        # Only generate tables for indirect specifiers
2617ec681f3Smrg        if mod_map[key][2] is not None:
2627ec681f3Smrg            continue
2637ec681f3Smrg
2647ec681f3Smrg        idx_parts = []
2657ec681f3Smrg        shift = 0
2667ec681f3Smrg
2677ec681f3Smrg        for ((pos, width), _) in derived:
2687ec681f3Smrg            idx_parts.append("(_BITS(bits, {}, {}) << {})".format(pos, width, shift))
2697ec681f3Smrg            shift += width
2707ec681f3Smrg
2717ec681f3Smrg        built_idx = (" | ".join(idx_parts)) if len(idx_parts) > 0 else "0"
2727ec681f3Smrg
2737ec681f3Smrg        default = mod_map[key][3]
2747ec681f3Smrg
2757ec681f3Smrg        if ordered:
2767ec681f3Smrg            for i, order in enumerate(backwards):
2777ec681f3Smrg                options = [ctx[j] if ctx is not None and ctx[j] is not None else "reserved" for ctx in order]
2787ec681f3Smrg                output += lut_template_only.render(field = key + "_" + str(i), table = pretty_mods(options, default))
2797ec681f3Smrg
2807ec681f3Smrg            output += "    const char *{} = ordering ? {}_1[{}] : {}_0[{}];\n".format(key, key, built_idx, key, built_idx)
2817ec681f3Smrg        else:
2827ec681f3Smrg            options = [ctx[j] if ctx is not None and ctx[j] is not None else "reserved" for ctx in backwards[0]]
2837ec681f3Smrg            output += lut_template_only.render(field = key + "_table", table = pretty_mods(options, default))
2847ec681f3Smrg            output += "    const char *{} = {}_table[{}];\n".format(key, key, built_idx)
2857ec681f3Smrg
2867ec681f3Smrg    return output
2877ec681f3Smrg
2887ec681f3Smrgdef disasm_mod(mod, skip_mods):
2897ec681f3Smrg    if mod[0][0] in skip_mods:
2907ec681f3Smrg        return ''
2917ec681f3Smrg    else:
2927ec681f3Smrg        return '    fputs({}, fp);\n'.format(mod[0][0])
2937ec681f3Smrg
2947ec681f3Smrgdef disasm_op(name, op):
2957ec681f3Smrg    (mnemonic, test, desc) = op
2967ec681f3Smrg    is_fma = mnemonic[0] == '*'
2977ec681f3Smrg
2987ec681f3Smrg    # Modifiers may be either direct (pos is not None) or indirect (pos is
2997ec681f3Smrg    # None). If direct, we just do the bit lookup. If indirect, we use a LUT.
3007ec681f3Smrg
3017ec681f3Smrg    body = ""
3027ec681f3Smrg    skip_mods = []
3037ec681f3Smrg
3047ec681f3Smrg    body += build_lut(mnemonic, desc, test)
3057ec681f3Smrg
3067ec681f3Smrg    for ((mod, pos, width), default, opts) in desc.get('modifiers', []):
3077ec681f3Smrg        if pos is not None:
3087ec681f3Smrg            body += lut_template.render(field = mod, table = pretty_mods(opts, default), pos = pos, width = width) + "\n"
3097ec681f3Smrg
3107ec681f3Smrg    # Mnemonic, followed by modifiers
3117ec681f3Smrg    body += '    fputs("{}", fp);\n'.format(mnemonic)
3127ec681f3Smrg
3137ec681f3Smrg    srcs = desc.get('srcs', [])
3147ec681f3Smrg
3157ec681f3Smrg    for mod in desc.get('modifiers', []):
3167ec681f3Smrg        # Skip per-source until next block
3177ec681f3Smrg        if mod[0][0][-1] in "0123" and int(mod[0][0][-1]) < len(srcs):
3187ec681f3Smrg            continue
3197ec681f3Smrg
3207ec681f3Smrg        body += disasm_mod(mod, skip_mods)
3217ec681f3Smrg
3227ec681f3Smrg    body += '    fputs(" ", fp);\n'
3237ec681f3Smrg    body += '    bi_disasm_dest_{}(fp, next_regs, last);\n'.format('fma' if is_fma else 'add')
3247ec681f3Smrg
3257ec681f3Smrg    # Next up, each source. Source modifiers are inserterd here
3267ec681f3Smrg
3277ec681f3Smrg    for i, (pos, mask) in enumerate(srcs):
3287ec681f3Smrg        body += '    fputs(", ", fp);\n'
3297ec681f3Smrg        body += '    dump_src(fp, _BITS(bits, {}, 3), *srcs, branch_offset, consts, {});\n'.format(pos, "true" if is_fma else "false")
3307ec681f3Smrg
3317ec681f3Smrg        # Error check if needed
3327ec681f3Smrg        if (mask != 0xFF):
3337ec681f3Smrg            body += '    if (!({} & (1 << _BITS(bits, {}, 3)))) fputs("(INVALID)", fp);\n'.format(hex(mask), pos, 3)
3347ec681f3Smrg
3357ec681f3Smrg        # Print modifiers suffixed with this src number (e.g. abs0 for src0)
3367ec681f3Smrg        for mod in desc.get('modifiers', []):
3377ec681f3Smrg            if mod[0][0][-1] == str(i):
3387ec681f3Smrg                body += disasm_mod(mod, skip_mods)
3397ec681f3Smrg
3407ec681f3Smrg    # And each immediate
3417ec681f3Smrg    for (imm, pos, width) in desc.get('immediates', []):
3427ec681f3Smrg        body += '    fprintf(fp, ", {}:%u", _BITS(bits, {}, {}));\n'.format(imm, pos, width)
3437ec681f3Smrg
3447ec681f3Smrg    # Attach a staging register if one is used
3457ec681f3Smrg    if desc.get('staging'):
3467ec681f3Smrg        body += '    fprintf(fp, ", @r%u", staging_register);\n'
3477ec681f3Smrg
3487ec681f3Smrg    return disasm_op_template.render(c_name = opname_to_c(name), body = body)
3497ec681f3Smrg
3507ec681f3Smrgprint('#include "util/macros.h"')
3517ec681f3Smrgprint('#include "disassemble.h"')
3527ec681f3Smrg
3537ec681f3Smrgstates = expand_states(instructions)
3547ec681f3Smrgprint('#define _BITS(bits, pos, width) (((bits) >> (pos)) & ((1 << (width)) - 1))')
3557ec681f3Smrg
3567ec681f3Smrgfor st in states:
3577ec681f3Smrg    print(disasm_op(st, states[st]))
3587ec681f3Smrg
3597ec681f3Smrgprint(decode_op(states, True))
3607ec681f3Smrgprint(decode_op(states, False))
361