compiler.h revision 7ec681f3
17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2020 Collabora Ltd.
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3Smrg * SOFTWARE.
227ec681f3Smrg *
237ec681f3Smrg * Authors (Collabora):
247ec681f3Smrg *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
257ec681f3Smrg */
267ec681f3Smrg
277ec681f3Smrg#ifndef __BIFROST_COMPILER_H
287ec681f3Smrg#define __BIFROST_COMPILER_H
297ec681f3Smrg
307ec681f3Smrg#include "bifrost.h"
317ec681f3Smrg#include "bi_opcodes.h"
327ec681f3Smrg#include "compiler/nir/nir.h"
337ec681f3Smrg#include "panfrost/util/pan_ir.h"
347ec681f3Smrg#include "util/u_math.h"
357ec681f3Smrg#include "util/half_float.h"
367ec681f3Smrg
/* Swizzles across bytes in a 32-bit word. Expresses swz in the XML directly.
 * To express widen, use the corresponding replicated form, i.e. H01 = identity
 * for widen = none, H00 for widen = h0, B1111 for widen = b1. For lane, also
 * use the replicated form (interpretation is governed by the opcode). For
 * 8-bit lanes with two channels, use replicated forms for replicated forms
 * (TODO: what about others?). For 8-bit lanes with four channels, use the
 * matching form (TODO: what about others?).
 */
457ec681f3Smrg
/* Swizzle selecting which 16-bit halves or 8-bit bytes of a 32-bit word are
 * read. The numeric values are load-bearing: bi_swz_16() builds the H* values
 * arithmetically from two lane bits, bi_byte() adds a lane number to
 * BI_SWIZZLE_B0000, and every value must fit the 4-bit swizzle field of
 * bi_index. */
enum bi_swizzle {
        /* 16-bit swizzle ordering deliberate for fast compute */
        BI_SWIZZLE_H00 = 0, /* = B0101 */
        BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */
        BI_SWIZZLE_H10 = 2, /* = B2301 */
        BI_SWIZZLE_H11 = 3, /* = B2323 */

        /* replication order should be maintained for fast compute */
        BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */
        BI_SWIZZLE_B1111 = 5,
        BI_SWIZZLE_B2222 = 6,
        BI_SWIZZLE_B3333 = 7,

        /* totally special for explicit pattern matching */
        BI_SWIZZLE_B0011 = 8, /* +SWZ.v4i8 */
        BI_SWIZZLE_B2233 = 9, /* +SWZ.v4i8 */
        BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */
        BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */

        BI_SWIZZLE_B0022 = 12, /* for b02 lanes */
};
677ec681f3Smrg
687ec681f3Smrg/* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant
697ec681f3Smrg * folding and Valhall constant optimization. */
707ec681f3Smrg
717ec681f3Smrgstatic inline uint32_t
727ec681f3Smrgbi_apply_swizzle(uint32_t value, enum bi_swizzle swz)
737ec681f3Smrg{
747ec681f3Smrg   const uint16_t *h = (const uint16_t *) &value;
757ec681f3Smrg   const uint8_t  *b = (const uint8_t *) &value;
767ec681f3Smrg
777ec681f3Smrg#define H(h0, h1) (h[h0] | (h[h1] << 16))
787ec681f3Smrg#define B(b0, b1, b2, b3) (b[b0] | (b[b1] << 8) | (b[b2] << 16) | (b[b3] << 24))
797ec681f3Smrg
807ec681f3Smrg   switch (swz) {
817ec681f3Smrg   case BI_SWIZZLE_H00: return H(0, 0);
827ec681f3Smrg   case BI_SWIZZLE_H01: return H(0, 1);
837ec681f3Smrg   case BI_SWIZZLE_H10: return H(1, 0);
847ec681f3Smrg   case BI_SWIZZLE_H11: return H(1, 1);
857ec681f3Smrg   case BI_SWIZZLE_B0000: return B(0, 0, 0, 0);
867ec681f3Smrg   case BI_SWIZZLE_B1111: return B(1, 1, 1, 1);
877ec681f3Smrg   case BI_SWIZZLE_B2222: return B(2, 2, 2, 2);
887ec681f3Smrg   case BI_SWIZZLE_B3333: return B(3, 3, 3, 3);
897ec681f3Smrg   case BI_SWIZZLE_B0011: return B(0, 0, 1, 1);
907ec681f3Smrg   case BI_SWIZZLE_B2233: return B(2, 2, 3, 3);
917ec681f3Smrg   case BI_SWIZZLE_B1032: return B(1, 0, 3, 2);
927ec681f3Smrg   case BI_SWIZZLE_B3210: return B(3, 2, 1, 0);
937ec681f3Smrg   case BI_SWIZZLE_B0022: return B(0, 0, 2, 2);
947ec681f3Smrg   }
957ec681f3Smrg
967ec681f3Smrg#undef H
977ec681f3Smrg#undef B
987ec681f3Smrg
997ec681f3Smrg   unreachable("Invalid swizzle");
1007ec681f3Smrg}
1017ec681f3Smrg
/* Discriminates what bi_index.value refers to. Stored in a 3-bit field of
 * bi_index, so values must stay below 8. */
enum bi_index_type {
        /* No value; see bi_null() */
        BI_INDEX_NULL = 0,
        /* SSA value or virtual register, told apart by bi_index.reg */
        BI_INDEX_NORMAL = 1,
        /* Register number; see bi_register() */
        BI_INDEX_REGISTER = 2,
        /* 32-bit immediate held in bi_index.value; see bi_imm_u32() */
        BI_INDEX_CONSTANT = 3,
        /* An enum bifrost_packed_src; see bi_passthrough() */
        BI_INDEX_PASS = 4,
        /* An enum bir_fau; see bi_fau() */
        BI_INDEX_FAU = 5
};
1107ec681f3Smrg
/* A reference to a source or destination operand, passed around by value.
 * The constructor helpers below build these; the modifier helpers return
 * modified copies. */
typedef struct {
        /* Interpretation depends on type: an SSA/register name, a register
         * number, an immediate, a passthrough source or a FAU selector */
        uint32_t value;

        /* modifiers, should only be set if applicable for a given instruction.
         * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where
         * applicable, neg plays the role of not */
        bool abs : 1;
        bool neg : 1;

        /* The last use of a value, should be purged from the register cache.
         * Set by liveness analysis. */
        bool discard : 1;

        /* For a source, the swizzle. For a destination, acts a bit like a
         * write mask. Identity for the full 32-bit, H00 for only caring about
         * the lower half, other values unused. */
        enum bi_swizzle swizzle : 4;

        /* Word offset into a vectored value (see bi_word); bi_fau() uses 1 to
         * select the high word. Only 2 bits wide. */
        uint32_t offset : 2;

        /* For BI_INDEX_NORMAL: set for a register, clear for SSA */
        bool reg : 1;

        /* Selects how value is interpreted */
        enum bi_index_type type : 3;
} bi_index;
1327ec681f3Smrg
1337ec681f3Smrgstatic inline bi_index
1347ec681f3Smrgbi_get_index(unsigned value, bool is_reg, unsigned offset)
1357ec681f3Smrg{
1367ec681f3Smrg        return (bi_index) {
1377ec681f3Smrg                .type = BI_INDEX_NORMAL,
1387ec681f3Smrg                .value = value,
1397ec681f3Smrg                .swizzle = BI_SWIZZLE_H01,
1407ec681f3Smrg                .offset = offset,
1417ec681f3Smrg                .reg = is_reg,
1427ec681f3Smrg        };
1437ec681f3Smrg}
1447ec681f3Smrg
1457ec681f3Smrgstatic inline bi_index
1467ec681f3Smrgbi_register(unsigned reg)
1477ec681f3Smrg{
1487ec681f3Smrg        assert(reg < 64);
1497ec681f3Smrg
1507ec681f3Smrg        return (bi_index) {
1517ec681f3Smrg                .type = BI_INDEX_REGISTER,
1527ec681f3Smrg                .swizzle = BI_SWIZZLE_H01,
1537ec681f3Smrg                .value = reg
1547ec681f3Smrg        };
1557ec681f3Smrg}
1567ec681f3Smrg
1577ec681f3Smrgstatic inline bi_index
1587ec681f3Smrgbi_imm_u32(uint32_t imm)
1597ec681f3Smrg{
1607ec681f3Smrg        return (bi_index) {
1617ec681f3Smrg                .type = BI_INDEX_CONSTANT,
1627ec681f3Smrg                .swizzle = BI_SWIZZLE_H01,
1637ec681f3Smrg                .value = imm
1647ec681f3Smrg        };
1657ec681f3Smrg}
1667ec681f3Smrg
1677ec681f3Smrgstatic inline bi_index
1687ec681f3Smrgbi_imm_f32(float imm)
1697ec681f3Smrg{
1707ec681f3Smrg        return bi_imm_u32(fui(imm));
1717ec681f3Smrg}
1727ec681f3Smrg
1737ec681f3Smrgstatic inline bi_index
1747ec681f3Smrgbi_null()
1757ec681f3Smrg{
1767ec681f3Smrg        return (bi_index) { .type = BI_INDEX_NULL };
1777ec681f3Smrg}
1787ec681f3Smrg
1797ec681f3Smrgstatic inline bi_index
1807ec681f3Smrgbi_zero()
1817ec681f3Smrg{
1827ec681f3Smrg        return bi_imm_u32(0);
1837ec681f3Smrg}
1847ec681f3Smrg
1857ec681f3Smrgstatic inline bi_index
1867ec681f3Smrgbi_passthrough(enum bifrost_packed_src value)
1877ec681f3Smrg{
1887ec681f3Smrg        return (bi_index) {
1897ec681f3Smrg                .type = BI_INDEX_PASS,
1907ec681f3Smrg                .swizzle = BI_SWIZZLE_H01,
1917ec681f3Smrg                .value = value
1927ec681f3Smrg        };
1937ec681f3Smrg}
1947ec681f3Smrg
1957ec681f3Smrg/* Read back power-efficent garbage, TODO maybe merge with null? */
1967ec681f3Smrgstatic inline bi_index
1977ec681f3Smrgbi_dontcare()
1987ec681f3Smrg{
1997ec681f3Smrg        return bi_passthrough(BIFROST_SRC_FAU_HI);
2007ec681f3Smrg}
2017ec681f3Smrg
2027ec681f3Smrg/* Extracts a word from a vectored index */
2037ec681f3Smrgstatic inline bi_index
2047ec681f3Smrgbi_word(bi_index idx, unsigned component)
2057ec681f3Smrg{
2067ec681f3Smrg        idx.offset += component;
2077ec681f3Smrg        return idx;
2087ec681f3Smrg}
2097ec681f3Smrg
2107ec681f3Smrg/* Helps construct swizzles */
2117ec681f3Smrgstatic inline bi_index
2127ec681f3Smrgbi_swz_16(bi_index idx, bool x, bool y)
2137ec681f3Smrg{
2147ec681f3Smrg        assert(idx.swizzle == BI_SWIZZLE_H01);
2157ec681f3Smrg        idx.swizzle = BI_SWIZZLE_H00 | (x << 1) | y;
2167ec681f3Smrg        return idx;
2177ec681f3Smrg}
2187ec681f3Smrg
2197ec681f3Smrgstatic inline bi_index
2207ec681f3Smrgbi_half(bi_index idx, bool upper)
2217ec681f3Smrg{
2227ec681f3Smrg        return bi_swz_16(idx, upper, upper);
2237ec681f3Smrg}
2247ec681f3Smrg
2257ec681f3Smrgstatic inline bi_index
2267ec681f3Smrgbi_byte(bi_index idx, unsigned lane)
2277ec681f3Smrg{
2287ec681f3Smrg        assert(idx.swizzle == BI_SWIZZLE_H01);
2297ec681f3Smrg        assert(lane < 4);
2307ec681f3Smrg        idx.swizzle = BI_SWIZZLE_B0000 + lane;
2317ec681f3Smrg        return idx;
2327ec681f3Smrg}
2337ec681f3Smrg
2347ec681f3Smrgstatic inline bi_index
2357ec681f3Smrgbi_abs(bi_index idx)
2367ec681f3Smrg{
2377ec681f3Smrg        idx.abs = true;
2387ec681f3Smrg        return idx;
2397ec681f3Smrg}
2407ec681f3Smrg
2417ec681f3Smrgstatic inline bi_index
2427ec681f3Smrgbi_neg(bi_index idx)
2437ec681f3Smrg{
2447ec681f3Smrg        idx.neg ^= true;
2457ec681f3Smrg        return idx;
2467ec681f3Smrg}
2477ec681f3Smrg
2487ec681f3Smrgstatic inline bi_index
2497ec681f3Smrgbi_discard(bi_index idx)
2507ec681f3Smrg{
2517ec681f3Smrg        idx.discard = true;
2527ec681f3Smrg        return idx;
2537ec681f3Smrg}
2547ec681f3Smrg
2557ec681f3Smrg/* Additive identity in IEEE 754 arithmetic */
2567ec681f3Smrgstatic inline bi_index
2577ec681f3Smrgbi_negzero()
2587ec681f3Smrg{
2597ec681f3Smrg        return bi_neg(bi_zero());
2607ec681f3Smrg}
2617ec681f3Smrg
2627ec681f3Smrg/* Replaces an index, preserving any modifiers */
2637ec681f3Smrg
2647ec681f3Smrgstatic inline bi_index
2657ec681f3Smrgbi_replace_index(bi_index old, bi_index replacement)
2667ec681f3Smrg{
2677ec681f3Smrg        replacement.abs = old.abs;
2687ec681f3Smrg        replacement.neg = old.neg;
2697ec681f3Smrg        replacement.swizzle = old.swizzle;
2707ec681f3Smrg        return replacement;
2717ec681f3Smrg}
2727ec681f3Smrg
2737ec681f3Smrg/* Remove any modifiers. This has the property:
2747ec681f3Smrg *
2757ec681f3Smrg *     replace_index(x, strip_index(x)) = x
2767ec681f3Smrg *
2777ec681f3Smrg * This ensures it is suitable to use when lowering sources to moves */
2787ec681f3Smrg
2797ec681f3Smrgstatic inline bi_index
2807ec681f3Smrgbi_strip_index(bi_index index)
2817ec681f3Smrg{
2827ec681f3Smrg        index.abs = index.neg = false;
2837ec681f3Smrg        index.swizzle = BI_SWIZZLE_H01;
2847ec681f3Smrg        return index;
2857ec681f3Smrg}
2867ec681f3Smrg
/* For bitwise instructions, where the neg modifier plays the role of not.
 * Simply an alias of bi_neg, so it toggles the same flag. */
#define bi_not(x) bi_neg(x)
2897ec681f3Smrg
2907ec681f3Smrgstatic inline bi_index
2917ec681f3Smrgbi_imm_u8(uint8_t imm)
2927ec681f3Smrg{
2937ec681f3Smrg        return bi_byte(bi_imm_u32(imm), 0);
2947ec681f3Smrg}
2957ec681f3Smrg
2967ec681f3Smrgstatic inline bi_index
2977ec681f3Smrgbi_imm_u16(uint16_t imm)
2987ec681f3Smrg{
2997ec681f3Smrg        return bi_half(bi_imm_u32(imm), false);
3007ec681f3Smrg}
3017ec681f3Smrg
3027ec681f3Smrgstatic inline bi_index
3037ec681f3Smrgbi_imm_uintN(uint32_t imm, unsigned sz)
3047ec681f3Smrg{
3057ec681f3Smrg        assert(sz == 8 || sz == 16 || sz == 32);
3067ec681f3Smrg        return (sz == 8) ? bi_imm_u8(imm) :
3077ec681f3Smrg                (sz == 16) ? bi_imm_u16(imm) :
3087ec681f3Smrg                bi_imm_u32(imm);
3097ec681f3Smrg}
3107ec681f3Smrg
3117ec681f3Smrgstatic inline bi_index
3127ec681f3Smrgbi_imm_f16(float imm)
3137ec681f3Smrg{
3147ec681f3Smrg        return bi_imm_u16(_mesa_float_to_half(imm));
3157ec681f3Smrg}
3167ec681f3Smrg
3177ec681f3Smrgstatic inline bool
3187ec681f3Smrgbi_is_null(bi_index idx)
3197ec681f3Smrg{
3207ec681f3Smrg        return idx.type == BI_INDEX_NULL;
3217ec681f3Smrg}
3227ec681f3Smrg
3237ec681f3Smrgstatic inline bool
3247ec681f3Smrgbi_is_ssa(bi_index idx)
3257ec681f3Smrg{
3267ec681f3Smrg        return idx.type == BI_INDEX_NORMAL && !idx.reg;
3277ec681f3Smrg}
3287ec681f3Smrg
3297ec681f3Smrg/* Compares equivalence as references. Does not compare offsets, swizzles, or
3307ec681f3Smrg * modifiers. In other words, this forms bi_index equivalence classes by
3317ec681f3Smrg * partitioning memory. E.g. -abs(foo[1].yx) == foo.xy but foo != bar */
3327ec681f3Smrg
3337ec681f3Smrgstatic inline bool
3347ec681f3Smrgbi_is_equiv(bi_index left, bi_index right)
3357ec681f3Smrg{
3367ec681f3Smrg        return (left.type == right.type) &&
3377ec681f3Smrg                (left.reg == right.reg) &&
3387ec681f3Smrg                (left.value == right.value);
3397ec681f3Smrg}
3407ec681f3Smrg
3417ec681f3Smrg/* A stronger equivalence relation that requires the indices access the
3427ec681f3Smrg * same offset, useful for RA/scheduling to see what registers will
3437ec681f3Smrg * correspond to */
3447ec681f3Smrg
3457ec681f3Smrgstatic inline bool
3467ec681f3Smrgbi_is_word_equiv(bi_index left, bi_index right)
3477ec681f3Smrg{
3487ec681f3Smrg        return bi_is_equiv(left, right) && left.offset == right.offset;
3497ec681f3Smrg}
3507ec681f3Smrg
/* Sizes of the dest[] and src[] arrays in bi_instr */
#define BI_MAX_DESTS 2
#define BI_MAX_SRCS 4
3537ec681f3Smrg
/* A single IR instruction: an opcode, its destinations and sources, and a set
 * of opcode-dependent modifiers and immediates stored in anonymous unions. */
typedef struct {
        /* Must be first */
        struct list_head link;

        enum bi_opcode op;

        /* Data flow */
        bi_index dest[BI_MAX_DESTS];
        bi_index src[BI_MAX_SRCS];

        /* For a branch */
        struct bi_block *branch_target;

        /* These don't fit neatly with anything else.. */
        enum bi_register_format register_format;
        enum bi_vecsize vecsize;

        /* Can we spill the value written here? Used to prevent
         * useless double fills */
        bool no_spill;

        /* Override table, inducing a DTSEL_IMM pair if nonzero */
        enum bi_table table;

        /* Everything after this MUST NOT be accessed directly, since
         * interpretation depends on opcodes */

        /* Destination modifiers */
        union {
                enum bi_clamp clamp;
                bool saturate;
                bool not_result;
                unsigned dest_mod;
        };

        /* Immediates. All seen alone in an instruction, except for varying/texture
         * which are specified jointly for VARTEX */
        union {
                uint32_t shift;
                uint32_t fill;
                uint32_t index;
                uint32_t attribute_index;
                int32_t branch_offset;

                struct {
                        uint32_t varying_index;
                        uint32_t sampler_index;
                        uint32_t texture_index;
                };

                /* TEXC, ATOM_CX: # of staging registers used */
                uint32_t sr_count;
        };

        /* Modifiers specific to particular instructions are thrown in a union */
        union {
                enum bi_adj adj; /* FEXP_TABLE.u4 */
                enum bi_atom_opc atom_opc; /* atomics */
                enum bi_func func; /* FPOW_SC_DET */
                enum bi_function function; /* LD_VAR_FLAT */
                enum bi_mux mux; /* MUX */
                enum bi_sem sem; /* FMAX, FMIN */
                enum bi_source source; /* LD_GCLK */
                bool scale; /* VN_ASST2, FSINCOS_OFFSET */
                bool offset; /* FSIN_TABLE, FCOS_TABLE */
                bool mask; /* CLZ */
                bool threads; /* IMULD, IMOV_FMA */
                bool combine; /* BRANCHC */
                bool format; /* LEA_TEX */

                struct {
                        enum bi_special special; /* FADD_RSCALE, FMA_RSCALE */
                        enum bi_round round; /* FMA, converts, FADD, _RSCALE, etc */
                };

                struct {
                        enum bi_result_type result_type; /* FCMP, ICMP */
                        enum bi_cmpf cmpf; /* CSEL, FCMP, ICMP, BRANCH */
                };

                struct {
                        enum bi_stack_mode stack_mode; /* JUMP_EX */
                        bool test_mode;
                };

                struct {
                        enum bi_seg seg; /* LOAD, STORE, SEG_ADD, SEG_SUB */
                        bool preserve_null; /* SEG_ADD, SEG_SUB */
                        enum bi_extend extend; /* LOAD, IMUL */
                };

                struct {
                        enum bi_sample sample; /* VAR_TEX, LD_VAR */
                        enum bi_update update; /* VAR_TEX, LD_VAR */
                        enum bi_varying_name varying_name; /* LD_VAR_SPECIAL */
                        bool skip; /* VAR_TEX, TEXS, TEXC */
                        bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */
                };

                /* Maximum size, for hashing. Should stay at least as large as
                 * the biggest struct in this union. */
                unsigned flags[5];

                struct {
                        enum bi_subgroup subgroup; /* WMASK, CLPER */
                        enum bi_inactive_result inactive_result; /* CLPER */
                        enum bi_lane_op lane_op; /* CLPER */
                };

                struct {
                        bool z; /* ZS_EMIT */
                        bool stencil; /* ZS_EMIT */
                };

                struct {
                        bool h; /* VN_ASST1.f16 */
                        bool l; /* VN_ASST1.f16 */
                };

                struct {
                        bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */
                        bool result_word;
                };

                struct {
                        bool sqrt; /* FREXPM */
                        bool log; /* FREXPM */
                };

                struct {
                        enum bi_mode mode; /* FLOG_TABLE */
                        enum bi_precision precision; /* FLOG_TABLE */
                        bool divzero; /* FRSQ_APPROX, FRSQ */
                };
        };
} bi_instr;
4897ec681f3Smrg
/* Represents the assignment of slots for a given bi_tuple. One of these is
 * embedded in every bi_tuple. */

typedef struct {
        /* Register to assign to each slot */
        unsigned slot[4];

        /* Read slots can be disabled */
        bool enabled[2];

        /* Configuration for slots 2/3 */
        struct bifrost_reg_ctrl_23 slot23;

        /* Fast-Access-Uniform RAM index */
        uint8_t fau_idx;

        /* Whether writes are actually for the last instruction */
        bool first_instruction;
} bi_registers;
5087ec681f3Smrg
/* A bi_tuple contains two paired instruction pointers. If a slot is unfilled,
 * leave it NULL; the emitter will fill in a nop. Instructions reference
 * registers via slots which are assigned per tuple.
 */

typedef struct {
        /* FAU index for this tuple. NOTE(review): bi_registers carries a
         * fau_idx as well; which one is authoritative is not visible here */
        uint8_t fau_idx;

        /* Slot assignment shared by both instructions of the tuple */
        bi_registers regs;

        /* The paired instructions, either may be NULL */
        bi_instr *fma;
        bi_instr *add;
} bi_tuple;
5207ec681f3Smrg
/* Forward declaration, bi_clause links back to its containing block */
struct bi_block;

/* A clause: up to 8 tuples scheduled together, along with the constants they
 * read and the scoreboard / flow-control metadata needed to emit them. */
typedef struct {
        /* Entry in the owning block's list of clauses */
        struct list_head link;

        /* Link back up for branch calculations */
        struct bi_block *block;

        /* Architectural limit of 8 tuples/clause */
        unsigned tuple_count;
        bi_tuple tuples[8];

        /* For scoreboarding -- the clause ID (this is not globally unique!)
         * and its dependencies in terms of other clauses, computed during
         * scheduling and used when emitting code. Dependencies expressed as a
         * bitfield matching the hardware, except shifted by a clause (the
         * shift back to the ISA's off-by-one encoding is worked out when
         * emitting clauses) */
        unsigned scoreboard_id;
        uint8_t dependencies;

        /* See ISA header for description */
        enum bifrost_flow flow_control;

        /* Can we prefetch the next clause? Usually it makes sense, except for
         * clauses ending in unconditional branches */
        bool next_clause_prefetch;

        /* Assigned data register */
        unsigned staging_register;

        /* Corresponds to the usual bit but shifted by a clause */
        bool staging_barrier;

        /* Constants read by this clause. ISA limit. Must satisfy:
         *
         *      constant_count + tuple_count <= 13
         *
         * Also implicitly constant_count <= tuple_count since a tuple only
         * reads a single constant.
         */
        uint64_t constants[8];
        unsigned constant_count;

        /* Index of a constant to be PC-relative */
        unsigned pcrel_idx;

        /* Branches encode a constant offset relative to the program counter
         * with some magic flags. By convention, if there is a branch, its
         * constant will be last. Set this flag to indicate this is required.
         */
        bool branch_constant;

        /* Unique in a clause */
        enum bifrost_message_type message_type;
        bi_instr *message;

        /* Discard helper threads */
        bool td;
} bi_clause;
5817ec681f3Smrg
/* A basic block. Before scheduling it holds a list of bi_instr; afterwards
 * (scheduled == true) it holds a list of bi_clause. */
typedef struct bi_block {
        /* Link to next block. Must be first for mir_get_block.
         * NOTE(review): mir_get_block is not defined in this header --
         * confirm that the constraint still applies */
        struct list_head link;

        /* List of instructions emitted for the current block */
        struct list_head instructions;

        /* Index of the block in source order */
        unsigned name;

        /* Control flow graph */
        struct bi_block *successors[2];
        struct set *predecessors;
        bool unconditional_jumps;

        /* Per 32-bit word live masks for the block indexed by node */
        uint8_t *live_in;
        uint8_t *live_out;

        /* If true, uses clauses; if false, uses instructions */
        bool scheduled;
        struct list_head clauses; /* list of bi_clause */

        /* Post-RA liveness */
        uint64_t reg_live_in, reg_live_out;

        /* Flags available for pass-internal use */
        uint8_t pass_flags;
} bi_block;
6117ec681f3Smrg
/* State for compiling one shader: the inputs, the list of blocks making up
 * the program, allocation counters for temporaries, and statistics. */
typedef struct {
       const struct panfrost_compile_inputs *inputs;
       nir_shader *nir;
       struct pan_shader_info *info;
       gl_shader_stage stage;
       struct list_head blocks; /* list of bi_block */
       struct hash_table_u64 *sysval_to_id;
       uint32_t quirks;
       unsigned arch;

       /* During NIR->BIR */
       bi_block *current_block;
       bi_block *after_block;
       bi_block *break_block;
       bi_block *continue_block;
       bool emitted_atest;

       /* For creating temporaries. Next unused SSA / register name, bumped by
        * bi_temp() and bi_temp_reg() respectively */
       unsigned ssa_alloc;
       unsigned reg_alloc;

       /* Analysis results */
       bool has_liveness;

       /* Mask of UBOs that need to be uploaded */
       uint32_t ubo_mask;

       /* Stats for shader-db */
       unsigned instruction_count;
       unsigned loop_count;
       unsigned spills;
       unsigned fills;
} bi_context;
6457ec681f3Smrg
6467ec681f3Smrgstatic inline void
6477ec681f3Smrgbi_remove_instruction(bi_instr *ins)
6487ec681f3Smrg{
6497ec681f3Smrg        list_del(&ins->link);
6507ec681f3Smrg}
6517ec681f3Smrg
/* Selector stored in the value of a BI_INDEX_FAU index (see bi_fau). The low
 * values name special sources; BIR_FAU_UNIFORM and BIR_FAU_IMMEDIATE are
 * single-bit flags. */
enum bir_fau {
        BIR_FAU_ZERO = 0,
        BIR_FAU_LANE_ID = 1,
        BIR_FAU_WARP_ID = 2,
        BIR_FAU_CORE_ID = 3,
        BIR_FAU_FB_EXTENT = 4,
        BIR_FAU_ATEST_PARAM = 5,
        BIR_FAU_SAMPLE_POS_ARRAY = 6,
        BIR_FAU_BLEND_0 = 8,
        /* blend descs 1 - 7 */
        BIR_FAU_TYPE_MASK = 15,

        /* Valhall only */
        BIR_FAU_TLS_PTR = 16,
        BIR_FAU_WLS_PTR = 17,
        BIR_FAU_PROGRAM_COUNTER = 18,

        BIR_FAU_UNIFORM = (1 << 7),
        /* Look up table on Valhall */
        BIR_FAU_IMMEDIATE = (1 << 8),

};
6747ec681f3Smrg
6757ec681f3Smrgstatic inline bi_index
6767ec681f3Smrgbi_fau(enum bir_fau value, bool hi)
6777ec681f3Smrg{
6787ec681f3Smrg        return (bi_index) {
6797ec681f3Smrg                .type = BI_INDEX_FAU,
6807ec681f3Smrg                .value = value,
6817ec681f3Smrg                .swizzle = BI_SWIZZLE_H01,
6827ec681f3Smrg                .offset = hi ? 1 : 0
6837ec681f3Smrg        };
6847ec681f3Smrg}
6857ec681f3Smrg
6867ec681f3Smrgstatic inline unsigned
6877ec681f3Smrgbi_max_temp(bi_context *ctx)
6887ec681f3Smrg{
6897ec681f3Smrg        return (MAX2(ctx->reg_alloc, ctx->ssa_alloc) + 2) << 1;
6907ec681f3Smrg}
6917ec681f3Smrg
6927ec681f3Smrgstatic inline bi_index
6937ec681f3Smrgbi_temp(bi_context *ctx)
6947ec681f3Smrg{
6957ec681f3Smrg        return bi_get_index(ctx->ssa_alloc++, false, 0);
6967ec681f3Smrg}
6977ec681f3Smrg
6987ec681f3Smrgstatic inline bi_index
6997ec681f3Smrgbi_temp_reg(bi_context *ctx)
7007ec681f3Smrg{
7017ec681f3Smrg        return bi_get_index(ctx->reg_alloc++, true, 0);
7027ec681f3Smrg}
7037ec681f3Smrg
7047ec681f3Smrg/* NIR booleans are 1-bit (0/1). For now, backend IR booleans are N-bit
7057ec681f3Smrg * (0/~0) where N depends on the context. This requires us to sign-extend
7067ec681f3Smrg * when converting constants from NIR to the backend IR.
7077ec681f3Smrg */
7087ec681f3Smrgstatic inline uint32_t
7097ec681f3Smrgbi_extend_constant(uint32_t constant, unsigned bit_size)
7107ec681f3Smrg{
7117ec681f3Smrg        if (bit_size == 1 && constant != 0)
7127ec681f3Smrg                return ~0;
7137ec681f3Smrg        else
7147ec681f3Smrg                return constant;
7157ec681f3Smrg}
7167ec681f3Smrg
7177ec681f3Smrg/* Inline constants automatically, will be lowered out by bi_lower_fau where a
7187ec681f3Smrg * constant is not allowed. load_const_to_scalar gaurantees that this makes
7197ec681f3Smrg * sense */
7207ec681f3Smrg
7217ec681f3Smrgstatic inline bi_index
7227ec681f3Smrgbi_src_index(nir_src *src)
7237ec681f3Smrg{
7247ec681f3Smrg        if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32) {
7257ec681f3Smrg                uint32_t v = nir_src_as_uint(*src);
7267ec681f3Smrg
7277ec681f3Smrg                return bi_imm_u32(bi_extend_constant(v, nir_src_bit_size(*src)));
7287ec681f3Smrg        } else if (src->is_ssa) {
7297ec681f3Smrg                return bi_get_index(src->ssa->index, false, 0);
7307ec681f3Smrg        } else {
7317ec681f3Smrg                assert(!src->reg.indirect);
7327ec681f3Smrg                return bi_get_index(src->reg.reg->index, true, 0);
7337ec681f3Smrg        }
7347ec681f3Smrg}
7357ec681f3Smrg
7367ec681f3Smrgstatic inline bi_index
7377ec681f3Smrgbi_dest_index(nir_dest *dst)
7387ec681f3Smrg{
7397ec681f3Smrg        if (dst->is_ssa)
7407ec681f3Smrg                return bi_get_index(dst->ssa.index, false, 0);
7417ec681f3Smrg        else {
7427ec681f3Smrg                assert(!dst->reg.indirect);
7437ec681f3Smrg                return bi_get_index(dst->reg.reg->index, true, 0);
7447ec681f3Smrg        }
7457ec681f3Smrg}
7467ec681f3Smrg
7477ec681f3Smrgstatic inline unsigned
7487ec681f3Smrgbi_get_node(bi_index index)
7497ec681f3Smrg{
7507ec681f3Smrg        if (bi_is_null(index) || index.type != BI_INDEX_NORMAL)
7517ec681f3Smrg                return ~0;
7527ec681f3Smrg        else
7537ec681f3Smrg                return (index.value << 1) | index.reg;
7547ec681f3Smrg}
7557ec681f3Smrg
7567ec681f3Smrgstatic inline bi_index
7577ec681f3Smrgbi_node_to_index(unsigned node, unsigned node_count)
7587ec681f3Smrg{
7597ec681f3Smrg        assert(node < node_count);
7607ec681f3Smrg        assert(node_count < ~0);
7617ec681f3Smrg
7627ec681f3Smrg        return bi_get_index(node >> 1, node & PAN_IS_REG, 0);
7637ec681f3Smrg}
7647ec681f3Smrg
/* Iterators for Bifrost IR.
 *
 * Thin wrappers around the generic list_for_each_entry* macros that fill in
 * the element type, the list head and the name of the link member. */

/* Blocks of a shader (ctx->blocks) */

#define bi_foreach_block(ctx, v)                                              \
        list_for_each_entry(bi_block, v, &ctx->blocks, link)

#define bi_foreach_block_rev(ctx, v)                                          \
        list_for_each_entry_rev(bi_block, v, &ctx->blocks, link)

#define bi_foreach_block_from(ctx, from, v)                                   \
        list_for_each_entry_from(bi_block, v, from, &ctx->blocks, link)

#define bi_foreach_block_from_rev(ctx, from, v)                               \
        list_for_each_entry_from_rev(bi_block, v, from, &ctx->blocks, link)

/* Instructions of a block (block->instructions). Note the argument order of
 * the _from variants: (block, v, from), unlike the block iterators above. */

#define bi_foreach_instr_in_block(block, v)                                   \
        list_for_each_entry(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_rev(block, v)                               \
        list_for_each_entry_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe(block, v)                              \
        list_for_each_entry_safe(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe_rev(block, v)                          \
        list_for_each_entry_safe_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from(block, v, from)                        \
        list_for_each_entry_from(bi_instr, v, from, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from_rev(block, v, from)                    \
        list_for_each_entry_from_rev(bi_instr, v, from, &(block)->instructions, link)

/* Clauses of a block (block->clauses) */

#define bi_foreach_clause_in_block(block, v)                                  \
        list_for_each_entry(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_rev(block, v)                              \
        list_for_each_entry_rev(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_safe(block, v)                             \
        list_for_each_entry_safe(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from(block, v, from)                       \
        list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from_rev(block, v, from)                   \
        list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link)
8117ec681f3Smrg
/* Whole-shader instruction iterators. Each one nests a per-block instruction
 * iterator inside a block iterator; the block iterator always uses the fixed
 * name v_block, so that identifier is taken inside the loop body.
 *
 * The _rev forms walk blocks and instructions backwards. The _safe forms use
 * the _safe per-block instruction iterators. */

#define bi_foreach_instr_global(ctx, v) \
        bi_foreach_block(ctx, v_block) \
                bi_foreach_instr_in_block(v_block, v)

#define bi_foreach_instr_global_rev(ctx, v) \
        bi_foreach_block_rev(ctx, v_block) \
                bi_foreach_instr_in_block_rev(v_block, v)

#define bi_foreach_instr_global_safe(ctx, v) \
        bi_foreach_block(ctx, v_block) \
                bi_foreach_instr_in_block_safe(v_block, v)

/* The per-block reverse-safe iterator is spelled ..._safe_rev, even though
 * this global wrapper is spelled ..._rev_safe. */
#define bi_foreach_instr_global_rev_safe(ctx, v) \
        bi_foreach_block_rev(ctx, v_block) \
                bi_foreach_instr_in_block_safe_rev(v_block, v)
8277ec681f3Smrg
/* Visit the instructions of a tuple: the FMA slot first (when filled), then
 * the ADD slot (when filled). Empty slots are skipped, so the body runs zero,
 * one or two times. */
#define bi_foreach_instr_in_tuple(tuple, v) \
        for (bi_instr *v = (tuple)->fma ? (tuple)->fma : (tuple)->add; \
             v != NULL; \
             v = (v != (tuple)->add) ? (tuple)->add : NULL)
8327ec681f3Smrg
/* Visit the successors of a block, stopping at the first NULL entry or after
 * both slots of successors[] have been seen.
 *
 * This expands to two declarations (v and _v) followed by a for loop, so it
 * can be used at most once per scope and not as the sole body of an
 * unbraced if/else.
 *
 * The step only dereferences _v while it still points inside successors[];
 * once it reaches the one-past-the-end position v is set to NULL instead of
 * reading beyond the array. */
#define bi_foreach_successor(blk, v) \
        bi_block *v; \
        bi_block **_v; \
        for (_v = &(blk)->successors[0], \
                v = *_v; \
                v != NULL && _v < &(blk)->successors[2]; \
                _v++, v = (_v < &(blk)->successors[2]) ? *_v : NULL)
8407ec681f3Smrg
/* Modelled on set_foreach, with the cast from the set entry's key to
 * bi_block * done automatically.
 *
 * Walks blk->predecessors with _mesa_set_next_entry. Expands to two
 * declarations (_entry_<v> and v) followed by a for loop, so it can be used at
 * most once per scope for a given name v. */

#define bi_foreach_predecessor(blk, v) \
        struct set_entry *_entry_##v; \
        bi_block *v; \
        for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \
             v = (_entry_##v != NULL) ? (bi_block *) _entry_##v->key : NULL; \
             _entry_##v != NULL; \
             _entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \
             v = (_entry_##v != NULL) ? (bi_block *) _entry_##v->key : NULL)
8517ec681f3Smrg
/* Index loops over an instruction's source and destination arrays. v is the
 * slot number, running over the full ARRAY_SIZE of ins->src / ins->dest;
 * nothing here skips unused slots. */

#define bi_foreach_src(ins, v) \
        for (unsigned v = 0; v < ARRAY_SIZE(ins->src); v++)

#define bi_foreach_dest(ins, v) \
        for (unsigned v = 0; v < ARRAY_SIZE(ins->dest); v++)

/* Every (instruction, source slot) pair of a tuple */
#define bi_foreach_instr_and_src_in_tuple(tuple, ins, s) \
        bi_foreach_instr_in_tuple(tuple, ins) \
                bi_foreach_src(ins, s)
8617ec681f3Smrg
8627ec681f3Smrgstatic inline bi_instr *
8637ec681f3Smrgbi_prev_op(bi_instr *ins)
8647ec681f3Smrg{
8657ec681f3Smrg        return list_last_entry(&(ins->link), bi_instr, link);
8667ec681f3Smrg}
8677ec681f3Smrg
8687ec681f3Smrgstatic inline bi_instr *
8697ec681f3Smrgbi_next_op(bi_instr *ins)
8707ec681f3Smrg{
8717ec681f3Smrg        return list_first_entry(&(ins->link), bi_instr, link);
8727ec681f3Smrg}
8737ec681f3Smrg
8747ec681f3Smrgstatic inline bi_block *
8757ec681f3Smrgbi_next_block(bi_block *block)
8767ec681f3Smrg{
8777ec681f3Smrg        return list_first_entry(&(block->link), bi_block, link);
8787ec681f3Smrg}
8797ec681f3Smrg
8807ec681f3Smrgstatic inline bi_block *
8817ec681f3Smrgbi_entry_block(bi_context *ctx)
8827ec681f3Smrg{
8837ec681f3Smrg        return list_first_entry(&ctx->blocks, bi_block, link);
8847ec681f3Smrg}
8857ec681f3Smrg
8867ec681f3Smrg/* BIR manipulation */
8877ec681f3Smrg
8887ec681f3Smrgbool bi_has_arg(const bi_instr *ins, bi_index arg);
8897ec681f3Smrgunsigned bi_count_read_registers(const bi_instr *ins, unsigned src);
8907ec681f3Smrgunsigned bi_count_write_registers(const bi_instr *ins, unsigned dest);
8917ec681f3Smrgbool bi_is_regfmt_16(enum bi_register_format fmt);
8927ec681f3Smrgunsigned bi_writemask(const bi_instr *ins, unsigned dest);
8937ec681f3Smrgbi_clause * bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause);
8947ec681f3Smrgbool bi_side_effects(enum bi_opcode op);
8957ec681f3Smrgbool bi_reconverge_branches(bi_block *block);
8967ec681f3Smrg
8977ec681f3Smrgvoid bi_print_instr(const bi_instr *I, FILE *fp);
8987ec681f3Smrgvoid bi_print_slots(bi_registers *regs, FILE *fp);
8997ec681f3Smrgvoid bi_print_tuple(bi_tuple *tuple, FILE *fp);
9007ec681f3Smrgvoid bi_print_clause(bi_clause *clause, FILE *fp);
9017ec681f3Smrgvoid bi_print_block(bi_block *block, FILE *fp);
9027ec681f3Smrgvoid bi_print_shader(bi_context *ctx, FILE *fp);
9037ec681f3Smrg
9047ec681f3Smrg/* BIR passes */
9057ec681f3Smrg
9067ec681f3Smrgvoid bi_analyze_helper_terminate(bi_context *ctx);
9077ec681f3Smrgvoid bi_analyze_helper_requirements(bi_context *ctx);
9087ec681f3Smrgvoid bi_opt_copy_prop(bi_context *ctx);
9097ec681f3Smrgvoid bi_opt_cse(bi_context *ctx);
9107ec681f3Smrgvoid bi_opt_mod_prop_forward(bi_context *ctx);
9117ec681f3Smrgvoid bi_opt_mod_prop_backward(bi_context *ctx);
9127ec681f3Smrgvoid bi_opt_dead_code_eliminate(bi_context *ctx);
9137ec681f3Smrgvoid bi_opt_dce_post_ra(bi_context *ctx);
9147ec681f3Smrgvoid bi_opt_push_ubo(bi_context *ctx);
9157ec681f3Smrgvoid bi_lower_swizzle(bi_context *ctx);
9167ec681f3Smrgvoid bi_lower_fau(bi_context *ctx);
9177ec681f3Smrgvoid bi_assign_scoreboard(bi_context *ctx);
9187ec681f3Smrgvoid bi_register_allocate(bi_context *ctx);
9197ec681f3Smrg
9207ec681f3Smrgvoid bi_lower_opt_instruction(bi_instr *I);
9217ec681f3Smrg
9227ec681f3Smrgvoid bi_schedule(bi_context *ctx);
9237ec681f3Smrgbool bi_can_fma(bi_instr *ins);
9247ec681f3Smrgbool bi_can_add(bi_instr *ins);
9257ec681f3Smrgbool bi_must_message(bi_instr *ins);
9267ec681f3Smrgbool bi_reads_zero(bi_instr *ins);
9277ec681f3Smrgbool bi_reads_temps(bi_instr *ins, unsigned src);
9287ec681f3Smrgbool bi_reads_t(bi_instr *ins, unsigned src);
9297ec681f3Smrg
9307ec681f3Smrg#ifndef NDEBUG
9317ec681f3Smrgbool bi_validate_initialization(bi_context *ctx);
9327ec681f3Smrgvoid bi_validate(bi_context *ctx, const char *after_str);
9337ec681f3Smrg#else
9347ec681f3Smrgstatic inline bool bi_validate_initialization(UNUSED bi_context *ctx) { return true; }
9357ec681f3Smrgstatic inline void bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str) { return; }
9367ec681f3Smrg#endif
9377ec681f3Smrg
9387ec681f3Smrguint32_t bi_fold_constant(bi_instr *I, bool *unsupported);
9397ec681f3Smrgvoid bi_opt_constant_fold(bi_context *ctx);
9407ec681f3Smrg
9417ec681f3Smrg/* Liveness */
9427ec681f3Smrg
9437ec681f3Smrgvoid bi_compute_liveness(bi_context *ctx);
9447ec681f3Smrgvoid bi_liveness_ins_update(uint8_t *live, bi_instr *ins, unsigned max);
9457ec681f3Smrgvoid bi_invalidate_liveness(bi_context *ctx);
9467ec681f3Smrg
9477ec681f3Smrgvoid bi_postra_liveness(bi_context *ctx);
9487ec681f3Smrguint64_t bi_postra_liveness_ins(uint64_t live, bi_instr *ins);
9497ec681f3Smrg
9507ec681f3Smrg/* Layout */
9517ec681f3Smrg
9527ec681f3Smrgsigned bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target);
9537ec681f3Smrgbool bi_ec0_packed(unsigned tuple_count);
9547ec681f3Smrg
9557ec681f3Smrg/* Check if there are no more instructions starting with a given block, this
9567ec681f3Smrg * needs to recurse in case a shader ends with multiple empty blocks */
9577ec681f3Smrg
9587ec681f3Smrgstatic inline bool
9597ec681f3Smrgbi_is_terminal_block(bi_block *block)
9607ec681f3Smrg{
9617ec681f3Smrg        return (block == NULL) ||
9627ec681f3Smrg                (list_is_empty(&block->instructions) &&
9637ec681f3Smrg                 bi_is_terminal_block(block->successors[0]) &&
9647ec681f3Smrg                 bi_is_terminal_block(block->successors[1]));
9657ec681f3Smrg}
9667ec681f3Smrg
9677ec681f3Smrg/* Code emit */
9687ec681f3Smrg
9697ec681f3Smrg/* Returns the size of the final clause */
9707ec681f3Smrgunsigned bi_pack(bi_context *ctx, struct util_dynarray *emission);
9717ec681f3Smrg
/* A packed tuple, stored as two 64-bit halves.
 * NOTE(review): how many bits of hi are meaningful is not visible from this
 * declaration -- check the packing code. */
struct bi_packed_tuple {
        uint64_t lo; /* low half */
        uint64_t hi; /* high half */
};
9767ec681f3Smrg
9777ec681f3Smrguint8_t bi_pack_literal(enum bi_clause_subword literal);
9787ec681f3Smrg
9797ec681f3Smrguint8_t
9807ec681f3Smrgbi_pack_upper(enum bi_clause_subword upper,
9817ec681f3Smrg                struct bi_packed_tuple *tuples,
9827ec681f3Smrg                ASSERTED unsigned tuple_count);
9837ec681f3Smrguint64_t
9847ec681f3Smrgbi_pack_tuple_bits(enum bi_clause_subword idx,
9857ec681f3Smrg                struct bi_packed_tuple *tuples,
9867ec681f3Smrg                ASSERTED unsigned tuple_count,
9877ec681f3Smrg                unsigned offset, unsigned nbits);
9887ec681f3Smrg
9897ec681f3Smrguint8_t
9907ec681f3Smrgbi_pack_sync(enum bi_clause_subword t1,
9917ec681f3Smrg             enum bi_clause_subword t2,
9927ec681f3Smrg             enum bi_clause_subword t3,
9937ec681f3Smrg             struct bi_packed_tuple *tuples,
9947ec681f3Smrg             ASSERTED unsigned tuple_count,
9957ec681f3Smrg             bool z);
9967ec681f3Smrg
9977ec681f3Smrgvoid
9987ec681f3Smrgbi_pack_format(struct util_dynarray *emission,
9997ec681f3Smrg                unsigned index,
10007ec681f3Smrg                struct bi_packed_tuple *tuples,
10017ec681f3Smrg                ASSERTED unsigned tuple_count,
10027ec681f3Smrg                uint64_t header, uint64_t ec0,
10037ec681f3Smrg                unsigned m0, bool z);
10047ec681f3Smrg
10057ec681f3Smrgunsigned bi_pack_fma(bi_instr *I,
10067ec681f3Smrg                enum bifrost_packed_src src0,
10077ec681f3Smrg                enum bifrost_packed_src src1,
10087ec681f3Smrg                enum bifrost_packed_src src2,
10097ec681f3Smrg                enum bifrost_packed_src src3);
10107ec681f3Smrgunsigned bi_pack_add(bi_instr *I,
10117ec681f3Smrg                enum bifrost_packed_src src0,
10127ec681f3Smrg                enum bifrost_packed_src src1,
10137ec681f3Smrg                enum bifrost_packed_src src2,
10147ec681f3Smrg                enum bifrost_packed_src src3);
10157ec681f3Smrg
/* Insertion points for the builder, in the same spirit as NIR's cursors */

enum bi_cursor_option {
    bi_cursor_after_block = 0,  /* append at the end of a block */
    bi_cursor_before_instr = 1, /* insert in front of an instruction */
    bi_cursor_after_instr = 2,  /* insert behind an instruction */
};
10237ec681f3Smrg
10247ec681f3Smrgtypedef struct {
10257ec681f3Smrg    enum bi_cursor_option option;
10267ec681f3Smrg
10277ec681f3Smrg    union {
10287ec681f3Smrg        bi_block *block;
10297ec681f3Smrg        bi_instr *instr;
10307ec681f3Smrg    };
10317ec681f3Smrg} bi_cursor;
10327ec681f3Smrg
10337ec681f3Smrgstatic inline bi_cursor
10347ec681f3Smrgbi_after_block(bi_block *block)
10357ec681f3Smrg{
10367ec681f3Smrg    return (bi_cursor) {
10377ec681f3Smrg        .option = bi_cursor_after_block,
10387ec681f3Smrg        .block = block
10397ec681f3Smrg    };
10407ec681f3Smrg}
10417ec681f3Smrg
10427ec681f3Smrgstatic inline bi_cursor
10437ec681f3Smrgbi_before_instr(bi_instr *instr)
10447ec681f3Smrg{
10457ec681f3Smrg    return (bi_cursor) {
10467ec681f3Smrg        .option = bi_cursor_before_instr,
10477ec681f3Smrg        .instr = instr
10487ec681f3Smrg    };
10497ec681f3Smrg}
10507ec681f3Smrg
10517ec681f3Smrgstatic inline bi_cursor
10527ec681f3Smrgbi_after_instr(bi_instr *instr)
10537ec681f3Smrg{
10547ec681f3Smrg    return (bi_cursor) {
10557ec681f3Smrg        .option = bi_cursor_after_instr,
10567ec681f3Smrg        .instr = instr
10577ec681f3Smrg    };
10587ec681f3Smrg}
10597ec681f3Smrg
10607ec681f3Smrg/* Invariant: a tuple must be nonempty UNLESS it is the last tuple of a clause,
10617ec681f3Smrg * in which case there must exist a nonempty penultimate tuple */
10627ec681f3Smrg
10637ec681f3SmrgATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
10647ec681f3Smrgbi_first_instr_in_tuple(bi_tuple *tuple)
10657ec681f3Smrg{
10667ec681f3Smrg        bi_instr *instr = tuple->fma ?: tuple->add;
10677ec681f3Smrg        assert(instr != NULL);
10687ec681f3Smrg        return instr;
10697ec681f3Smrg}
10707ec681f3Smrg
10717ec681f3SmrgATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
10727ec681f3Smrgbi_first_instr_in_clause(bi_clause *clause)
10737ec681f3Smrg{
10747ec681f3Smrg        return bi_first_instr_in_tuple(&clause->tuples[0]);
10757ec681f3Smrg}
10767ec681f3Smrg
10777ec681f3SmrgATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
10787ec681f3Smrgbi_last_instr_in_clause(bi_clause *clause)
10797ec681f3Smrg{
10807ec681f3Smrg        bi_tuple tuple = clause->tuples[clause->tuple_count - 1];
10817ec681f3Smrg        bi_instr *instr = tuple.add ?: tuple.fma;
10827ec681f3Smrg
10837ec681f3Smrg        if (!instr) {
10847ec681f3Smrg                assert(clause->tuple_count >= 2);
10857ec681f3Smrg                tuple = clause->tuples[clause->tuple_count - 2];
10867ec681f3Smrg                instr = tuple.add ?: tuple.fma;
10877ec681f3Smrg        }
10887ec681f3Smrg
10897ec681f3Smrg        assert(instr != NULL);
10907ec681f3Smrg        return instr;
10917ec681f3Smrg}
10927ec681f3Smrg
/* Walk the instructions belonging to one clause, forwards or backwards.
 *
 * These are bi_foreach_instr_in_block_from(_rev) written out by hand, started
 * at the first (last) instruction of the clause, with one extra stop
 * condition for the clause boundary: iteration ends on reaching either the
 * block's list head or the instruction just past the clause.
 *
 * NOTE(review): the initializer hands a bi_instr * (not &instr->link) to
 * LIST_ENTRY; that is presumably only correct if link is the first member of
 * bi_instr -- confirm against the struct definition. */

#define bi_foreach_instr_in_clause(block, clause, pos) \
   for (bi_instr *pos = LIST_ENTRY(bi_instr, bi_first_instr_in_clause(clause), link); \
        (&pos->link != &(block)->instructions) \
                && (pos != bi_next_op(bi_last_instr_in_clause(clause))); \
        pos = LIST_ENTRY(bi_instr, pos->link.next, link))

#define bi_foreach_instr_in_clause_rev(block, clause, pos) \
   for (bi_instr *pos = LIST_ENTRY(bi_instr, bi_last_instr_in_clause(clause), link); \
        (&pos->link != &(block)->instructions) \
                && pos != bi_prev_op(bi_first_instr_in_clause(clause)); \
        pos = LIST_ENTRY(bi_instr, pos->link.prev, link))
11077ec681f3Smrg
11087ec681f3Smrgstatic inline bi_cursor
11097ec681f3Smrgbi_before_clause(bi_clause *clause)
11107ec681f3Smrg{
11117ec681f3Smrg    return bi_before_instr(bi_first_instr_in_clause(clause));
11127ec681f3Smrg}
11137ec681f3Smrg
11147ec681f3Smrgstatic inline bi_cursor
11157ec681f3Smrgbi_before_tuple(bi_tuple *tuple)
11167ec681f3Smrg{
11177ec681f3Smrg    return bi_before_instr(bi_first_instr_in_tuple(tuple));
11187ec681f3Smrg}
11197ec681f3Smrg
11207ec681f3Smrgstatic inline bi_cursor
11217ec681f3Smrgbi_after_clause(bi_clause *clause)
11227ec681f3Smrg{
11237ec681f3Smrg    return bi_after_instr(bi_last_instr_in_clause(clause));
11247ec681f3Smrg}
11257ec681f3Smrg
11267ec681f3Smrg/* IR builder in terms of cursor infrastructure */
11277ec681f3Smrg
11287ec681f3Smrgtypedef struct {
11297ec681f3Smrg    bi_context *shader;
11307ec681f3Smrg    bi_cursor cursor;
11317ec681f3Smrg} bi_builder;
11327ec681f3Smrg
11337ec681f3Smrgstatic inline bi_builder
11347ec681f3Smrgbi_init_builder(bi_context *ctx, bi_cursor cursor)
11357ec681f3Smrg{
11367ec681f3Smrg        return (bi_builder) {
11377ec681f3Smrg                .shader = ctx,
11387ec681f3Smrg                .cursor = cursor
11397ec681f3Smrg        };
11407ec681f3Smrg}
11417ec681f3Smrg
11427ec681f3Smrg/* Insert an instruction at the cursor and move the cursor */
11437ec681f3Smrg
11447ec681f3Smrgstatic inline void
11457ec681f3Smrgbi_builder_insert(bi_cursor *cursor, bi_instr *I)
11467ec681f3Smrg{
11477ec681f3Smrg    switch (cursor->option) {
11487ec681f3Smrg    case bi_cursor_after_instr:
11497ec681f3Smrg        list_add(&I->link, &cursor->instr->link);
11507ec681f3Smrg        cursor->instr = I;
11517ec681f3Smrg        return;
11527ec681f3Smrg
11537ec681f3Smrg    case bi_cursor_after_block:
11547ec681f3Smrg        list_addtail(&I->link, &cursor->block->instructions);
11557ec681f3Smrg        cursor->option = bi_cursor_after_instr;
11567ec681f3Smrg        cursor->instr = I;
11577ec681f3Smrg        return;
11587ec681f3Smrg
11597ec681f3Smrg    case bi_cursor_before_instr:
11607ec681f3Smrg        list_addtail(&I->link, &cursor->instr->link);
11617ec681f3Smrg        cursor->option = bi_cursor_after_instr;
11627ec681f3Smrg        cursor->instr = I;
11637ec681f3Smrg        return;
11647ec681f3Smrg    }
11657ec681f3Smrg
11667ec681f3Smrg    unreachable("Invalid cursor option");
11677ec681f3Smrg}
11687ec681f3Smrg
11697ec681f3Smrgstatic inline unsigned
11707ec681f3Smrgbi_word_node(bi_index idx)
11717ec681f3Smrg{
11727ec681f3Smrg        assert(idx.type == BI_INDEX_NORMAL && !idx.reg);
11737ec681f3Smrg        return (idx.value << 2) | idx.offset;
11747ec681f3Smrg}
11757ec681f3Smrg
11767ec681f3Smrg/* NIR passes */
11777ec681f3Smrg
11787ec681f3Smrgbool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes);
11797ec681f3Smrg
11807ec681f3Smrg#endif
1181