17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2020 Collabora, Ltd.
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3Smrg * SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#ifndef __PAN_IR_H
257ec681f3Smrg#define __PAN_IR_H
267ec681f3Smrg
277ec681f3Smrg#include <stdint.h>
287ec681f3Smrg#include "compiler/nir/nir.h"
297ec681f3Smrg#include "util/u_dynarray.h"
307ec681f3Smrg#include "util/hash_table.h"
317ec681f3Smrg
/* Indices for named (non-XFB) varyings that are present. These are packed
 * tightly so they correspond to a bitfield present (P) indexed by (1 <<
 * PAN_VARY_*). This has the nice property that you can lookup the buffer index
 * of a given special field given a shift S by:
 *
 *      idx = popcount(P & ((1 << S) - 1))
 *
 * That is, count the present varyings that come earlier than S; because
 * indices are zero-based, that count is the index of S itself. Likewise, the
 * total number of special buffers required is simply popcount(P).
 */

enum pan_special_varying {
        PAN_VARY_GENERAL = 0,
        PAN_VARY_POSITION = 1,
        PAN_VARY_PSIZ = 2,
        PAN_VARY_PNTCOORD = 3,
        PAN_VARY_FACE = 4,
        PAN_VARY_FRAGCOORD = 5,

        /* Keep last: this is the number of entries above, not a varying */
        PAN_VARY_MAX,
};
557ec681f3Smrg
/* Maximum number of attribute descriptors required for varyings. These include
 * up to MAX_VARYING source level varyings plus one descriptor for each
 * non-GENERAL special varying (the "- 1" excludes PAN_VARY_GENERAL). */
#define PAN_MAX_VARYINGS (MAX_VARYING + PAN_VARY_MAX - 1)
607ec681f3Smrg
/* Define the general compiler entry point */

/* Capacity of the sysvals[] array in struct panfrost_sysvals */
#define MAX_SYSVAL_COUNT 32
647ec681f3Smrg
/* Sysval IDs are two-dimensional: the class (one of the PAN_SYSVAL_* values)
 * lives in the low 16 bits and a per-class parameter in the high 16 bits.
 * A nonparametric sysval uses parameter 0, so its ID is numerically equal to
 * its class and the two can be compared directly. */

/* Build an ID from a class suffix (e.g. SSBO) and a parameter */
#define PAN_SYSVAL(type, no) (((no) << 16) | PAN_SYSVAL_##type)
/* Extract the class (low 16 bits) from an ID */
#define PAN_SYSVAL_TYPE(sysval) ((sysval) & 0xffff)
/* Extract the parameter (bits 16 and up) from an ID */
#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16)
717ec681f3Smrg
/* Define some common types. We start at one for easy indexing of hash
 * tables internal to the compiler. These are the class values stored in the
 * low 16 bits of a sysval ID. Note that the value 6 is not assigned. */

enum {
        PAN_SYSVAL_VIEWPORT_SCALE = 1,
        PAN_SYSVAL_VIEWPORT_OFFSET = 2,
        PAN_SYSVAL_TEXTURE_SIZE = 3,
        PAN_SYSVAL_SSBO = 4,
        PAN_SYSVAL_NUM_WORK_GROUPS = 5,
        PAN_SYSVAL_SAMPLER = 7,
        PAN_SYSVAL_LOCAL_GROUP_SIZE = 8,
        PAN_SYSVAL_WORK_DIM = 9,
        PAN_SYSVAL_IMAGE_SIZE = 10,
        PAN_SYSVAL_SAMPLE_POSITIONS = 11,
        PAN_SYSVAL_MULTISAMPLED = 12,
        PAN_SYSVAL_RT_CONVERSION = 13,
        PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14,
        PAN_SYSVAL_DRAWID = 15,
        PAN_SYSVAL_BLEND_CONSTANTS = 16,
};
927ec681f3Smrg
/* Pack the parameter of a texture-size sysval:
 *
 *      bits 0-6: texture index
 *      bits 7-8: dimension
 *      bit  9:   set when the texture is an array
 */
#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array)          \
        ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0))

/* Unpack the fields written by PAN_TXS_SYSVAL_ID. The is-array accessor
 * normalizes to 0 or 1 and is parenthesized so it composes safely inside
 * larger expressions. */
#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id)        ((id) & 0x7f)
#define PAN_SYSVAL_ID_TO_TXS_DIM(id)            (((id) >> 7) & 0x3)
#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id)       (!!((id) & (1 << 9)))
997ec681f3Smrg
/* Special attribute slots for vertex builtins. Sort of arbitrary but let's be
 * consistent with the blob so we can compare traces easier. */

enum {
        PAN_VERTEX_ID   = 16,
        PAN_INSTANCE_ID = 17,

        /* One past the last special slot (evaluates to 18) */
        PAN_MAX_ATTRIBUTE
};
1087ec681f3Smrg
1097ec681f3Smrgstruct panfrost_sysvals {
1107ec681f3Smrg        /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
1117ec681f3Smrg        unsigned sysvals[MAX_SYSVAL_COUNT];
1127ec681f3Smrg        unsigned sysval_count;
1137ec681f3Smrg};
1147ec681f3Smrg
/* Technically Midgard could go up to 92 in a pathological case but we don't
 * take advantage of that. Likewise Bifrost's FAU encoding can address 128
 * words but actual implementations (G72, G76) are capped at 64 */

#define PAN_MAX_PUSH 64

/* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so
 * an offset to a word must be < 2^16. There are fewer than 2^8 UBOs */

/* Identifies one pushed word by the UBO it comes from and its offset */
struct panfrost_ubo_word {
        uint16_t ubo;
        uint16_t offset;
};

/* List of pushed UBO words; only the first `count` entries are meaningful */
struct panfrost_ubo_push {
        unsigned count;
        struct panfrost_ubo_word words[PAN_MAX_PUSH];
};
1337ec681f3Smrg
1347ec681f3Smrg/* Helper for searching the above. Note this is O(N) to the number of pushed
1357ec681f3Smrg * constants, do not run in the draw call hot path */
1367ec681f3Smrg
1377ec681f3Smrgunsigned
1387ec681f3Smrgpan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs);
1397ec681f3Smrg
1407ec681f3Smrgstruct hash_table_u64 *
1417ec681f3Smrgpanfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx);
1427ec681f3Smrg
1437ec681f3Smrgunsigned
1447ec681f3Smrgpan_lookup_sysval(struct hash_table_u64 *sysval_to_id,
1457ec681f3Smrg                  struct panfrost_sysvals *sysvals,
1467ec681f3Smrg                  int sysval);
1477ec681f3Smrg
1487ec681f3Smrgint
1497ec681f3Smrgpanfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest);
1507ec681f3Smrg
1517ec681f3Smrgstruct panfrost_compile_inputs {
1527ec681f3Smrg        unsigned gpu_id;
1537ec681f3Smrg        bool is_blend, is_blit;
1547ec681f3Smrg        struct {
1557ec681f3Smrg                unsigned rt;
1567ec681f3Smrg                unsigned nr_samples;
1577ec681f3Smrg                uint64_t bifrost_blend_desc;
1587ec681f3Smrg        } blend;
1597ec681f3Smrg        unsigned sysval_ubo;
1607ec681f3Smrg        bool shaderdb;
1617ec681f3Smrg        bool no_ubo_to_push;
1627ec681f3Smrg
1637ec681f3Smrg        enum pipe_format rt_formats[8];
1647ec681f3Smrg        uint8_t raw_fmt_mask;
1657ec681f3Smrg        unsigned nr_cbufs;
1667ec681f3Smrg
1677ec681f3Smrg        union {
1687ec681f3Smrg                struct {
1697ec681f3Smrg                        bool static_rt_conv;
1707ec681f3Smrg                        uint32_t rt_conv[8];
1717ec681f3Smrg                } bifrost;
1727ec681f3Smrg        };
1737ec681f3Smrg};
1747ec681f3Smrg
1757ec681f3Smrgstruct pan_shader_varying {
1767ec681f3Smrg        gl_varying_slot location;
1777ec681f3Smrg        enum pipe_format format;
1787ec681f3Smrg};
1797ec681f3Smrg
1807ec681f3Smrgstruct bifrost_shader_blend_info {
1817ec681f3Smrg        nir_alu_type type;
1827ec681f3Smrg        uint32_t return_offset;
1837ec681f3Smrg
1847ec681f3Smrg        /* mali_bifrost_register_file_format corresponding to nir_alu_type */
1857ec681f3Smrg        unsigned format;
1867ec681f3Smrg};
1877ec681f3Smrg
1887ec681f3Smrgstruct bifrost_shader_info {
1897ec681f3Smrg        struct bifrost_shader_blend_info blend[8];
1907ec681f3Smrg        nir_alu_type blend_src1_type;
1917ec681f3Smrg        bool wait_6, wait_7;
1927ec681f3Smrg
1937ec681f3Smrg        /* Packed, preloaded message descriptors */
1947ec681f3Smrg        uint16_t messages[2];
1957ec681f3Smrg};
1967ec681f3Smrg
/* Midgard-specific part of struct pan_shader_info */
struct midgard_shader_info {
        unsigned first_tag;
};
2007ec681f3Smrg
2017ec681f3Smrgstruct pan_shader_info {
2027ec681f3Smrg        gl_shader_stage stage;
2037ec681f3Smrg        unsigned work_reg_count;
2047ec681f3Smrg        unsigned tls_size;
2057ec681f3Smrg        unsigned wls_size;
2067ec681f3Smrg
2077ec681f3Smrg        union {
2087ec681f3Smrg                struct {
2097ec681f3Smrg                        bool reads_frag_coord;
2107ec681f3Smrg                        bool reads_point_coord;
2117ec681f3Smrg                        bool reads_face;
2127ec681f3Smrg                        bool helper_invocations;
2137ec681f3Smrg                        bool can_discard;
2147ec681f3Smrg                        bool writes_depth;
2157ec681f3Smrg                        bool writes_stencil;
2167ec681f3Smrg                        bool writes_coverage;
2177ec681f3Smrg                        bool sidefx;
2187ec681f3Smrg                        bool reads_sample_id;
2197ec681f3Smrg                        bool reads_sample_pos;
2207ec681f3Smrg                        bool reads_sample_mask_in;
2217ec681f3Smrg                        bool reads_helper_invocation;
2227ec681f3Smrg                        bool sample_shading;
2237ec681f3Smrg                        bool early_fragment_tests;
2247ec681f3Smrg                        bool can_early_z, can_fpk;
2257ec681f3Smrg                        BITSET_WORD outputs_read;
2267ec681f3Smrg                        BITSET_WORD outputs_written;
2277ec681f3Smrg                } fs;
2287ec681f3Smrg
2297ec681f3Smrg                struct {
2307ec681f3Smrg                        bool writes_point_size;
2317ec681f3Smrg                } vs;
2327ec681f3Smrg        };
2337ec681f3Smrg
2347ec681f3Smrg        bool separable;
2357ec681f3Smrg        bool contains_barrier;
2367ec681f3Smrg        bool writes_global;
2377ec681f3Smrg        uint64_t outputs_written;
2387ec681f3Smrg
2397ec681f3Smrg        unsigned sampler_count;
2407ec681f3Smrg        unsigned texture_count;
2417ec681f3Smrg        unsigned ubo_count;
2427ec681f3Smrg        unsigned attribute_count;
2437ec681f3Smrg
2447ec681f3Smrg        struct {
2457ec681f3Smrg                unsigned input_count;
2467ec681f3Smrg                struct pan_shader_varying input[PAN_MAX_VARYINGS];
2477ec681f3Smrg                unsigned output_count;
2487ec681f3Smrg                struct pan_shader_varying output[PAN_MAX_VARYINGS];
2497ec681f3Smrg        } varyings;
2507ec681f3Smrg
2517ec681f3Smrg        struct panfrost_sysvals sysvals;
2527ec681f3Smrg
2537ec681f3Smrg        /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
2547ec681f3Smrg         * Uniforms (Bifrost) */
2557ec681f3Smrg        struct panfrost_ubo_push push;
2567ec681f3Smrg
2577ec681f3Smrg        uint32_t ubo_mask;
2587ec681f3Smrg
2597ec681f3Smrg        union {
2607ec681f3Smrg                struct bifrost_shader_info bifrost;
2617ec681f3Smrg                struct midgard_shader_info midgard;
2627ec681f3Smrg        };
2637ec681f3Smrg};
2647ec681f3Smrg
2657ec681f3Smrgtypedef struct pan_block {
2667ec681f3Smrg        /* Link to next block. Must be first for mir_get_block */
2677ec681f3Smrg        struct list_head link;
2687ec681f3Smrg
2697ec681f3Smrg        /* List of instructions emitted for the current block */
2707ec681f3Smrg        struct list_head instructions;
2717ec681f3Smrg
2727ec681f3Smrg        /* Index of the block in source order */
2737ec681f3Smrg        unsigned name;
2747ec681f3Smrg
2757ec681f3Smrg        /* Control flow graph */
2767ec681f3Smrg        struct pan_block *successors[2];
2777ec681f3Smrg        struct set *predecessors;
2787ec681f3Smrg        bool unconditional_jumps;
2797ec681f3Smrg
2807ec681f3Smrg        /* In liveness analysis, these are live masks (per-component) for
2817ec681f3Smrg         * indices for the block. Scalar compilers have the luxury of using
2827ec681f3Smrg         * simple bit fields, but for us, liveness is a vector idea. */
2837ec681f3Smrg        uint16_t *live_in;
2847ec681f3Smrg        uint16_t *live_out;
2857ec681f3Smrg} pan_block;
2867ec681f3Smrg
2877ec681f3Smrgstruct pan_instruction {
2887ec681f3Smrg        struct list_head link;
2897ec681f3Smrg};
2907ec681f3Smrg
/* Iterate the instructions of `block` in reverse order, binding each one to
 * `v` as a struct pan_instruction pointer. `block` is parenthesized so that
 * an arbitrary pointer expression may be passed. */
#define pan_foreach_instr_in_block_rev(block, v) \
        list_for_each_entry_rev(struct pan_instruction, v, &(block)->instructions, link)
2937ec681f3Smrg
/* Iterate the non-NULL successors of `blk`, binding each to `v`. Iteration
 * stops at the first NULL slot or after both slots have been visited.
 *
 * The cursor is bounds-checked before it is dereferenced, so successors[2]
 * (one past the end of the array) is never read; `v` is NULL once the loop
 * runs to completion.
 *
 * This expands to two declarations (`v` and `_v`) followed by a for
 * statement, so it cannot be the body of an unbraced if/else, and two uses
 * need separate scopes. */
#define pan_foreach_successor(blk, v) \
        pan_block *v; \
        pan_block **_v; \
        for (_v = (pan_block **) &(blk)->successors[0], \
                v = *_v; \
                v != NULL; \
                v = (++_v < (pan_block **) &(blk)->successors[2]) ? *_v : NULL)

/* Iterate the predecessor set of `blk`, binding each entry's key to `v` as a
 * struct pan_block pointer. `blk` is parenthesized so that an arbitrary
 * pointer expression may be passed.
 *
 * This expands to two declarations (`v` and `_entry_<v>`) followed by a for
 * statement, so it cannot be the body of an unbraced if/else. */
#define pan_foreach_predecessor(blk, v) \
        struct set_entry *_entry_##v; \
        struct pan_block *v; \
        for (_entry_##v = _mesa_set_next_entry((blk)->predecessors, NULL), \
                v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL);  \
                _entry_##v != NULL; \
                _entry_##v = _mesa_set_next_entry((blk)->predecessors, _entry_##v), \
                v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL))
3107ec681f3Smrg
3117ec681f3Smrgstatic inline pan_block *
3127ec681f3Smrgpan_exit_block(struct list_head *blocks)
3137ec681f3Smrg{
3147ec681f3Smrg        pan_block *last = list_last_entry(blocks, pan_block, link);
3157ec681f3Smrg        assert(!last->successors[0] && !last->successors[1]);
3167ec681f3Smrg        return last;
3177ec681f3Smrg}
3187ec681f3Smrg
3197ec681f3Smrgtypedef void (*pan_liveness_update)(uint16_t *, void *, unsigned max);
3207ec681f3Smrg
3217ec681f3Smrgvoid pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask);
3227ec681f3Smrgvoid pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask);
3237ec681f3Smrgbool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max);
3247ec681f3Smrg
3257ec681f3Smrgvoid pan_compute_liveness(struct list_head *blocks,
3267ec681f3Smrg                unsigned temp_count,
3277ec681f3Smrg                pan_liveness_update callback);
3287ec681f3Smrg
3297ec681f3Smrgvoid pan_free_liveness(struct list_head *blocks);
3307ec681f3Smrg
3317ec681f3Smrguint16_t
3327ec681f3Smrgpan_to_bytemask(unsigned bytes, unsigned mask);
3337ec681f3Smrg
3347ec681f3Smrgvoid pan_block_add_successor(pan_block *block, pan_block *successor);
3357ec681f3Smrg
/* IR indexing: the low bit of an index tells NIR registers (bit set) apart
 * from SSA values (bit clear); the remaining bits hold the NIR index. */
#define PAN_IS_REG (1)
3387ec681f3Smrg
3397ec681f3Smrgstatic inline unsigned
3407ec681f3Smrgpan_ssa_index(nir_ssa_def *ssa)
3417ec681f3Smrg{
3427ec681f3Smrg        /* Off-by-one ensures BIR_NO_ARG is skipped */
3437ec681f3Smrg        return ((ssa->index + 1) << 1) | 0;
3447ec681f3Smrg}
3457ec681f3Smrg
3467ec681f3Smrgstatic inline unsigned
3477ec681f3Smrgpan_src_index(nir_src *src)
3487ec681f3Smrg{
3497ec681f3Smrg        if (src->is_ssa)
3507ec681f3Smrg                return pan_ssa_index(src->ssa);
3517ec681f3Smrg        else {
3527ec681f3Smrg                assert(!src->reg.indirect);
3537ec681f3Smrg                return (src->reg.reg->index << 1) | PAN_IS_REG;
3547ec681f3Smrg        }
3557ec681f3Smrg}
3567ec681f3Smrg
3577ec681f3Smrgstatic inline unsigned
3587ec681f3Smrgpan_dest_index(nir_dest *dst)
3597ec681f3Smrg{
3607ec681f3Smrg        if (dst->is_ssa)
3617ec681f3Smrg                return pan_ssa_index(&dst->ssa);
3627ec681f3Smrg        else {
3637ec681f3Smrg                assert(!dst->reg.indirect);
3647ec681f3Smrg                return (dst->reg.reg->index << 1) | PAN_IS_REG;
3657ec681f3Smrg        }
3667ec681f3Smrg}
3677ec681f3Smrg
3687ec681f3Smrg/* IR printing helpers */
3697ec681f3Smrgvoid pan_print_alu_type(nir_alu_type t, FILE *fp);
3707ec681f3Smrg
3717ec681f3Smrg/* Until it can be upstreamed.. */
3727ec681f3Smrgbool pan_has_source_mod(nir_alu_src *src, nir_op op);
3737ec681f3Smrgbool pan_has_dest_mod(nir_dest **dest, nir_op op);
3747ec681f3Smrg
/* NIR passes to do some backend-specific lowering */

/* Writeout flags. Each occupies a distinct bit, so they can be OR'd together.
 * NOTE(review): C / Z / S presumably stand for colour / depth / stencil. */
#define PAN_WRITEOUT_C 1
#define PAN_WRITEOUT_Z 2
#define PAN_WRITEOUT_S 4
3807ec681f3Smrg
3817ec681f3Smrgbool pan_nir_reorder_writeout(nir_shader *nir);
3827ec681f3Smrgbool pan_nir_lower_zs_store(nir_shader *nir);
3837ec681f3Smrg
3847ec681f3Smrgbool pan_nir_lower_64bit_intrin(nir_shader *shader);
3857ec681f3Smrg
3867ec681f3Smrgbool pan_lower_helper_invocation(nir_shader *shader);
3877ec681f3Smrgbool pan_lower_sample_pos(nir_shader *shader);
3887ec681f3Smrg
3897ec681f3Smrg#endif
390