17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2020 Collabora, Ltd. 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#ifndef __PAN_IR_H 257ec681f3Smrg#define __PAN_IR_H 267ec681f3Smrg 277ec681f3Smrg#include <stdint.h> 287ec681f3Smrg#include "compiler/nir/nir.h" 297ec681f3Smrg#include "util/u_dynarray.h" 307ec681f3Smrg#include "util/hash_table.h" 317ec681f3Smrg 327ec681f3Smrg/* Indices for named (non-XFB) varyings that are present. These are packed 337ec681f3Smrg * tightly so they correspond to a bitfield present (P) indexed by (1 << 347ec681f3Smrg * PAN_VARY_*). This has the nice property that you can lookup the buffer index 357ec681f3Smrg * of a given special field given a shift S by: 367ec681f3Smrg * 377ec681f3Smrg * idx = popcount(P & ((1 << S) - 1)) 387ec681f3Smrg * 397ec681f3Smrg * That is... look at all of the varyings that come earlier and count them, the 407ec681f3Smrg * count is the new index since plus one. Likewise, the total number of special 417ec681f3Smrg * buffers required is simply popcount(P) 427ec681f3Smrg */ 437ec681f3Smrg 447ec681f3Smrgenum pan_special_varying { 457ec681f3Smrg PAN_VARY_GENERAL = 0, 467ec681f3Smrg PAN_VARY_POSITION = 1, 477ec681f3Smrg PAN_VARY_PSIZ = 2, 487ec681f3Smrg PAN_VARY_PNTCOORD = 3, 497ec681f3Smrg PAN_VARY_FACE = 4, 507ec681f3Smrg PAN_VARY_FRAGCOORD = 5, 517ec681f3Smrg 527ec681f3Smrg /* Keep last */ 537ec681f3Smrg PAN_VARY_MAX, 547ec681f3Smrg}; 557ec681f3Smrg 567ec681f3Smrg/* Maximum number of attribute descriptors required for varyings. These include 577ec681f3Smrg * up to MAX_VARYING source level varyings plus a descriptor each non-GENERAL 587ec681f3Smrg * special varying */ 597ec681f3Smrg#define PAN_MAX_VARYINGS (MAX_VARYING + PAN_VARY_MAX - 1) 607ec681f3Smrg 617ec681f3Smrg/* Define the general compiler entry point */ 627ec681f3Smrg 637ec681f3Smrg#define MAX_SYSVAL_COUNT 32 647ec681f3Smrg 657ec681f3Smrg/* Allow 2D of sysval IDs, while allowing nonparametric sysvals to equal 667ec681f3Smrg * their class for equal comparison */ 677ec681f3Smrg 687ec681f3Smrg#define PAN_SYSVAL(type, no) (((no) << 16) | PAN_SYSVAL_##type) 697ec681f3Smrg#define PAN_SYSVAL_TYPE(sysval) ((sysval) & 0xffff) 707ec681f3Smrg#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16) 717ec681f3Smrg 727ec681f3Smrg/* Define some common types. We start at one for easy indexing of hash 737ec681f3Smrg * tables internal to the compiler */ 747ec681f3Smrg 757ec681f3Smrgenum { 767ec681f3Smrg PAN_SYSVAL_VIEWPORT_SCALE = 1, 777ec681f3Smrg PAN_SYSVAL_VIEWPORT_OFFSET = 2, 787ec681f3Smrg PAN_SYSVAL_TEXTURE_SIZE = 3, 797ec681f3Smrg PAN_SYSVAL_SSBO = 4, 807ec681f3Smrg PAN_SYSVAL_NUM_WORK_GROUPS = 5, 817ec681f3Smrg PAN_SYSVAL_SAMPLER = 7, 827ec681f3Smrg PAN_SYSVAL_LOCAL_GROUP_SIZE = 8, 837ec681f3Smrg PAN_SYSVAL_WORK_DIM = 9, 847ec681f3Smrg PAN_SYSVAL_IMAGE_SIZE = 10, 857ec681f3Smrg PAN_SYSVAL_SAMPLE_POSITIONS = 11, 867ec681f3Smrg PAN_SYSVAL_MULTISAMPLED = 12, 877ec681f3Smrg PAN_SYSVAL_RT_CONVERSION = 13, 887ec681f3Smrg PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14, 897ec681f3Smrg PAN_SYSVAL_DRAWID = 15, 907ec681f3Smrg PAN_SYSVAL_BLEND_CONSTANTS = 16, 917ec681f3Smrg}; 927ec681f3Smrg 937ec681f3Smrg#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \ 947ec681f3Smrg ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0)) 957ec681f3Smrg 967ec681f3Smrg#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id) ((id) & 0x7f) 977ec681f3Smrg#define PAN_SYSVAL_ID_TO_TXS_DIM(id) (((id) >> 7) & 0x3) 987ec681f3Smrg#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id) !!((id) & (1 << 9)) 997ec681f3Smrg 1007ec681f3Smrg/* Special attribute slots for vertex builtins. Sort of arbitrary but let's be 1017ec681f3Smrg * consistent with the blob so we can compare traces easier. */ 1027ec681f3Smrg 1037ec681f3Smrgenum { 1047ec681f3Smrg PAN_VERTEX_ID = 16, 1057ec681f3Smrg PAN_INSTANCE_ID = 17, 1067ec681f3Smrg PAN_MAX_ATTRIBUTE 1077ec681f3Smrg}; 1087ec681f3Smrg 1097ec681f3Smrgstruct panfrost_sysvals { 1107ec681f3Smrg /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */ 1117ec681f3Smrg unsigned sysvals[MAX_SYSVAL_COUNT]; 1127ec681f3Smrg unsigned sysval_count; 1137ec681f3Smrg}; 1147ec681f3Smrg 1157ec681f3Smrg/* Technically Midgard could go up to 92 in a pathological case but we don't 1167ec681f3Smrg * take advantage of that. Likewise Bifrost's FAU encoding can address 128 1177ec681f3Smrg * words but actual implementations (G72, G76) are capped at 64 */ 1187ec681f3Smrg 1197ec681f3Smrg#define PAN_MAX_PUSH 64 1207ec681f3Smrg 1217ec681f3Smrg/* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so 1227ec681f3Smrg * an offset to a word must be < 2^16. There are less than 2^8 UBOs */ 1237ec681f3Smrg 1247ec681f3Smrgstruct panfrost_ubo_word { 1257ec681f3Smrg uint16_t ubo; 1267ec681f3Smrg uint16_t offset; 1277ec681f3Smrg}; 1287ec681f3Smrg 1297ec681f3Smrgstruct panfrost_ubo_push { 1307ec681f3Smrg unsigned count; 1317ec681f3Smrg struct panfrost_ubo_word words[PAN_MAX_PUSH]; 1327ec681f3Smrg}; 1337ec681f3Smrg 1347ec681f3Smrg/* Helper for searching the above. Note this is O(N) to the number of pushed 1357ec681f3Smrg * constants, do not run in the draw call hot path */ 1367ec681f3Smrg 1377ec681f3Smrgunsigned 1387ec681f3Smrgpan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs); 1397ec681f3Smrg 1407ec681f3Smrgstruct hash_table_u64 * 1417ec681f3Smrgpanfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx); 1427ec681f3Smrg 1437ec681f3Smrgunsigned 1447ec681f3Smrgpan_lookup_sysval(struct hash_table_u64 *sysval_to_id, 1457ec681f3Smrg struct panfrost_sysvals *sysvals, 1467ec681f3Smrg int sysval); 1477ec681f3Smrg 1487ec681f3Smrgint 1497ec681f3Smrgpanfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest); 1507ec681f3Smrg 1517ec681f3Smrgstruct panfrost_compile_inputs { 1527ec681f3Smrg unsigned gpu_id; 1537ec681f3Smrg bool is_blend, is_blit; 1547ec681f3Smrg struct { 1557ec681f3Smrg unsigned rt; 1567ec681f3Smrg unsigned nr_samples; 1577ec681f3Smrg uint64_t bifrost_blend_desc; 1587ec681f3Smrg } blend; 1597ec681f3Smrg unsigned sysval_ubo; 1607ec681f3Smrg bool shaderdb; 1617ec681f3Smrg bool no_ubo_to_push; 1627ec681f3Smrg 1637ec681f3Smrg enum pipe_format rt_formats[8]; 1647ec681f3Smrg uint8_t raw_fmt_mask; 1657ec681f3Smrg unsigned nr_cbufs; 1667ec681f3Smrg 1677ec681f3Smrg union { 1687ec681f3Smrg struct { 1697ec681f3Smrg bool static_rt_conv; 1707ec681f3Smrg uint32_t rt_conv[8]; 1717ec681f3Smrg } bifrost; 1727ec681f3Smrg }; 1737ec681f3Smrg}; 1747ec681f3Smrg 1757ec681f3Smrgstruct pan_shader_varying { 1767ec681f3Smrg gl_varying_slot location; 1777ec681f3Smrg enum pipe_format format; 1787ec681f3Smrg}; 1797ec681f3Smrg 1807ec681f3Smrgstruct bifrost_shader_blend_info { 1817ec681f3Smrg nir_alu_type type; 1827ec681f3Smrg uint32_t return_offset; 1837ec681f3Smrg 1847ec681f3Smrg /* mali_bifrost_register_file_format corresponding to nir_alu_type */ 1857ec681f3Smrg unsigned format; 1867ec681f3Smrg}; 1877ec681f3Smrg 1887ec681f3Smrgstruct bifrost_shader_info { 1897ec681f3Smrg struct bifrost_shader_blend_info blend[8]; 1907ec681f3Smrg nir_alu_type blend_src1_type; 1917ec681f3Smrg bool wait_6, wait_7; 1927ec681f3Smrg 1937ec681f3Smrg /* Packed, preloaded message descriptors */ 1947ec681f3Smrg uint16_t messages[2]; 1957ec681f3Smrg}; 1967ec681f3Smrg 1977ec681f3Smrgstruct midgard_shader_info { 1987ec681f3Smrg unsigned first_tag; 1997ec681f3Smrg}; 2007ec681f3Smrg 2017ec681f3Smrgstruct pan_shader_info { 2027ec681f3Smrg gl_shader_stage stage; 2037ec681f3Smrg unsigned work_reg_count; 2047ec681f3Smrg unsigned tls_size; 2057ec681f3Smrg unsigned wls_size; 2067ec681f3Smrg 2077ec681f3Smrg union { 2087ec681f3Smrg struct { 2097ec681f3Smrg bool reads_frag_coord; 2107ec681f3Smrg bool reads_point_coord; 2117ec681f3Smrg bool reads_face; 2127ec681f3Smrg bool helper_invocations; 2137ec681f3Smrg bool can_discard; 2147ec681f3Smrg bool writes_depth; 2157ec681f3Smrg bool writes_stencil; 2167ec681f3Smrg bool writes_coverage; 2177ec681f3Smrg bool sidefx; 2187ec681f3Smrg bool reads_sample_id; 2197ec681f3Smrg bool reads_sample_pos; 2207ec681f3Smrg bool reads_sample_mask_in; 2217ec681f3Smrg bool reads_helper_invocation; 2227ec681f3Smrg bool sample_shading; 2237ec681f3Smrg bool early_fragment_tests; 2247ec681f3Smrg bool can_early_z, can_fpk; 2257ec681f3Smrg BITSET_WORD outputs_read; 2267ec681f3Smrg BITSET_WORD outputs_written; 2277ec681f3Smrg } fs; 2287ec681f3Smrg 2297ec681f3Smrg struct { 2307ec681f3Smrg bool writes_point_size; 2317ec681f3Smrg } vs; 2327ec681f3Smrg }; 2337ec681f3Smrg 2347ec681f3Smrg bool separable; 2357ec681f3Smrg bool contains_barrier; 2367ec681f3Smrg bool writes_global; 2377ec681f3Smrg uint64_t outputs_written; 2387ec681f3Smrg 2397ec681f3Smrg unsigned sampler_count; 2407ec681f3Smrg unsigned texture_count; 2417ec681f3Smrg unsigned ubo_count; 2427ec681f3Smrg unsigned attribute_count; 2437ec681f3Smrg 2447ec681f3Smrg struct { 2457ec681f3Smrg unsigned input_count; 2467ec681f3Smrg struct pan_shader_varying input[PAN_MAX_VARYINGS]; 2477ec681f3Smrg unsigned output_count; 2487ec681f3Smrg struct pan_shader_varying output[PAN_MAX_VARYINGS]; 2497ec681f3Smrg } varyings; 2507ec681f3Smrg 2517ec681f3Smrg struct panfrost_sysvals sysvals; 2527ec681f3Smrg 2537ec681f3Smrg /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access 2547ec681f3Smrg * Uniforms (Bifrost) */ 2557ec681f3Smrg struct panfrost_ubo_push push; 2567ec681f3Smrg 2577ec681f3Smrg uint32_t ubo_mask; 2587ec681f3Smrg 2597ec681f3Smrg union { 2607ec681f3Smrg struct bifrost_shader_info bifrost; 2617ec681f3Smrg struct midgard_shader_info midgard; 2627ec681f3Smrg }; 2637ec681f3Smrg}; 2647ec681f3Smrg 2657ec681f3Smrgtypedef struct pan_block { 2667ec681f3Smrg /* Link to next block. Must be first for mir_get_block */ 2677ec681f3Smrg struct list_head link; 2687ec681f3Smrg 2697ec681f3Smrg /* List of instructions emitted for the current block */ 2707ec681f3Smrg struct list_head instructions; 2717ec681f3Smrg 2727ec681f3Smrg /* Index of the block in source order */ 2737ec681f3Smrg unsigned name; 2747ec681f3Smrg 2757ec681f3Smrg /* Control flow graph */ 2767ec681f3Smrg struct pan_block *successors[2]; 2777ec681f3Smrg struct set *predecessors; 2787ec681f3Smrg bool unconditional_jumps; 2797ec681f3Smrg 2807ec681f3Smrg /* In liveness analysis, these are live masks (per-component) for 2817ec681f3Smrg * indices for the block. Scalar compilers have the luxury of using 2827ec681f3Smrg * simple bit fields, but for us, liveness is a vector idea. */ 2837ec681f3Smrg uint16_t *live_in; 2847ec681f3Smrg uint16_t *live_out; 2857ec681f3Smrg} pan_block; 2867ec681f3Smrg 2877ec681f3Smrgstruct pan_instruction { 2887ec681f3Smrg struct list_head link; 2897ec681f3Smrg}; 2907ec681f3Smrg 2917ec681f3Smrg#define pan_foreach_instr_in_block_rev(block, v) \ 2927ec681f3Smrg list_for_each_entry_rev(struct pan_instruction, v, &block->instructions, link) 2937ec681f3Smrg 2947ec681f3Smrg#define pan_foreach_successor(blk, v) \ 2957ec681f3Smrg pan_block *v; \ 2967ec681f3Smrg pan_block **_v; \ 2977ec681f3Smrg for (_v = (pan_block **) &blk->successors[0], \ 2987ec681f3Smrg v = *_v; \ 2997ec681f3Smrg v != NULL && _v < (pan_block **) &blk->successors[2]; \ 3007ec681f3Smrg _v++, v = *_v) \ 3017ec681f3Smrg 3027ec681f3Smrg#define pan_foreach_predecessor(blk, v) \ 3037ec681f3Smrg struct set_entry *_entry_##v; \ 3047ec681f3Smrg struct pan_block *v; \ 3057ec681f3Smrg for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \ 3067ec681f3Smrg v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL); \ 3077ec681f3Smrg _entry_##v != NULL; \ 3087ec681f3Smrg _entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \ 3097ec681f3Smrg v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL)) 3107ec681f3Smrg 3117ec681f3Smrgstatic inline pan_block * 3127ec681f3Smrgpan_exit_block(struct list_head *blocks) 3137ec681f3Smrg{ 3147ec681f3Smrg pan_block *last = list_last_entry(blocks, pan_block, link); 3157ec681f3Smrg assert(!last->successors[0] && !last->successors[1]); 3167ec681f3Smrg return last; 3177ec681f3Smrg} 3187ec681f3Smrg 3197ec681f3Smrgtypedef void (*pan_liveness_update)(uint16_t *, void *, unsigned max); 3207ec681f3Smrg 3217ec681f3Smrgvoid pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask); 3227ec681f3Smrgvoid pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask); 3237ec681f3Smrgbool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max); 3247ec681f3Smrg 3257ec681f3Smrgvoid pan_compute_liveness(struct list_head *blocks, 3267ec681f3Smrg unsigned temp_count, 3277ec681f3Smrg pan_liveness_update callback); 3287ec681f3Smrg 3297ec681f3Smrgvoid pan_free_liveness(struct list_head *blocks); 3307ec681f3Smrg 3317ec681f3Smrguint16_t 3327ec681f3Smrgpan_to_bytemask(unsigned bytes, unsigned mask); 3337ec681f3Smrg 3347ec681f3Smrgvoid pan_block_add_successor(pan_block *block, pan_block *successor); 3357ec681f3Smrg 3367ec681f3Smrg/* IR indexing */ 3377ec681f3Smrg#define PAN_IS_REG (1) 3387ec681f3Smrg 3397ec681f3Smrgstatic inline unsigned 3407ec681f3Smrgpan_ssa_index(nir_ssa_def *ssa) 3417ec681f3Smrg{ 3427ec681f3Smrg /* Off-by-one ensures BIR_NO_ARG is skipped */ 3437ec681f3Smrg return ((ssa->index + 1) << 1) | 0; 3447ec681f3Smrg} 3457ec681f3Smrg 3467ec681f3Smrgstatic inline unsigned 3477ec681f3Smrgpan_src_index(nir_src *src) 3487ec681f3Smrg{ 3497ec681f3Smrg if (src->is_ssa) 3507ec681f3Smrg return pan_ssa_index(src->ssa); 3517ec681f3Smrg else { 3527ec681f3Smrg assert(!src->reg.indirect); 3537ec681f3Smrg return (src->reg.reg->index << 1) | PAN_IS_REG; 3547ec681f3Smrg } 3557ec681f3Smrg} 3567ec681f3Smrg 3577ec681f3Smrgstatic inline unsigned 3587ec681f3Smrgpan_dest_index(nir_dest *dst) 3597ec681f3Smrg{ 3607ec681f3Smrg if (dst->is_ssa) 3617ec681f3Smrg return pan_ssa_index(&dst->ssa); 3627ec681f3Smrg else { 3637ec681f3Smrg assert(!dst->reg.indirect); 3647ec681f3Smrg return (dst->reg.reg->index << 1) | PAN_IS_REG; 3657ec681f3Smrg } 3667ec681f3Smrg} 3677ec681f3Smrg 3687ec681f3Smrg/* IR printing helpers */ 3697ec681f3Smrgvoid pan_print_alu_type(nir_alu_type t, FILE *fp); 3707ec681f3Smrg 3717ec681f3Smrg/* Until it can be upstreamed.. */ 3727ec681f3Smrgbool pan_has_source_mod(nir_alu_src *src, nir_op op); 3737ec681f3Smrgbool pan_has_dest_mod(nir_dest **dest, nir_op op); 3747ec681f3Smrg 3757ec681f3Smrg/* NIR passes to do some backend-specific lowering */ 3767ec681f3Smrg 3777ec681f3Smrg#define PAN_WRITEOUT_C 1 3787ec681f3Smrg#define PAN_WRITEOUT_Z 2 3797ec681f3Smrg#define PAN_WRITEOUT_S 4 3807ec681f3Smrg 3817ec681f3Smrgbool pan_nir_reorder_writeout(nir_shader *nir); 3827ec681f3Smrgbool pan_nir_lower_zs_store(nir_shader *nir); 3837ec681f3Smrg 3847ec681f3Smrgbool pan_nir_lower_64bit_intrin(nir_shader *shader); 3857ec681f3Smrg 3867ec681f3Smrgbool pan_lower_helper_invocation(nir_shader *shader); 3877ec681f3Smrgbool pan_lower_sample_pos(nir_shader *shader); 3887ec681f3Smrg 3897ec681f3Smrg#endif 390