1/* 2 * Copyright (C) 2020 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#ifndef __PAN_IR_H 25#define __PAN_IR_H 26 27#include <stdint.h> 28#include "compiler/nir/nir.h" 29#include "util/u_dynarray.h" 30#include "util/hash_table.h" 31 32/* Indices for named (non-XFB) varyings that are present. These are packed 33 * tightly so they correspond to a bitfield present (P) indexed by (1 << 34 * PAN_VARY_*). This has the nice property that you can lookup the buffer index 35 * of a given special field given a shift S by: 36 * 37 * idx = popcount(P & ((1 << S) - 1)) 38 * 39 * That is... look at all of the varyings that come earlier and count them, the 40 * count is the new index since plus one. Likewise, the total number of special 41 * buffers required is simply popcount(P) 42 */ 43 44enum pan_special_varying { 45 PAN_VARY_GENERAL = 0, 46 PAN_VARY_POSITION = 1, 47 PAN_VARY_PSIZ = 2, 48 PAN_VARY_PNTCOORD = 3, 49 PAN_VARY_FACE = 4, 50 PAN_VARY_FRAGCOORD = 5, 51 52 /* Keep last */ 53 PAN_VARY_MAX, 54}; 55 56/* Maximum number of attribute descriptors required for varyings. These include 57 * up to MAX_VARYING source level varyings plus a descriptor each non-GENERAL 58 * special varying */ 59#define PAN_MAX_VARYINGS (MAX_VARYING + PAN_VARY_MAX - 1) 60 61/* Define the general compiler entry point */ 62 63#define MAX_SYSVAL_COUNT 32 64 65/* Allow 2D of sysval IDs, while allowing nonparametric sysvals to equal 66 * their class for equal comparison */ 67 68#define PAN_SYSVAL(type, no) (((no) << 16) | PAN_SYSVAL_##type) 69#define PAN_SYSVAL_TYPE(sysval) ((sysval) & 0xffff) 70#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16) 71 72/* Define some common types. We start at one for easy indexing of hash 73 * tables internal to the compiler */ 74 75enum { 76 PAN_SYSVAL_VIEWPORT_SCALE = 1, 77 PAN_SYSVAL_VIEWPORT_OFFSET = 2, 78 PAN_SYSVAL_TEXTURE_SIZE = 3, 79 PAN_SYSVAL_SSBO = 4, 80 PAN_SYSVAL_NUM_WORK_GROUPS = 5, 81 PAN_SYSVAL_SAMPLER = 7, 82 PAN_SYSVAL_LOCAL_GROUP_SIZE = 8, 83 PAN_SYSVAL_WORK_DIM = 9, 84 PAN_SYSVAL_IMAGE_SIZE = 10, 85 PAN_SYSVAL_SAMPLE_POSITIONS = 11, 86 PAN_SYSVAL_MULTISAMPLED = 12, 87 PAN_SYSVAL_RT_CONVERSION = 13, 88 PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14, 89 PAN_SYSVAL_DRAWID = 15, 90 PAN_SYSVAL_BLEND_CONSTANTS = 16, 91}; 92 93#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \ 94 ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0)) 95 96#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id) ((id) & 0x7f) 97#define PAN_SYSVAL_ID_TO_TXS_DIM(id) (((id) >> 7) & 0x3) 98#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id) !!((id) & (1 << 9)) 99 100/* Special attribute slots for vertex builtins. Sort of arbitrary but let's be 101 * consistent with the blob so we can compare traces easier. */ 102 103enum { 104 PAN_VERTEX_ID = 16, 105 PAN_INSTANCE_ID = 17, 106 PAN_MAX_ATTRIBUTE 107}; 108 109struct panfrost_sysvals { 110 /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */ 111 unsigned sysvals[MAX_SYSVAL_COUNT]; 112 unsigned sysval_count; 113}; 114 115/* Technically Midgard could go up to 92 in a pathological case but we don't 116 * take advantage of that. Likewise Bifrost's FAU encoding can address 128 117 * words but actual implementations (G72, G76) are capped at 64 */ 118 119#define PAN_MAX_PUSH 64 120 121/* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so 122 * an offset to a word must be < 2^16. There are less than 2^8 UBOs */ 123 124struct panfrost_ubo_word { 125 uint16_t ubo; 126 uint16_t offset; 127}; 128 129struct panfrost_ubo_push { 130 unsigned count; 131 struct panfrost_ubo_word words[PAN_MAX_PUSH]; 132}; 133 134/* Helper for searching the above. Note this is O(N) to the number of pushed 135 * constants, do not run in the draw call hot path */ 136 137unsigned 138pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs); 139 140struct hash_table_u64 * 141panfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx); 142 143unsigned 144pan_lookup_sysval(struct hash_table_u64 *sysval_to_id, 145 struct panfrost_sysvals *sysvals, 146 int sysval); 147 148int 149panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest); 150 151struct panfrost_compile_inputs { 152 unsigned gpu_id; 153 bool is_blend, is_blit; 154 struct { 155 unsigned rt; 156 unsigned nr_samples; 157 uint64_t bifrost_blend_desc; 158 } blend; 159 unsigned sysval_ubo; 160 bool shaderdb; 161 bool no_ubo_to_push; 162 163 enum pipe_format rt_formats[8]; 164 uint8_t raw_fmt_mask; 165 unsigned nr_cbufs; 166 167 union { 168 struct { 169 bool static_rt_conv; 170 uint32_t rt_conv[8]; 171 } bifrost; 172 }; 173}; 174 175struct pan_shader_varying { 176 gl_varying_slot location; 177 enum pipe_format format; 178}; 179 180struct bifrost_shader_blend_info { 181 nir_alu_type type; 182 uint32_t return_offset; 183 184 /* mali_bifrost_register_file_format corresponding to nir_alu_type */ 185 unsigned format; 186}; 187 188struct bifrost_shader_info { 189 struct bifrost_shader_blend_info blend[8]; 190 nir_alu_type blend_src1_type; 191 bool wait_6, wait_7; 192 193 /* Packed, preloaded message descriptors */ 194 uint16_t messages[2]; 195}; 196 197struct midgard_shader_info { 198 unsigned first_tag; 199}; 200 201struct pan_shader_info { 202 gl_shader_stage stage; 203 unsigned work_reg_count; 204 unsigned tls_size; 205 unsigned wls_size; 206 207 union { 208 struct { 209 bool reads_frag_coord; 210 bool reads_point_coord; 211 bool reads_face; 212 bool helper_invocations; 213 bool can_discard; 214 bool writes_depth; 215 bool writes_stencil; 216 bool writes_coverage; 217 bool sidefx; 218 bool reads_sample_id; 219 bool reads_sample_pos; 220 bool reads_sample_mask_in; 221 bool reads_helper_invocation; 222 bool sample_shading; 223 bool early_fragment_tests; 224 bool can_early_z, can_fpk; 225 BITSET_WORD outputs_read; 226 BITSET_WORD outputs_written; 227 } fs; 228 229 struct { 230 bool writes_point_size; 231 } vs; 232 }; 233 234 bool separable; 235 bool contains_barrier; 236 bool writes_global; 237 uint64_t outputs_written; 238 239 unsigned sampler_count; 240 unsigned texture_count; 241 unsigned ubo_count; 242 unsigned attribute_count; 243 244 struct { 245 unsigned input_count; 246 struct pan_shader_varying input[PAN_MAX_VARYINGS]; 247 unsigned output_count; 248 struct pan_shader_varying output[PAN_MAX_VARYINGS]; 249 } varyings; 250 251 struct panfrost_sysvals sysvals; 252 253 /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access 254 * Uniforms (Bifrost) */ 255 struct panfrost_ubo_push push; 256 257 uint32_t ubo_mask; 258 259 union { 260 struct bifrost_shader_info bifrost; 261 struct midgard_shader_info midgard; 262 }; 263}; 264 265typedef struct pan_block { 266 /* Link to next block. Must be first for mir_get_block */ 267 struct list_head link; 268 269 /* List of instructions emitted for the current block */ 270 struct list_head instructions; 271 272 /* Index of the block in source order */ 273 unsigned name; 274 275 /* Control flow graph */ 276 struct pan_block *successors[2]; 277 struct set *predecessors; 278 bool unconditional_jumps; 279 280 /* In liveness analysis, these are live masks (per-component) for 281 * indices for the block. Scalar compilers have the luxury of using 282 * simple bit fields, but for us, liveness is a vector idea. */ 283 uint16_t *live_in; 284 uint16_t *live_out; 285} pan_block; 286 287struct pan_instruction { 288 struct list_head link; 289}; 290 291#define pan_foreach_instr_in_block_rev(block, v) \ 292 list_for_each_entry_rev(struct pan_instruction, v, &block->instructions, link) 293 294#define pan_foreach_successor(blk, v) \ 295 pan_block *v; \ 296 pan_block **_v; \ 297 for (_v = (pan_block **) &blk->successors[0], \ 298 v = *_v; \ 299 v != NULL && _v < (pan_block **) &blk->successors[2]; \ 300 _v++, v = *_v) \ 301 302#define pan_foreach_predecessor(blk, v) \ 303 struct set_entry *_entry_##v; \ 304 struct pan_block *v; \ 305 for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \ 306 v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL); \ 307 _entry_##v != NULL; \ 308 _entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \ 309 v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL)) 310 311static inline pan_block * 312pan_exit_block(struct list_head *blocks) 313{ 314 pan_block *last = list_last_entry(blocks, pan_block, link); 315 assert(!last->successors[0] && !last->successors[1]); 316 return last; 317} 318 319typedef void (*pan_liveness_update)(uint16_t *, void *, unsigned max); 320 321void pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask); 322void pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask); 323bool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max); 324 325void pan_compute_liveness(struct list_head *blocks, 326 unsigned temp_count, 327 pan_liveness_update callback); 328 329void pan_free_liveness(struct list_head *blocks); 330 331uint16_t 332pan_to_bytemask(unsigned bytes, unsigned mask); 333 334void pan_block_add_successor(pan_block *block, pan_block *successor); 335 336/* IR indexing */ 337#define PAN_IS_REG (1) 338 339static inline unsigned 340pan_ssa_index(nir_ssa_def *ssa) 341{ 342 /* Off-by-one ensures BIR_NO_ARG is skipped */ 343 return ((ssa->index + 1) << 1) | 0; 344} 345 346static inline unsigned 347pan_src_index(nir_src *src) 348{ 349 if (src->is_ssa) 350 return pan_ssa_index(src->ssa); 351 else { 352 assert(!src->reg.indirect); 353 return (src->reg.reg->index << 1) | PAN_IS_REG; 354 } 355} 356 357static inline unsigned 358pan_dest_index(nir_dest *dst) 359{ 360 if (dst->is_ssa) 361 return pan_ssa_index(&dst->ssa); 362 else { 363 assert(!dst->reg.indirect); 364 return (dst->reg.reg->index << 1) | PAN_IS_REG; 365 } 366} 367 368/* IR printing helpers */ 369void pan_print_alu_type(nir_alu_type t, FILE *fp); 370 371/* Until it can be upstreamed.. */ 372bool pan_has_source_mod(nir_alu_src *src, nir_op op); 373bool pan_has_dest_mod(nir_dest **dest, nir_op op); 374 375/* NIR passes to do some backend-specific lowering */ 376 377#define PAN_WRITEOUT_C 1 378#define PAN_WRITEOUT_Z 2 379#define PAN_WRITEOUT_S 4 380 381bool pan_nir_reorder_writeout(nir_shader *nir); 382bool pan_nir_lower_zs_store(nir_shader *nir); 383 384bool pan_nir_lower_64bit_intrin(nir_shader *shader); 385 386bool pan_lower_helper_invocation(nir_shader *shader); 387bool pan_lower_sample_pos(nir_shader *shader); 388 389#endif 390