agx_compiler.h revision 7ec681f3
1/* 2 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io> 3 * Copyright (C) 2020 Collabora Ltd. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25#ifndef __AGX_COMPILER_H 26#define __AGX_COMPILER_H 27 28#include "compiler/nir/nir.h" 29#include "util/u_math.h" 30#include "util/half_float.h" 31#include "util/u_dynarray.h" 32#include "agx_compile.h" 33#include "agx_opcodes.h" 34#include "agx_minifloat.h" 35 36enum agx_dbg { 37 AGX_DBG_MSGS = BITFIELD_BIT(0), 38 AGX_DBG_SHADERS = BITFIELD_BIT(1), 39 AGX_DBG_SHADERDB = BITFIELD_BIT(2), 40 AGX_DBG_VERBOSE = BITFIELD_BIT(3), 41 AGX_DBG_INTERNAL = BITFIELD_BIT(4), 42}; 43 44extern int agx_debug; 45 46/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */ 47#define AGX_NUM_REGS (256) 48 49enum agx_index_type { 50 AGX_INDEX_NULL = 0, 51 AGX_INDEX_NORMAL = 1, 52 AGX_INDEX_IMMEDIATE = 2, 53 AGX_INDEX_UNIFORM = 3, 54 AGX_INDEX_REGISTER = 4, 55 AGX_INDEX_NIR_REGISTER = 5, 56}; 57 58enum agx_size { 59 AGX_SIZE_16 = 0, 60 AGX_SIZE_32 = 1, 61 AGX_SIZE_64 = 2 62}; 63 64typedef struct { 65 /* Sufficient for as many SSA values as we need. Immediates and uniforms fit in 16-bits */ 66 unsigned value : 22; 67 68 /* Indicates that this source kills the referenced value (because it is the 69 * last use in a block and the source is not live after the block). Set by 70 * liveness analysis. */ 71 bool kill : 1; 72 73 /* Cache hints */ 74 bool cache : 1; 75 bool discard : 1; 76 77 /* src - float modifiers */ 78 bool abs : 1; 79 bool neg : 1; 80 81 enum agx_size size : 2; 82 enum agx_index_type type : 3; 83} agx_index; 84 85static inline agx_index 86agx_get_index(unsigned value, enum agx_size size) 87{ 88 return (agx_index) { 89 .type = AGX_INDEX_NORMAL, 90 .value = value, 91 .size = size 92 }; 93} 94 95static inline agx_index 96agx_immediate(uint16_t imm) 97{ 98 return (agx_index) { 99 .type = AGX_INDEX_IMMEDIATE, 100 .value = imm, 101 .size = AGX_SIZE_32 102 }; 103} 104 105static inline agx_index 106agx_immediate_f(float f) 107{ 108 assert(agx_minifloat_exact(f)); 109 return agx_immediate(agx_minifloat_encode(f)); 110} 111 112/* in half-words, specify r0h as 1, r1 as 2... */ 113static inline agx_index 114agx_register(uint8_t imm, enum agx_size size) 115{ 116 return (agx_index) { 117 .type = AGX_INDEX_REGISTER, 118 .value = imm, 119 .size = size 120 }; 121} 122 123static inline agx_index 124agx_nir_register(unsigned imm, enum agx_size size) 125{ 126 return (agx_index) { 127 .type = AGX_INDEX_NIR_REGISTER, 128 .value = imm, 129 .size = size 130 }; 131} 132 133/* Also in half-words */ 134static inline agx_index 135agx_uniform(uint8_t imm, enum agx_size size) 136{ 137 return (agx_index) { 138 .type = AGX_INDEX_UNIFORM, 139 .value = imm, 140 .size = size 141 }; 142} 143 144static inline agx_index 145agx_null() 146{ 147 return (agx_index) { .type = AGX_INDEX_NULL }; 148} 149 150static inline agx_index 151agx_zero() 152{ 153 return agx_immediate(0); 154} 155 156/* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa 157 * = exponent = 0, sign bit set */ 158 159static inline agx_index 160agx_negzero() 161{ 162 return agx_immediate(0x80); 163} 164 165static inline agx_index 166agx_abs(agx_index idx) 167{ 168 idx.abs = true; 169 idx.neg = false; 170 return idx; 171} 172 173static inline agx_index 174agx_neg(agx_index idx) 175{ 176 idx.neg ^= true; 177 return idx; 178} 179 180/* Replaces an index, preserving any modifiers */ 181 182static inline agx_index 183agx_replace_index(agx_index old, agx_index replacement) 184{ 185 replacement.abs = old.abs; 186 replacement.neg = old.neg; 187 return replacement; 188} 189 190static inline bool 191agx_is_null(agx_index idx) 192{ 193 return idx.type == AGX_INDEX_NULL; 194} 195 196/* Compares equivalence as references */ 197 198static inline bool 199agx_is_equiv(agx_index left, agx_index right) 200{ 201 return (left.type == right.type) && (left.value == right.value); 202} 203 204#define AGX_MAX_DESTS 1 205#define AGX_MAX_SRCS 5 206 207enum agx_icond { 208 AGX_ICOND_UEQ = 0, 209 AGX_ICOND_ULT = 1, 210 AGX_ICOND_UGT = 2, 211 /* unknown */ 212 AGX_ICOND_SEQ = 4, 213 AGX_ICOND_SLT = 5, 214 AGX_ICOND_SGT = 6, 215 /* unknown */ 216}; 217 218enum agx_fcond { 219 AGX_FCOND_EQ = 0, 220 AGX_FCOND_LT = 1, 221 AGX_FCOND_GT = 2, 222 AGX_FCOND_LTN = 3, 223 /* unknown */ 224 AGX_FCOND_GE = 5, 225 AGX_FCOND_LE = 6, 226 AGX_FCOND_GTN = 7, 227}; 228 229enum agx_round { 230 AGX_ROUND_RTZ = 0, 231 AGX_ROUND_RTE = 1, 232}; 233 234enum agx_convert { 235 AGX_CONVERT_U8_TO_F = 0, 236 AGX_CONVERT_S8_TO_F = 1, 237 AGX_CONVERT_F_TO_U16 = 4, 238 AGX_CONVERT_F_TO_S16 = 5, 239 AGX_CONVERT_U16_TO_F = 6, 240 AGX_CONVERT_S16_TO_F = 7, 241 AGX_CONVERT_F_TO_U32 = 8, 242 AGX_CONVERT_F_TO_S32 = 9, 243 AGX_CONVERT_U32_TO_F = 10, 244 AGX_CONVERT_S32_TO_F = 11 245}; 246 247enum agx_lod_mode { 248 AGX_LOD_MODE_AUTO_LOD = 0, 249 AGX_LOD_MODE_LOD_MIN = 6, 250 AGX_LOD_GRAD = 8, 251 AGX_LOD_GRAD_MIN = 12 252}; 253 254enum agx_dim { 255 AGX_DIM_TEX_1D = 0, 256 AGX_DIM_TEX_1D_ARRAY = 1, 257 AGX_DIM_TEX_2D = 2, 258 AGX_DIM_TEX_2D_ARRAY = 3, 259 AGX_DIM_TEX_2D_MS = 4, 260 AGX_DIM_TEX_3D = 5, 261 AGX_DIM_TEX_CUBE = 6, 262 AGX_DIM_TEX_CUBE_ARRAY = 7 263}; 264 265/* Forward declare for branch target */ 266struct agx_block; 267 268typedef struct { 269 /* Must be first */ 270 struct list_head link; 271 272 enum agx_opcode op; 273 274 /* Data flow */ 275 agx_index dest[AGX_MAX_DESTS]; 276 agx_index src[AGX_MAX_SRCS]; 277 278 union { 279 uint32_t imm; 280 uint32_t writeout; 281 uint32_t truth_table; 282 uint32_t component; 283 uint32_t channels; 284 uint32_t bfi_mask; 285 enum agx_sr sr; 286 enum agx_icond icond; 287 enum agx_fcond fcond; 288 enum agx_format format; 289 enum agx_round round; 290 enum agx_lod_mode lod_mode; 291 struct agx_block *target; 292 }; 293 294 /* For load varying */ 295 bool perspective : 1; 296 297 /* Invert icond/fcond */ 298 bool invert_cond : 1; 299 300 /* TODO: Handle tex ops more efficient */ 301 enum agx_dim dim : 3; 302 303 /* Final st_vary op */ 304 bool last : 1; 305 306 /* Shift for a bitwise or memory op (conflicts with format for memory ops) */ 307 unsigned shift : 4; 308 309 /* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require 310 * scoreboarding (everything but memory load/store and texturing). */ 311 unsigned scoreboard : 1; 312 313 /* Number of nested control flow layers to jump by */ 314 unsigned nest : 2; 315 316 /* Output modifiers */ 317 bool saturate : 1; 318 unsigned mask : 4; 319} agx_instr; 320 321struct agx_block; 322 323typedef struct agx_block { 324 /* Link to next block. Must be first */ 325 struct list_head link; 326 327 /* List of instructions emitted for the current block */ 328 struct list_head instructions; 329 330 /* Index of the block in source order */ 331 unsigned name; 332 333 /* Control flow graph */ 334 struct agx_block *successors[2]; 335 struct set *predecessors; 336 bool unconditional_jumps; 337 338 /* Liveness analysis results */ 339 BITSET_WORD *live_in; 340 BITSET_WORD *live_out; 341 342 /* Register allocation */ 343 BITSET_DECLARE(regs_out, AGX_NUM_REGS); 344 345 /* Offset of the block in the emitted binary */ 346 off_t offset; 347 348 /** Available for passes to use for metadata */ 349 uint8_t pass_flags; 350} agx_block; 351 352typedef struct { 353 nir_shader *nir; 354 gl_shader_stage stage; 355 struct list_head blocks; /* list of agx_block */ 356 struct agx_shader_info *out; 357 struct agx_shader_key *key; 358 359 /* Remapping table for varyings indexed by driver_location */ 360 unsigned varyings[AGX_MAX_VARYINGS]; 361 362 /* Handling phi nodes is still TODO while we bring up other parts of the 363 * driver. YOLO the mapping of nir_register to fixed hardware registers */ 364 unsigned *nir_regalloc; 365 366 /* We reserve the top (XXX: that hurts thread count) */ 367 unsigned max_register; 368 369 /* Place to start pushing new values */ 370 unsigned push_base; 371 372 /* For creating temporaries */ 373 unsigned alloc; 374 375 /* I don't really understand how writeout ops work yet */ 376 bool did_writeout; 377 378 /* Has r0l been zeroed yet due to control flow? */ 379 bool any_cf; 380 381 /** Computed metadata */ 382 bool has_liveness; 383 384 /* Number of nested control flow structures within the innermost loop. Since 385 * NIR is just loop and if-else, this is the number of nested if-else 386 * statements in the loop */ 387 unsigned loop_nesting; 388 389 /* During instruction selection, for inserting control flow */ 390 agx_block *current_block; 391 agx_block *continue_block; 392 agx_block *break_block; 393 agx_block *after_block; 394 395 /* Stats for shader-db */ 396 unsigned loop_count; 397 unsigned spills; 398 unsigned fills; 399} agx_context; 400 401static inline void 402agx_remove_instruction(agx_instr *ins) 403{ 404 list_del(&ins->link); 405} 406 407static inline agx_index 408agx_temp(agx_context *ctx, enum agx_size size) 409{ 410 return agx_get_index(ctx->alloc++, size); 411} 412 413static enum agx_size 414agx_size_for_bits(unsigned bits) 415{ 416 switch (bits) { 417 case 1: 418 case 16: return AGX_SIZE_16; 419 case 32: return AGX_SIZE_32; 420 case 64: return AGX_SIZE_64; 421 default: unreachable("Invalid bitsize"); 422 } 423} 424 425static inline agx_index 426agx_src_index(nir_src *src) 427{ 428 if (!src->is_ssa) { 429 return agx_nir_register(src->reg.reg->index, 430 agx_size_for_bits(nir_src_bit_size(*src))); 431 } 432 433 return agx_get_index(src->ssa->index, 434 agx_size_for_bits(nir_src_bit_size(*src))); 435} 436 437static inline agx_index 438agx_dest_index(nir_dest *dst) 439{ 440 if (!dst->is_ssa) { 441 return agx_nir_register(dst->reg.reg->index, 442 agx_size_for_bits(nir_dest_bit_size(*dst))); 443 } 444 445 return agx_get_index(dst->ssa.index, 446 agx_size_for_bits(nir_dest_bit_size(*dst))); 447} 448 449/* Iterators for AGX IR */ 450 451#define agx_foreach_block(ctx, v) \ 452 list_for_each_entry(agx_block, v, &ctx->blocks, link) 453 454#define agx_foreach_block_rev(ctx, v) \ 455 list_for_each_entry_rev(agx_block, v, &ctx->blocks, link) 456 457#define agx_foreach_block_from(ctx, from, v) \ 458 list_for_each_entry_from(agx_block, v, from, &ctx->blocks, link) 459 460#define agx_foreach_block_from_rev(ctx, from, v) \ 461 list_for_each_entry_from_rev(agx_block, v, from, &ctx->blocks, link) 462 463#define agx_foreach_instr_in_block(block, v) \ 464 list_for_each_entry(agx_instr, v, &(block)->instructions, link) 465 466#define agx_foreach_instr_in_block_rev(block, v) \ 467 list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link) 468 469#define agx_foreach_instr_in_block_safe(block, v) \ 470 list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link) 471 472#define agx_foreach_instr_in_block_safe_rev(block, v) \ 473 list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link) 474 475#define agx_foreach_instr_in_block_from(block, v, from) \ 476 list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link) 477 478#define agx_foreach_instr_in_block_from_rev(block, v, from) \ 479 list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions, link) 480 481#define agx_foreach_instr_global(ctx, v) \ 482 agx_foreach_block(ctx, v_block) \ 483 agx_foreach_instr_in_block(v_block, v) 484 485#define agx_foreach_instr_global_rev(ctx, v) \ 486 agx_foreach_block_rev(ctx, v_block) \ 487 agx_foreach_instr_in_block_rev(v_block, v) 488 489#define agx_foreach_instr_global_safe(ctx, v) \ 490 agx_foreach_block(ctx, v_block) \ 491 agx_foreach_instr_in_block_safe(v_block, v) 492 493#define agx_foreach_instr_global_safe_rev(ctx, v) \ 494 agx_foreach_block_rev(ctx, v_block) \ 495 agx_foreach_instr_in_block_safe_rev(v_block, v) 496 497/* Based on set_foreach, expanded with automatic type casts */ 498 499#define agx_foreach_successor(blk, v) \ 500 agx_block *v; \ 501 agx_block **_v; \ 502 for (_v = (agx_block **) &blk->successors[0], \ 503 v = *_v; \ 504 v != NULL && _v < (agx_block **) &blk->successors[2]; \ 505 _v++, v = *_v) \ 506 507#define agx_foreach_predecessor(blk, v) \ 508 struct set_entry *_entry_##v; \ 509 agx_block *v; \ 510 for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \ 511 v = (agx_block *) (_entry_##v ? _entry_##v->key : NULL); \ 512 _entry_##v != NULL; \ 513 _entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \ 514 v = (agx_block *) (_entry_##v ? _entry_##v->key : NULL)) 515 516#define agx_foreach_src(ins, v) \ 517 for (unsigned v = 0; v < ARRAY_SIZE(ins->src); ++v) 518 519#define agx_foreach_dest(ins, v) \ 520 for (unsigned v = 0; v < ARRAY_SIZE(ins->dest); ++v) 521 522static inline agx_instr * 523agx_prev_op(agx_instr *ins) 524{ 525 return list_last_entry(&(ins->link), agx_instr, link); 526} 527 528static inline agx_instr * 529agx_next_op(agx_instr *ins) 530{ 531 return list_first_entry(&(ins->link), agx_instr, link); 532} 533 534static inline agx_block * 535agx_next_block(agx_block *block) 536{ 537 return list_first_entry(&(block->link), agx_block, link); 538} 539 540static inline agx_block * 541agx_exit_block(agx_context *ctx) 542{ 543 agx_block *last = list_last_entry(&ctx->blocks, agx_block, link); 544 assert(!last->successors[0] && !last->successors[1]); 545 return last; 546} 547 548/* Like in NIR, for use with the builder */ 549 550enum agx_cursor_option { 551 agx_cursor_after_block, 552 agx_cursor_before_instr, 553 agx_cursor_after_instr 554}; 555 556typedef struct { 557 enum agx_cursor_option option; 558 559 union { 560 agx_block *block; 561 agx_instr *instr; 562 }; 563} agx_cursor; 564 565static inline agx_cursor 566agx_after_block(agx_block *block) 567{ 568 return (agx_cursor) { 569 .option = agx_cursor_after_block, 570 .block = block 571 }; 572} 573 574static inline agx_cursor 575agx_before_instr(agx_instr *instr) 576{ 577 return (agx_cursor) { 578 .option = agx_cursor_before_instr, 579 .instr = instr 580 }; 581} 582 583static inline agx_cursor 584agx_after_instr(agx_instr *instr) 585{ 586 return (agx_cursor) { 587 .option = agx_cursor_after_instr, 588 .instr = instr 589 }; 590} 591 592/* IR builder in terms of cursor infrastructure */ 593 594typedef struct { 595 agx_context *shader; 596 agx_cursor cursor; 597} agx_builder; 598 599static inline agx_builder 600agx_init_builder(agx_context *ctx, agx_cursor cursor) 601{ 602 return (agx_builder) { 603 .shader = ctx, 604 .cursor = cursor 605 }; 606} 607 608/* Insert an instruction at the cursor and move the cursor */ 609 610static inline void 611agx_builder_insert(agx_cursor *cursor, agx_instr *I) 612{ 613 switch (cursor->option) { 614 case agx_cursor_after_instr: 615 list_add(&I->link, &cursor->instr->link); 616 cursor->instr = I; 617 return; 618 619 case agx_cursor_after_block: 620 list_addtail(&I->link, &cursor->block->instructions); 621 cursor->option = agx_cursor_after_instr; 622 cursor->instr = I; 623 return; 624 625 case agx_cursor_before_instr: 626 list_addtail(&I->link, &cursor->instr->link); 627 cursor->option = agx_cursor_after_instr; 628 cursor->instr = I; 629 return; 630 } 631 632 unreachable("Invalid cursor option"); 633} 634 635/* Uniform file management */ 636 637agx_index 638agx_indexed_sysval(agx_context *ctx, enum agx_push_type type, enum agx_size size, 639 unsigned index, unsigned length); 640 641/* Routines defined for AIR */ 642 643void agx_print_instr(agx_instr *I, FILE *fp); 644void agx_print_block(agx_block *block, FILE *fp); 645void agx_print_shader(agx_context *ctx, FILE *fp); 646void agx_optimizer(agx_context *ctx); 647void agx_dce(agx_context *ctx); 648void agx_ra(agx_context *ctx); 649void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission); 650 651void agx_compute_liveness(agx_context *ctx); 652void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I); 653 654#endif 655