/*
 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef IR3_H_
#define IR3_H_

#include <stdint.h>
#include <stdbool.h>

#include "compiler/shader_enums.h"

#include "util/bitscan.h"
#include "util/list.h"
#include "util/u_debug.h"

#include "instr-a3xx.h"

/* low level intermediate representation of an adreno shader program */

struct ir3_compiler;
struct ir3;
struct ir3_instruction;
struct ir3_block;

/* Statistics about an assembled shader (filled in by ir3_assemble()): */
struct ir3_info {
	uint32_t gpu_id;
	uint16_t sizedwords;
	uint16_t instrs_count;   /* expanded to account for rpt's */
	/* NOTE: max_reg, etc, does not include registers not touched
	 * by the shader (ie. vertex fetched via VFD_DECODE but not
	 * touched by shader)
	 */
	int8_t max_reg;   /* highest GPR # used by shader */
	int8_t max_half_reg;
	int16_t max_const;

	/* number of sync bits: */
	uint16_t ss, sy;
};

/* A single src or dst operand of an instruction. */
struct ir3_register {
	enum {
		IR3_REG_CONST  = 0x001,
		IR3_REG_IMMED  = 0x002,
		IR3_REG_HALF   = 0x004,
		/* high registers are used for some things in compute shaders,
		 * for example.  Seems to be for things that are global to all
		 * threads in a wave, so possibly these are global/shared by
		 * all the threads in the wave?
		 */
		IR3_REG_HIGH   = 0x008,
		IR3_REG_RELATIV= 0x010,
		IR3_REG_R      = 0x020,
		/* Most instructions, it seems, can do float abs/neg but not
		 * integer.  The CP pass needs to know what is intended (int or
		 * float) in order to do the right thing.  For this reason the
		 * abs/neg flags are split out into float and int variants.  In
		 * addition, .b (bitwise) operations, the negate is actually a
		 * bitwise not, so split that out into a new flag to make it
		 * more clear.
		 */
		IR3_REG_FNEG   = 0x040,
		IR3_REG_FABS   = 0x080,
		IR3_REG_SNEG   = 0x100,
		IR3_REG_SABS   = 0x200,
		IR3_REG_BNOT   = 0x400,
		IR3_REG_EVEN   = 0x800,
		IR3_REG_POS_INF= 0x1000,
		/* (ei) flag, end-input?  Set on last bary, presumably to signal
		 * that the shader needs no more input:
		 */
		IR3_REG_EI     = 0x2000,
		/* meta-flags, for intermediate stages of IR, ie.
		 * before register assignment is done:
		 */
		IR3_REG_SSA    = 0x4000,   /* 'instr' is ptr to assigning instr */
		IR3_REG_ARRAY  = 0x8000,

	} flags;

	bool merged : 1;   /* half-regs conflict with full regs (ie >= a6xx) */

	/* normal registers:
	 * the component is in the low two bits of the reg #, so
	 * rN.x becomes: (N << 2) | x
	 */
	uint16_t num;
	union {
		/* immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
		/* relative: */
		struct {
			uint16_t id;
			int16_t offset;
		} array;
	};

	/* For IR3_REG_SSA, src registers contain ptr back to assigning
	 * instruction.
	 *
	 * For IR3_REG_ARRAY, the pointer is back to the last dependent
	 * array access (although the net effect is the same, it points
	 * back to a previous instruction that we depend on).
	 */
	struct ir3_instruction *instr;

	union {
		/* used for cat5 instructions, but also for internal/IR level
		 * tracking of what registers are read/written by an instruction.
		 * wrmask may be a bad name since it is used to represent both
		 * src and dst that touch multiple adjacent registers.
		 */
		unsigned wrmask;
		/* for relative addressing, 32bits for array size is too small,
		 * but otoh we don't need to deal with disjoint sets, so instead
		 * use a simple size field (number of scalar components).
		 */
		unsigned size;
	};
};

/*
 * Stupid/simple growable array implementation:
 */
/* Declares the triple of fields (name_count, name_sz, name) inline in
 * the enclosing struct:
 */
#define DECLARE_ARRAY(type, name) \
	unsigned name ## _count, name ## _sz; \
	type * name;

/* NOTE: 'ctx' is a ralloc context; 'arr' is evaluated multiple times,
 * so pass a plain field name (as declared by DECLARE_ARRAY), not an
 * expression with side effects:
 */
#define array_insert(ctx, arr, val) do { \
		if (arr ## _count == arr ## _sz) { \
			arr ## _sz = MAX2(2 * arr ## _sz, 16); \
			arr = reralloc_size(ctx, arr, arr ## _sz * sizeof(arr[0])); \
		} \
		arr[arr ##_count++] = val; \
	} while (0)

struct ir3_instruction {
	struct ir3_block *block;
	opc_t opc;
	enum {
		/* (sy) flag is set on first instruction, and after sample
		 * instructions (probably just on RAW hazard).
		 */
		IR3_INSTR_SY    = 0x001,
		/* (ss) flag is set on first instruction, and first instruction
		 * to depend on the result of "long" instructions (RAW hazard):
		 *
		 *   rcp, rsq, log2, exp2, sin, cos, sqrt
		 *
		 * It seems to synchronize until all in-flight instructions are
		 * completed, for example:
		 *
		 *   rsq hr1.w, hr1.w
		 *   add.f hr2.z, (neg)hr2.z, hc0.y
		 *   mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
		 *   rsq hr2.x, hr2.x
		 *   (rpt1)nop
		 *   mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
		 *   nop
		 *   mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
		 *   (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
		 *   (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
		 *
		 * The last mul.f does not have (ss) set, presumably because the
		 * (ss) on the previous instruction does the job.
		 *
		 * The blob driver also seems to set it on WAR hazards, although
		 * not really clear if this is needed or just blob compiler being
		 * sloppy.
So far I haven't found a case where removing the (ss)
		 * causes problems for WAR hazard, but I could just be getting
		 * lucky:
		 *
		 *   rcp r1.y, r3.y
		 *   (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
		 *
		 */
		IR3_INSTR_SS    = 0x002,
		/* (jp) flag is set on jump targets:
		 */
		IR3_INSTR_JP    = 0x004,
		IR3_INSTR_UL    = 0x008,
		IR3_INSTR_3D    = 0x010,
		IR3_INSTR_A     = 0x020,
		IR3_INSTR_O     = 0x040,
		IR3_INSTR_P     = 0x080,
		IR3_INSTR_S     = 0x100,
		IR3_INSTR_S2EN  = 0x200,
		IR3_INSTR_G     = 0x400,
		IR3_INSTR_SAT   = 0x800,
		/* meta-flags, for intermediate stages of IR, ie.
		 * before register assignment is done:
		 */
		IR3_INSTR_MARK  = 0x1000,
		IR3_INSTR_UNUSED= 0x2000,
	} flags;
	/* repeat count, ie. the (rptN) suffix -- presumably how many extra
	 * times the instruction executes (TODO confirm exact semantics):
	 */
	uint8_t repeat;
	uint8_t nop;
#ifdef DEBUG
	unsigned regs_max;   /* allocated size of regs[] (debug only) */
#endif
	unsigned regs_count;
	/* regs[0] is dst (except for stores, see is_store()), regs[1..] are
	 * srcs:
	 */
	struct ir3_register **regs;
	/* per-category extra payload -- which member is valid depends on
	 * opc_cat(opc):
	 */
	union {
		struct {
			char inv;
			char comp;
			int  immed;
			struct ir3_block *target;
		} cat0;
		struct {
			type_t src_type, dst_type;
		} cat1;
		struct {
			enum {
				IR3_COND_LT = 0,
				IR3_COND_LE = 1,
				IR3_COND_GT = 2,
				IR3_COND_GE = 3,
				IR3_COND_EQ = 4,
				IR3_COND_NE = 5,
			} condition;
		} cat2;
		struct {
			unsigned samp, tex;
			type_t type;
		} cat5;
		struct {
			type_t type;
			int src_offset;
			int dst_offset;
			int iim_val : 3;   /* for ldgb/stgb, # of components */
			unsigned d : 3;
			bool typed : 1;
		} cat6;
		struct {
			unsigned w : 1;   /* write */
			unsigned r : 1;   /* read */
			unsigned l : 1;   /* local */
			unsigned g : 1;   /* global */
		} cat7;
		/* for meta-instructions, just used to hold extra data
		 * before instruction scheduling, etc
		 */
		struct {
			int off;   /* component/offset */
		} fo;
		struct {
			struct ir3_block *block;
		} inout;
	};

	/* transient values used during various algorithms: */
	union {
		/* The instruction depth is the max dependency distance to output.
		 *
		 * You can also think of it as the "cost", if we did any sort of
		 * optimization for register footprint.  Ie. a value that is just
		 * result of moving a const to a reg would have a low cost, so to
		 * it could make sense to duplicate the instruction at various
		 * points where the result is needed to reduce register footprint.
		 */
		unsigned depth;
		/* When we get to the RA stage, we no longer need depth, but
		 * we do need instruction's position/name:
		 */
		struct {
			uint16_t ip;
			uint16_t name;
		};
	};

	/* used for per-pass extra instruction data.
	 */
	void *data;

	int sun;         /* Sethi–Ullman number, used by sched */
	int use_count;   /* currently just updated/used by cp */

	/* Used during CP and RA stages.  For fanin and shader inputs/
	 * outputs where we need a sequence of consecutive registers,
	 * keep track of each src instructions left (ie 'n-1') and right
	 * (ie 'n+1') neighbor.  The front-end must insert enough mov's
	 * to ensure that each instruction has at most one left and at
	 * most one right neighbor.  During the copy-propagation pass,
	 * we only remove mov's when we can preserve this constraint.
	 * And during the RA stage, we use the neighbor information to
	 * allocate a block of registers in one shot.
	 *
	 * TODO: maybe just add something like:
	 *   struct ir3_instruction_ref {
	 *     struct ir3_instruction *instr;
	 *     unsigned cnt;
	 *   }
	 *
	 * Or can we get away without the refcnt stuff?  It seems like
	 * it should be overkill..  the problem is if, potentially after
	 * already eliminating some mov's, if you have a single mov that
	 * needs to be grouped with it's neighbors in two different
	 * places (ex. shader output and a fanin).
	 */
	struct {
		struct ir3_instruction *left, *right;
		uint16_t left_cnt, right_cnt;
	} cp;

	/* an instruction can reference at most one address register amongst
	 * it's src/dst registers.  Beyond that, you need to insert mov's.
	 *
	 * NOTE: do not write this directly, use ir3_instr_set_address()
	 */
	struct ir3_instruction *address;

	/* Tracking for additional dependent instructions.  Used to handle
	 * barriers, WAR hazards for arrays/SSBOs/etc.
	 */
	DECLARE_ARRAY(struct ir3_instruction *, deps);

	/*
	 * From PoV of instruction scheduling, not execution (ie. ignores global/
	 * local distinction):
	 *                          shared  image  atomic  SSBO  everything
	 *   barrier()/              R/W     R/W    R/W     R/W     X
	 *     groupMemoryBarrier()
	 *   memoryBarrier()                 R/W    R/W     R/W
	 *     (but only images declared coherent?)
	 *   memoryBarrierAtomic()                  R/W
	 *   memoryBarrierBuffer()                          R/W
	 *   memoryBarrierImage()            R/W
	 *   memoryBarrierShared()   R/W
	 *
	 * TODO I think for SSBO/image/shared, in cases where we can determine
	 * which variable is accessed, we don't need to care about accesses to
	 * different variables (unless declared coherent??)
	 */
	enum {
		IR3_BARRIER_EVERYTHING = 1 << 0,
		IR3_BARRIER_SHARED_R   = 1 << 1,
		IR3_BARRIER_SHARED_W   = 1 << 2,
		IR3_BARRIER_IMAGE_R    = 1 << 3,
		IR3_BARRIER_IMAGE_W    = 1 << 4,
		IR3_BARRIER_BUFFER_R   = 1 << 5,
		IR3_BARRIER_BUFFER_W   = 1 << 6,
		IR3_BARRIER_ARRAY_R    = 1 << 7,
		IR3_BARRIER_ARRAY_W    = 1 << 8,
	} barrier_class, barrier_conflict;

	/* Entry in ir3_block's instruction list: */
	struct list_head node;

#ifdef DEBUG
	uint32_t serialno;
#endif
};

/* Walk the cp.left links back to the first (leftmost) instruction in a
 * neighbor chain.  The 0xffff bailout guards (debug-asserting) against
 * a cyclic chain, which would otherwise loop forever.
 */
static inline struct ir3_instruction *
ir3_neighbor_first(struct ir3_instruction *instr)
{
	int cnt = 0;
	while (instr->cp.left) {
		instr = instr->cp.left;
		if (++cnt > 0xffff) {
			debug_assert(0);
			break;
		}
	}
	return instr;
}

/* Count the instructions in a neighbor chain, starting from the chain's
 * first (leftmost) instruction -- caller must pass the result of
 * ir3_neighbor_first() (enforced by the debug_assert).
 */
static inline int ir3_neighbor_count(struct ir3_instruction *instr)
{
	int num = 1;

	debug_assert(!instr->cp.left);

	while (instr->cp.right) {
		num++;
		instr = instr->cp.right;
		if (num > 0xffff) {
			debug_assert(0);
			break;
		}
	}

	return num;
}

/* A complete shader program (one compiled variant): */
struct ir3 {
	struct ir3_compiler *compiler;
	gl_shader_stage type;

	unsigned ninputs, noutputs;
	struct ir3_instruction **inputs;
	struct ir3_instruction **outputs;

	/* Track bary.f (and ldlv) instructions.. this is needed in
	 * scheduling to ensure that all varying fetches happen before
	 * any potential kill instructions.  The hw gets grumpy if all
	 * threads in a group are killed before the last bary.f gets
	 * a chance to signal end of input (ei).
	 */
	DECLARE_ARRAY(struct ir3_instruction *, baryfs);

	/* Track all indirect instructions (read and write).  To avoid
	 * deadlock scenario where an address register gets scheduled,
	 * but other dependent src instructions cannot be scheduled due
	 * to dependency on a *different* address register value, the
	 * scheduler needs to ensure that all dependencies other than
	 * the instruction other than the address register are scheduled
	 * before the one that writes the address register.  Having a
	 * convenient list of instructions that reference some address
	 * register simplifies this.
	 */
	DECLARE_ARRAY(struct ir3_instruction *, indirects);

	/* and same for instructions that consume predicate register: */
	DECLARE_ARRAY(struct ir3_instruction *, predicates);

	/* Track texture sample instructions which need texture state
	 * patched in (for astc-srgb workaround):
	 */
	DECLARE_ARRAY(struct ir3_instruction *, astc_srgb);

	/* List of blocks: */
	struct list_head block_list;

	/* List of ir3_array's: */
	struct list_head array_list;

	unsigned max_sun;   /* max Sethi–Ullman number */

#ifdef DEBUG
	unsigned block_count, instr_count;
#endif
};

/* An indirectly-addressable register array (corresponds to a NIR
 * register):
 */
struct ir3_array {
	struct list_head node;
	unsigned length;
	unsigned id;

	struct nir_register *r;

	/* To avoid array write's from getting DCE'd, keep track of the
	 * most recent write.  Any array access depends on the most
	 * recent write.  This way, nothing depends on writes after the
	 * last read.  But all the writes that happen before that have
	 * something depending on them
	 */
	struct ir3_instruction *last_write;

	/* extra stuff used in RA pass: */
	unsigned base;   /* base vreg name */
	unsigned reg;    /* base physical reg */
	uint16_t start_ip, end_ip;
};

struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id);

/* A basic block in the shader's control-flow graph: */
struct ir3_block {
	struct list_head node;
	struct ir3 *shader;

	const struct nir_block *nblock;

	struct list_head instr_list;   /* list of ir3_instruction */

	/* each block has either one or two successors.. in case of
	 * two successors, 'condition' decides which one to follow.
	 * A block preceding an if/else has two successors.
	 */
	struct ir3_instruction *condition;
	struct ir3_block *successors[2];

	unsigned predecessors_count;
	struct ir3_block **predecessors;

	uint16_t start_ip, end_ip;

	/* Track instructions which do not write a register but other-
	 * wise must not be discarded (such as kill, stg, etc)
	 */
	DECLARE_ARRAY(struct ir3_instruction *, keeps);

	/* used for per-pass extra block data.  Mainly used right
	 * now in RA step to track livein/liveout.
508 */ 509 void *data; 510 511#ifdef DEBUG 512 uint32_t serialno; 513#endif 514}; 515 516static inline uint32_t 517block_id(struct ir3_block *block) 518{ 519#ifdef DEBUG 520 return block->serialno; 521#else 522 return (uint32_t)(unsigned long)block; 523#endif 524} 525 526struct ir3 * ir3_create(struct ir3_compiler *compiler, 527 gl_shader_stage type, unsigned nin, unsigned nout); 528void ir3_destroy(struct ir3 *shader); 529void * ir3_assemble(struct ir3 *shader, 530 struct ir3_info *info, uint32_t gpu_id); 531void * ir3_alloc(struct ir3 *shader, int sz); 532 533struct ir3_block * ir3_block_create(struct ir3 *shader); 534 535struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc); 536struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, 537 opc_t opc, int nreg); 538struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); 539void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep); 540const char *ir3_instr_name(struct ir3_instruction *instr); 541 542struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, 543 int num, int flags); 544struct ir3_register * ir3_reg_clone(struct ir3 *shader, 545 struct ir3_register *reg); 546 547void ir3_instr_set_address(struct ir3_instruction *instr, 548 struct ir3_instruction *addr); 549 550static inline bool ir3_instr_check_mark(struct ir3_instruction *instr) 551{ 552 if (instr->flags & IR3_INSTR_MARK) 553 return true; /* already visited */ 554 instr->flags |= IR3_INSTR_MARK; 555 return false; 556} 557 558void ir3_block_clear_mark(struct ir3_block *block); 559void ir3_clear_mark(struct ir3 *shader); 560 561unsigned ir3_count_instructions(struct ir3 *ir); 562 563static inline int ir3_instr_regno(struct ir3_instruction *instr, 564 struct ir3_register *reg) 565{ 566 unsigned i; 567 for (i = 0; i < instr->regs_count; i++) 568 if (reg == instr->regs[i]) 569 return i; 570 return -1; 571} 572 573 574#define MAX_ARRAYS 16 575 576/* comp: 577 * 0 - x 578 
 * 1 - y
 * 2 - z
 * 3 - w
 */
/* Pack a register number plus component into the encoded form used in
 * ir3_register::num:
 */
static inline uint32_t regid(int num, int comp)
{
	return (num << 2) | (comp & 0x3);
}

/* register number (rN) part of an encoded reg: */
static inline uint32_t reg_num(struct ir3_register *reg)
{
	return reg->num >> 2;
}

/* component (.x/.y/.z/.w) part of an encoded reg: */
static inline uint32_t reg_comp(struct ir3_register *reg)
{
	return reg->num & 0x3;
}

static inline bool is_flow(struct ir3_instruction *instr)
{
	return (opc_cat(instr->opc) == 0);
}

static inline bool is_kill(struct ir3_instruction *instr)
{
	return instr->opc == OPC_KILL;
}

static inline bool is_nop(struct ir3_instruction *instr)
{
	return instr->opc == OPC_NOP;
}

/* Is it a non-transformative (ie. not type changing) mov?  This can
 * also include absneg.s/absneg.f, which for the most part can be
 * treated as a mov (single src argument).
 */
static inline bool is_same_type_mov(struct ir3_instruction *instr)
{
	struct ir3_register *dst;

	switch (instr->opc) {
	case OPC_MOV:
		if (instr->cat1.src_type != instr->cat1.dst_type)
			return false;
		break;
	case OPC_ABSNEG_F:
	case OPC_ABSNEG_S:
		if (instr->flags & IR3_INSTR_SAT)
			return false;
		break;
	default:
		return false;
	}

	dst = instr->regs[0];

	/* mov's that write to a0.x or p0.x are special: */
	if (dst->num == regid(REG_P0, 0))
		return false;
	if (dst->num == regid(REG_A0, 0))
		return false;

	if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
		return false;

	return true;
}

static inline bool is_alu(struct ir3_instruction *instr)
{
	return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3);
}

static inline bool is_sfu(struct ir3_instruction *instr)
{
	return (opc_cat(instr->opc) == 4);
}

static inline bool is_tex(struct ir3_instruction *instr)
{
	return (opc_cat(instr->opc) == 5);
}

static inline bool is_mem(struct ir3_instruction *instr)
{
	return (opc_cat(instr->opc) == 6);
}

static inline bool is_barrier(struct ir3_instruction *instr)
{
	return (opc_cat(instr->opc) == 7);
}

static inline bool
is_store(struct ir3_instruction *instr)
{
	/* these instructions, the "destination" register is
	 * actually a source, the address to store to.
	 */
	switch (instr->opc) {
	case OPC_STG:
	case OPC_STGB:
	case OPC_STIB:
	case OPC_STP:
	case OPC_STL:
	case OPC_STLW:
	case OPC_L2G:
	case OPC_G2L:
		return true;
	default:
		return false;
	}
}

static inline bool is_load(struct ir3_instruction *instr)
{
	switch (instr->opc) {
	case OPC_LDG:
	case OPC_LDGB:
	case OPC_LDIB:
	case OPC_LDL:
	case OPC_LDP:
	case OPC_L2G:
	case OPC_LDLW:
	case OPC_LDC:
	case OPC_LDLV:
		/* probably some others too.. */
		return true;
	default:
		return false;
	}
}

static inline bool is_input(struct ir3_instruction *instr)
{
	/* in some cases, ldlv is used to fetch varying without
	 * interpolation.. fortunately inloc is the first src
	 * register in either case
	 */
	switch (instr->opc) {
	case OPC_LDLV:
	case OPC_BARY_F:
		return true;
	default:
		return false;
	}
}

static inline bool is_bool(struct ir3_instruction *instr)
{
	switch (instr->opc) {
	case OPC_CMPS_F:
	case OPC_CMPS_S:
	case OPC_CMPS_U:
		return true;
	default:
		return false;
	}
}

static inline bool is_meta(struct ir3_instruction *instr)
{
	/* TODO how should we count PHI (and maybe fan-in/out) which
	 * might actually contribute some instructions to the final
	 * result?
	 */
	return (opc_cat(instr->opc) == -1);
}

/* number of dst register slots the instruction writes (derived from the
 * dst wrmask); stores write none (their "dst" is really a src address):
 */
static inline unsigned dest_regs(struct ir3_instruction *instr)
{
	if ((instr->regs_count == 0) || is_store(instr))
		return 0;

	return util_last_bit(instr->regs[0]->wrmask);
}

/* does the instruction write the address register (a0)? */
static inline bool writes_addr(struct ir3_instruction *instr)
{
	if (instr->regs_count > 0) {
		struct ir3_register *dst = instr->regs[0];
		return reg_num(dst) == REG_A0;
	}
	return false;
}

/* does the instruction write the predicate register (p0)? */
static inline bool writes_pred(struct ir3_instruction *instr)
{
	if (instr->regs_count > 0) {
		struct ir3_register *dst = instr->regs[0];
		return reg_num(dst) == REG_P0;
	}
	return false;
}

/* returns defining instruction for reg */
/* TODO better name */
static inline struct ir3_instruction *ssa(struct ir3_register *reg)
{
	if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) {
		return reg->instr;
	}
	return NULL;
}

/* two distinct non-NULL instructions conflict (used eg. for address
 * register ownership checks):
 */
static inline bool conflicts(struct ir3_instruction *a,
		struct ir3_instruction *b)
{
	return (a && b) && (a != b);
}

/* is 'r' a general purpose register (ie. not const/immed and not the
 * special a0/p0 registers)?
 */
static inline bool reg_gpr(struct ir3_register *r)
{
	if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED))
		return false;
	if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
		return false;
	return true;
}

/* map a 32-bit type to its 16-bit counterpart (identity for types that
 * are already 16-bit):
 */
static inline type_t half_type(type_t type)
{
	switch (type) {
	case TYPE_F32: return TYPE_F16;
	case TYPE_U32: return TYPE_U16;
	case TYPE_S32: return TYPE_S16;
	case TYPE_F16:
	case TYPE_U16:
	case TYPE_S16:
		return type;
	default:
		assert(0);
		return ~0;
	}
}

/* some cat2 instructions (ie.
those which are not float) can embed an
 * immediate:
 */
static inline bool ir3_cat2_int(opc_t opc)
{
	switch (opc) {
	case OPC_ADD_U:
	case OPC_ADD_S:
	case OPC_SUB_U:
	case OPC_SUB_S:
	case OPC_CMPS_U:
	case OPC_CMPS_S:
	case OPC_MIN_U:
	case OPC_MIN_S:
	case OPC_MAX_U:
	case OPC_MAX_S:
	case OPC_CMPV_U:
	case OPC_CMPV_S:
	case OPC_MUL_U:
	case OPC_MUL_S:
	case OPC_MULL_U:
	case OPC_CLZ_S:
	case OPC_ABSNEG_S:
	case OPC_AND_B:
	case OPC_OR_B:
	case OPC_NOT_B:
	case OPC_XOR_B:
	case OPC_BFREV_B:
	case OPC_CLZ_B:
	case OPC_SHL_B:
	case OPC_SHR_B:
	case OPC_ASHR_B:
	case OPC_MGEN_B:
	case OPC_GETBIT_B:
	case OPC_CBITS_B:
	case OPC_BARY_F:
		return true;

	default:
		return false;
	}
}


/* map cat2 instruction to valid abs/neg flags: */
static inline unsigned ir3_cat2_absneg(opc_t opc)
{
	switch (opc) {
	case OPC_ADD_F:
	case OPC_MIN_F:
	case OPC_MAX_F:
	case OPC_MUL_F:
	case OPC_SIGN_F:
	case OPC_CMPS_F:
	case OPC_ABSNEG_F:
	case OPC_CMPV_F:
	case OPC_FLOOR_F:
	case OPC_CEIL_F:
	case OPC_RNDNE_F:
	case OPC_RNDAZ_F:
	case OPC_TRUNC_F:
	case OPC_BARY_F:
		return IR3_REG_FABS | IR3_REG_FNEG;

	case OPC_ADD_U:
	case OPC_ADD_S:
	case OPC_SUB_U:
	case OPC_SUB_S:
	case OPC_CMPS_U:
	case OPC_CMPS_S:
	case OPC_MIN_U:
	case OPC_MIN_S:
	case OPC_MAX_U:
	case OPC_MAX_S:
	case OPC_CMPV_U:
	case OPC_CMPV_S:
	case OPC_MUL_U:
	case OPC_MUL_S:
	case OPC_MULL_U:
	case OPC_CLZ_S:
		return 0;

	case OPC_ABSNEG_S:
		return IR3_REG_SABS | IR3_REG_SNEG;

	case OPC_AND_B:
	case OPC_OR_B:
	case OPC_NOT_B:
	case OPC_XOR_B:
	case OPC_BFREV_B:
	case OPC_CLZ_B:
	case OPC_SHL_B:
	case OPC_SHR_B:
	case OPC_ASHR_B:
	case OPC_MGEN_B:
	case OPC_GETBIT_B:
	case OPC_CBITS_B:
		return IR3_REG_BNOT;

	default:
		return 0;
	}
}

/* map cat3 instructions to valid abs/neg flags: */
static inline unsigned ir3_cat3_absneg(opc_t opc)
{
	switch (opc) {
	case OPC_MAD_F16:
	case OPC_MAD_F32:
	case OPC_SEL_F16:
	case OPC_SEL_F32:
		return IR3_REG_FNEG;

	case OPC_MAD_U16:
	case OPC_MADSH_U16:
	case OPC_MAD_S16:
	case OPC_MADSH_M16:
	case OPC_MAD_U24:
	case OPC_MAD_S24:
	case OPC_SEL_S16:
	case OPC_SEL_S32:
	case OPC_SAD_S16:
	case OPC_SAD_S32:
		/* neg *may* work on 3rd src.. */

	case OPC_SEL_B16:
	case OPC_SEL_B32:

	default:
		return 0;
	}
}

#define MASK(n) ((1 << (n)) - 1)

/* iterator for an instructions's sources (reg), also returns src #: */
/* NOTE(review): expands to a bare 'if' -- an 'else' following the loop
 * body would bind to it (dangling-else); keep usage to simple statement
 * bodies:
 */
#define foreach_src_n(__srcreg, __n, __instr) \
	if ((__instr)->regs_count) \
		for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; __n < __cnt; __n++) \
			if ((__srcreg = (__instr)->regs[__n + 1]))

/* iterator for an instructions's sources (reg): */
#define foreach_src(__srcreg, __instr) \
	foreach_src_n(__srcreg, __i, __instr)

/* number of "virtual" srcs visited by the foreach_ssa_* iterators:
 * regs, then false deps, then (optionally) the address register:
 */
static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
{
	unsigned cnt = instr->regs_count + instr->deps_count;
	if (instr->address)
		cnt++;
	return cnt;
}

/* n'th "virtual" SSA src; NULL for non-SSA regs (the iterator's 'if'
 * skips those):
 */
static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
{
	if (n == (instr->regs_count + instr->deps_count))
		return instr->address;
	if (n >= instr->regs_count)
		return instr->deps[n - instr->regs_count];
	return ssa(instr->regs[n]);
}

/* is the n'th "virtual" src a false (scheduling-only) dependency? */
static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n)
{
	if (n == (instr->regs_count + instr->deps_count))
		return false;
	if (n >= instr->regs_count)
		return true;
	return false;
}

#define __src_cnt(__instr) ((__instr)->address ? (__instr)->regs_count : (__instr)->regs_count - 1)

/* iterator for an instruction's SSA sources (instr), also returns src #: */
#define foreach_ssa_src_n(__srcinst, __n, __instr) \
	for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \
		if ((__srcinst = __ssa_src_n(__instr, __n)))

/* iterator for an instruction's SSA sources (instr): */
#define foreach_ssa_src(__srcinst, __instr) \
	foreach_ssa_src_n(__srcinst, __i, __instr)


/* dump: */
void ir3_print(struct ir3 *ir);
void ir3_print_instr(struct ir3_instruction *instr);

/* depth calculation: */
int ir3_delayslots(struct ir3_instruction *assigner,
		struct ir3_instruction *consumer, unsigned n);
void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
void ir3_depth(struct ir3 *ir);

/* copy-propagate: */
struct ir3_shader_variant;
void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);

/* group neighbors and insert mov's to resolve conflicts: */
void ir3_group(struct ir3 *ir);

/* Sethi–Ullman numbering: */
void ir3_sun(struct ir3 *ir);

/* scheduling: */
void ir3_sched_add_deps(struct ir3 *ir);
int ir3_sched(struct ir3 *ir);

void ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so);

/* register assignment: */
struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler);
int ir3_ra(struct ir3 *ir3, gl_shader_stage type,
		bool frag_coord, bool frag_face);

/* legalize: */
void ir3_legalize(struct ir3 *ir, bool *has_ssbo, bool *need_pixlod, int *max_bary);

/* ************************************************************************* */
/* instruction helpers */

/* creates a mov of an immediate value, with dst sized to match 'type': */
static inline struct ir3_instruction *
create_immed_typed(struct ir3_block *block, uint32_t val, type_t type)
{
	struct ir3_instruction *mov;
	unsigned flags =
(type_size(type) < 32) ? IR3_REG_HALF : 0;

	mov = ir3_instr_create(block, OPC_MOV);
	mov->cat1.src_type = type;
	mov->cat1.dst_type = type;
	ir3_reg_create(mov, 0, flags);
	ir3_reg_create(mov, 0, IR3_REG_IMMED)->uim_val = val;

	return mov;
}

/* creates a mov of a 32-bit immediate: */
static inline struct ir3_instruction *
create_immed(struct ir3_block *block, uint32_t val)
{
	return create_immed_typed(block, val, TYPE_U32);
}

/* creates a mov from const register #n: */
static inline struct ir3_instruction *
create_uniform(struct ir3_block *block, unsigned n)
{
	struct ir3_instruction *mov;

	mov = ir3_instr_create(block, OPC_MOV);
	/* TODO get types right? */
	mov->cat1.src_type = TYPE_F32;
	mov->cat1.dst_type = TYPE_F32;
	ir3_reg_create(mov, 0, 0);
	ir3_reg_create(mov, n, IR3_REG_CONST);

	return mov;
}

/* creates a mov from const register a0.x+n (relative addressing): */
static inline struct ir3_instruction *
create_uniform_indirect(struct ir3_block *block, int n,
		struct ir3_instruction *address)
{
	struct ir3_instruction *mov;

	mov = ir3_instr_create(block, OPC_MOV);
	mov->cat1.src_type = TYPE_U32;
	mov->cat1.dst_type = TYPE_U32;
	ir3_reg_create(mov, 0, 0);
	ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n;

	ir3_instr_set_address(mov, address);

	return mov;
}

/* creates SSA src of correct type (ie. half vs full precision) */
static inline struct ir3_register * __ssa_src(struct ir3_instruction *instr,
		struct ir3_instruction *src, unsigned flags)
{
	struct ir3_register *reg;
	/* inherit half-ness from the src instruction's dst: */
	if (src->regs[0]->flags & IR3_REG_HALF)
		flags |= IR3_REG_HALF;
	reg = ir3_reg_create(instr, 0, IR3_REG_SSA | flags);
	reg->instr = src;
	reg->wrmask = src->regs[0]->wrmask;
	return reg;
}

/* creates a same-type mov of 'src' (propagating array-ness): */
static inline struct ir3_instruction *
ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
{
	struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV);
	ir3_reg_create(instr, 0, 0);   /* dst */
	if (src->regs[0]->flags & IR3_REG_ARRAY) {
		struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY);
		src_reg->array = src->regs[0]->array;
	} else {
		__ssa_src(instr, src, src->regs[0]->flags & IR3_REG_HIGH);
	}
	debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV));
	instr->cat1.src_type = type;
	instr->cat1.dst_type = type;
	return instr;
}

/* creates a type-converting mov (cov) from src_type to dst_type: */
static inline struct ir3_instruction *
ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
		type_t src_type, type_t dst_type)
{
	struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV);
	unsigned dst_flags = (type_size(dst_type) < 32) ? IR3_REG_HALF : 0;
	unsigned src_flags = (type_size(src_type) < 32) ?
IR3_REG_HALF : 0;

	debug_assert((src->regs[0]->flags & IR3_REG_HALF) == src_flags);

	ir3_reg_create(instr, 0, dst_flags);   /* dst */
	__ssa_src(instr, src, 0);
	instr->cat1.src_type = src_type;
	instr->cat1.dst_type = dst_type;
	debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY));
	return instr;
}

static inline struct ir3_instruction *
ir3_NOP(struct ir3_block *block)
{
	return ir3_instr_create(block, OPC_NOP);
}

#define IR3_INSTR_0 0

/* The __INSTRn macros stamp out a typed builder (ir3_<NAME>()) for an
 * n-src instruction; the INSTRnF variants additionally set an
 * IR3_INSTR_* flag and suffix the builder name with it.
 */
#define __INSTR0(flag, name, opc) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block) \
{ \
	struct ir3_instruction *instr = \
		ir3_instr_create(block, opc); \
	instr->flags |= flag; \
	return instr; \
}
#define INSTR0F(f, name)    __INSTR0(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR0(name)        __INSTR0(0, name, OPC_##name)

#define __INSTR1(flag, name, opc) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
		struct ir3_instruction *a, unsigned aflags) \
{ \
	struct ir3_instruction *instr = \
		ir3_instr_create(block, opc); \
	ir3_reg_create(instr, 0, 0);   /* dst */ \
	__ssa_src(instr, a, aflags); \
	instr->flags |= flag; \
	return instr; \
}
#define INSTR1F(f, name)    __INSTR1(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR1(name)        __INSTR1(0, name, OPC_##name)

#define __INSTR2(flag, name, opc) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
		struct ir3_instruction *a, unsigned aflags, \
		struct ir3_instruction *b, unsigned bflags) \
{ \
	struct ir3_instruction *instr = \
		ir3_instr_create(block, opc); \
	ir3_reg_create(instr, 0, 0);   /* dst */ \
	__ssa_src(instr, a, aflags); \
	__ssa_src(instr, b, bflags); \
	instr->flags |= flag; \
	return instr; \
}
#define INSTR2F(f, name)    __INSTR2(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR2(name)        __INSTR2(0, name, OPC_##name)

#define __INSTR3(flag, name, opc) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
		struct ir3_instruction *a, unsigned aflags, \
		struct ir3_instruction *b, unsigned bflags, \
		struct ir3_instruction *c, unsigned cflags) \
{ \
	struct ir3_instruction *instr = \
		ir3_instr_create2(block, opc, 4); \
	ir3_reg_create(instr, 0, 0);   /* dst */ \
	__ssa_src(instr, a, aflags); \
	__ssa_src(instr, b, bflags); \
	__ssa_src(instr, c, cflags); \
	instr->flags |= flag; \
	return instr; \
}
#define INSTR3F(f, name)    __INSTR3(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR3(name)        __INSTR3(0, name, OPC_##name)

#define __INSTR4(flag, name, opc) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
		struct ir3_instruction *a, unsigned aflags, \
		struct ir3_instruction *b, unsigned bflags, \
		struct ir3_instruction *c, unsigned cflags, \
		struct ir3_instruction *d, unsigned dflags) \
{ \
	struct ir3_instruction *instr = \
		ir3_instr_create2(block, opc, 5); \
	ir3_reg_create(instr, 0, 0);   /* dst */ \
	__ssa_src(instr, a, aflags); \
	__ssa_src(instr, b, bflags); \
	__ssa_src(instr, c, cflags); \
	__ssa_src(instr, d, dflags); \
	instr->flags |= flag; \
	return instr; \
}
#define INSTR4F(f, name)    __INSTR4(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR4(name)        __INSTR4(0, name, OPC_##name)

/* cat0 instructions: */
INSTR0(BR)
INSTR0(JUMP)
INSTR1(KILL)
INSTR0(END)

/* cat2 instructions, most 2 src but some 1 src: */
INSTR2(ADD_F)
INSTR2(MIN_F)
INSTR2(MAX_F)
INSTR2(MUL_F)
INSTR1(SIGN_F)
INSTR2(CMPS_F)
INSTR1(ABSNEG_F)
INSTR2(CMPV_F)
INSTR1(FLOOR_F)
INSTR1(CEIL_F)
INSTR1(RNDNE_F)
INSTR1(RNDAZ_F)
INSTR1(TRUNC_F)
INSTR2(ADD_U)
INSTR2(ADD_S)
INSTR2(SUB_U)
INSTR2(SUB_S)
INSTR2(CMPS_U)
INSTR2(CMPS_S)
INSTR2(MIN_U)
INSTR2(MIN_S)
INSTR2(MAX_U)
INSTR2(MAX_S)
INSTR1(ABSNEG_S)
INSTR2(AND_B)
INSTR2(OR_B)
INSTR1(NOT_B)
INSTR2(XOR_B)
INSTR2(CMPV_U)
INSTR2(CMPV_S)
INSTR2(MUL_U)
INSTR2(MUL_S)
INSTR2(MULL_U)
INSTR1(BFREV_B)
INSTR1(CLZ_S)
INSTR1(CLZ_B)
INSTR2(SHL_B)
INSTR2(SHR_B)
INSTR2(ASHR_B)
INSTR2(BARY_F)
INSTR2(MGEN_B)
INSTR2(GETBIT_B)
INSTR1(SETRM)
INSTR1(CBITS_B)
INSTR2(SHB)
INSTR2(MSAD)

/* cat3 instructions: */
INSTR3(MAD_U16)
INSTR3(MADSH_U16)
INSTR3(MAD_S16)
INSTR3(MADSH_M16)
INSTR3(MAD_U24)
INSTR3(MAD_S24)
INSTR3(MAD_F16)
INSTR3(MAD_F32)
INSTR3(SEL_B16)
INSTR3(SEL_B32)
INSTR3(SEL_S16)
INSTR3(SEL_S32)
INSTR3(SEL_F16)
INSTR3(SEL_F32)
INSTR3(SAD_S16)
INSTR3(SAD_S32)

/* cat4 instructions: */
INSTR1(RCP)
INSTR1(RSQ)
INSTR1(LOG2)
INSTR1(EXP2)
INSTR1(SIN)
INSTR1(COS)
INSTR1(SQRT)

/* cat5 instructions: */
INSTR1(DSX)
INSTR1(DSY)
INSTR1F(3D, DSX)
INSTR1F(3D, DSY)
INSTR1(RGETPOS)

/* Build a cat5 sample instruction.  'samp_tex' supplies the indirect
 * sampler/texture indices (always emitted s2en here); src0/src1 are
 * optional coordinate/argument srcs.  NOTE(review): src0/src1 look like
 * collect (fanin) instructions — their component count is taken as
 * regs_count-1 (all regs minus dst); confirm against callers.
 */
static inline struct ir3_instruction *
ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
		unsigned wrmask, unsigned flags, struct ir3_instruction *samp_tex,
		struct ir3_instruction *src0, struct ir3_instruction *src1)
{
	struct ir3_instruction *sam;
	struct ir3_register *reg;

	sam = ir3_instr_create(block, opc);
	sam->flags |= flags | IR3_INSTR_S2EN;
	ir3_reg_create(sam, 0, 0)->wrmask = wrmask;
	/* sampler/tex indices are half-precision: */
	__ssa_src(sam, samp_tex, IR3_REG_HALF);
	if (src0) {
		reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
		/* one mask bit per component of the collect: */
		reg->wrmask = (1 << (src0->regs_count - 1)) - 1;
		reg->instr = src0;
	}
	if (src1) {
		reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
		reg->instr = src1;
		reg->wrmask = (1 << (src1->regs_count - 1)) - 1;
	}
	sam->cat5.type = type;

	return sam;
}

/* cat6 instructions: */
INSTR2(LDLV)
INSTR2(LDG)
INSTR2(LDL)
INSTR3(STG)
INSTR3(STL)
INSTR1(RESINFO)
INSTR1(RESFMT)
INSTR2(ATOMIC_ADD)
INSTR2(ATOMIC_SUB)
INSTR2(ATOMIC_XCHG)
INSTR2(ATOMIC_INC)
INSTR2(ATOMIC_DEC)
INSTR2(ATOMIC_CMPXCHG)
INSTR2(ATOMIC_MIN)
INSTR2(ATOMIC_MAX)
INSTR2(ATOMIC_AND)
INSTR2(ATOMIC_OR)
INSTR2(ATOMIC_XOR)
/* image/SSBO variants differ per generation (GPU must be defined by
 * the including translation unit):
 */
#if GPU >= 600
INSTR3(STIB);
INSTR2(LDIB);
INSTR3F(G, ATOMIC_ADD)
INSTR3F(G, ATOMIC_SUB)
INSTR3F(G, ATOMIC_XCHG)
INSTR3F(G, ATOMIC_INC)
INSTR3F(G, ATOMIC_DEC)
INSTR3F(G, ATOMIC_CMPXCHG)
INSTR3F(G, ATOMIC_MIN)
INSTR3F(G, ATOMIC_MAX)
INSTR3F(G, ATOMIC_AND)
INSTR3F(G, ATOMIC_OR)
INSTR3F(G, ATOMIC_XOR)
#elif GPU >= 400
INSTR3(LDGB)
INSTR4(STGB)
INSTR4(STIB)
INSTR4F(G, ATOMIC_ADD)
INSTR4F(G, ATOMIC_SUB)
INSTR4F(G, ATOMIC_XCHG)
INSTR4F(G, ATOMIC_INC)
INSTR4F(G, ATOMIC_DEC)
INSTR4F(G, ATOMIC_CMPXCHG)
INSTR4F(G, ATOMIC_MIN)
INSTR4F(G, ATOMIC_MAX)
INSTR4F(G, ATOMIC_AND)
INSTR4F(G, ATOMIC_OR)
INSTR4F(G, ATOMIC_XOR)
#endif

/* cat7 instructions: */
INSTR0(BAR)
INSTR0(FENCE)

/* ************************************************************************* */
/* split this out or find some helper to use.. like main/bitset.h.. */

#include <string.h>

#define MAX_REG 256

/* One bit per register component; first MAX_REG bits are full regs,
 * second MAX_REG bits are (non-merged) half regs.
 */
typedef uint8_t regmask_t[2 * MAX_REG / 8];

/* Map a register (or, for relative regs, its base array offset) to its
 * bit index.  With a merged register file, half regs alias full regs
 * (hr0/hr1 overlap r0), so half indices are halved; otherwise half
 * regs get the upper half of the bit space.
 */
static inline unsigned regmask_idx(struct ir3_register *reg)
{
	unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num;
	debug_assert(num < MAX_REG);
	if (reg->flags & IR3_REG_HALF) {
		if (reg->merged) {
			num /= 2;
		} else {
			num += MAX_REG;
		}
	}
	return num;
}

static inline void regmask_init(regmask_t *regmask)
{
	memset(regmask, 0, sizeof(*regmask));
}

/* Set the bits covered by 'reg': a run of 'size' regs for relative
 * regs, otherwise one bit per set component of the write-mask.
 */
static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
{
	unsigned idx = regmask_idx(reg);
	if (reg->flags & IR3_REG_RELATIV) {
		unsigned i;
		for (i = 0; i < reg->size; i++, idx++)
			(*regmask)[idx / 8] |= 1 << (idx % 8);
	} else {
		unsigned mask;
		for (mask = reg->wrmask; mask; mask >>= 1, idx++)
			if (mask & 1)
				(*regmask)[idx / 8] |= 1 << (idx % 8);
	}
}

static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b)
{
	unsigned i;
	for (i = 0; i < ARRAY_SIZE(*dst); i++)
		(*dst)[i] = (*a)[i] | (*b)[i];
}

/* set bits in a if not set in b, conceptually:
 *   a |= (reg & ~b)
 */
static inline void regmask_set_if_not(regmask_t *a,
		struct ir3_register *reg, regmask_t *b)
{
	unsigned idx = regmask_idx(reg);
	if (reg->flags & IR3_REG_RELATIV) {
		unsigned i;
		for (i = 0; i < reg->size; i++, idx++)
			if (!((*b)[idx / 8] & (1 << (idx % 8))))
				(*a)[idx / 8] |= 1 << (idx % 8);
	} else {
		unsigned mask;
		for (mask = reg->wrmask; mask; mask >>= 1, idx++)
			if (mask & 1)
				if (!((*b)[idx / 8] & (1 << (idx % 8))))
					(*a)[idx / 8] |= 1 << (idx % 8);
	}
}

/* Return true if any component covered by 'reg' is set in the mask. */
static inline bool regmask_get(regmask_t *regmask,
		struct ir3_register *reg)
{
	unsigned idx = regmask_idx(reg);
	if (reg->flags & IR3_REG_RELATIV) {
		unsigned i;
		for (i = 0; i < reg->size; i++, idx++)
			if ((*regmask)[idx / 8] & (1 << (idx % 8)))
				return true;
	} else {
		unsigned mask;
		for (mask = reg->wrmask; mask; mask >>= 1, idx++)
			if (mask & 1)
				if ((*regmask)[idx / 8] & (1 << (idx % 8)))
					return true;
	}
	return false;
}

/* ************************************************************************* */

#endif /* IR3_H_ */