/*
 * Copyright (C) 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/dag.h"
#include "util/u_math.h"

#include "ir3.h"
#include "ir3_compiler.h"
#include "ir3_context.h"

#ifdef DEBUG
#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS)
#else
#define SCHED_DEBUG 0
#endif
#define d(fmt, ...)                                                            \
   do {                                                                        \
      if (SCHED_DEBUG) {                                                       \
         mesa_logi("PSCHED: " fmt, ##__VA_ARGS__);                             \
      }                                                                        \
   } while (0)

#define di(instr, fmt, ...)                                                    \
   do {                                                                        \
      if (SCHED_DEBUG) {                                                       \
         struct log_stream *stream = mesa_log_streami();                       \
         mesa_log_stream_printf(stream, "PSCHED: " fmt ": ", ##__VA_ARGS__);   \
         ir3_print_instr_stream(stream, instr);                                \
         mesa_log_stream_destroy(stream);                                      \
      }                                                                        \
   } while (0)

/*
 * Post RA Instruction Scheduling
 */

struct ir3_postsched_ctx {
   struct ir3 *ir;

   struct ir3_shader_variant *v;

   void *mem_ctx;
   struct ir3_block *block; /* the current block */
   struct dag *dag;

   struct list_head unscheduled_list; /* unscheduled instructions */

   int sfu_delay;
   int tex_delay;
};

struct ir3_postsched_node {
   struct dag_node dag; /* must be first for util_dynarray_foreach */
   struct ir3_instruction *instr;
   bool partially_evaluated_path;

   bool has_tex_src, has_sfu_src;

   unsigned delay;
   unsigned max_delay;
};

#define foreach_sched_node(__n, __list)                                        \
   list_for_each_entry (struct ir3_postsched_node, __n, __list, dag.link)

static bool
has_tex_src(struct ir3_instruction *instr)
{
   struct ir3_postsched_node *node = instr->data;
   return node->has_tex_src;
}

static bool
has_sfu_src(struct ir3_instruction *instr)
{
   struct ir3_postsched_node *node = instr->data;
   return node->has_sfu_src;
}

static void
schedule(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
{
   debug_assert(ctx->block == instr->block);

   /* remove from unscheduled_list:
    */
   list_delinit(&instr->node);

   di(instr, "schedule");

   list_addtail(&instr->node, &instr->block->instr_list);

   struct ir3_postsched_node *n = instr->data;
   dag_prune_head(ctx->dag, &n->dag);

   if (is_meta(instr) && (instr->opc != OPC_META_TEX_PREFETCH))
      return;

   if (is_sfu(instr)) {
      ctx->sfu_delay = 8;
   } else if (has_sfu_src(instr)) {
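      /* An (ss) consumer waits on *all* outstanding SFU results, so
       * after it is scheduled there should be nothing left to sync
       * against:
       */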
      ctx->sfu_delay = 0;
   } else if (ctx->sfu_delay > 0) {
      ctx->sfu_delay--;
   }

   if (is_tex_or_prefetch(instr)) {
      ctx->tex_delay = 10;
   } else if (has_tex_src(instr)) {
      ctx->tex_delay = 0;
   } else if (ctx->tex_delay > 0) {
      ctx->tex_delay--;
   }
}

static void
dump_state(struct ir3_postsched_ctx *ctx)
{
   if (!SCHED_DEBUG)
      return;

   foreach_sched_node (n, &ctx->dag->heads) {
      di(n->instr, "maxdel=%3d ", n->max_delay);

      util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
         struct ir3_postsched_node *child =
            (struct ir3_postsched_node *)edge->child;

         di(child->instr, " -> (%d parents) ", child->dag.parent_count);
      }
   }
}

/* Determine if this is an instruction that we'd prefer not to schedule
 * yet, in order to avoid an (ss)/(sy) sync.  This is limited by the
 * sfu_delay/tex_delay counters, ie. the more cycles it has been since
 * the last SFU/tex, the less costly a sync would be.
 */
static bool
would_sync(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
{
   if (ctx->sfu_delay) {
      if (has_sfu_src(instr))
         return true;
   }

   if (ctx->tex_delay) {
      if (has_tex_src(instr))
         return true;
   }

   return false;
}

/* find instruction to schedule: */
static struct ir3_instruction *
choose_instr(struct ir3_postsched_ctx *ctx)
{
   struct ir3_postsched_node *chosen = NULL;

   dump_state(ctx);

   foreach_sched_node (n, &ctx->dag->heads) {
      if (!is_meta(n->instr))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "prio: chose (meta)");
      return chosen->instr;
   }

   /* Try to schedule inputs with a higher priority, if possible, as
    * the last bary.f unlocks varying storage to unblock more VS
    * warps.
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      if (!is_input(n->instr))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "prio: chose (input)");
      return chosen->instr;
   }

   /* Next prioritize discards: */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d =
         ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs);

      if (d > 0)
         continue;

      if (!is_kill_or_demote(n->instr))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (kill, hard ready)");
      return chosen->instr;
   }

   /* Next prioritize expensive instructions: */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d =
         ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs);

      if (d > 0)
         continue;

      if (!(is_sfu(n->instr) || is_tex(n->instr)))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (sfu/tex, hard ready)");
      return chosen->instr;
   }

   /*
    * Sometimes it is better to take a nop rather than scheduling an
    * instruction that would require an (ss) shortly after another
    * SFU, ie. if the last SFU was just one or two instructions ago,
    * and we could choose between taking a nop and then scheduling
    * something else vs scheduling the immediately-available instruction
    * that would require an (ss), we are better off with the nop.
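    *
    * For example (hypothetical timing), if the SFU issued only a couple
    * of instructions back, the (ss) consumer would eat most of the
    * remaining SFU latency as a stall, whereas a nop or two may be all
    * it takes for some independent instruction to become ready instead.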
    */
   for (unsigned delay = 0; delay < 4; delay++) {
      foreach_sched_node (n, &ctx->dag->heads) {
         if (would_sync(ctx, n->instr))
            continue;

         unsigned d = ir3_delay_calc_postra(ctx->block, n->instr, true,
                                            ctx->v->mergedregs);

         if (d > delay)
            continue;

         if (!chosen || (chosen->max_delay < n->max_delay))
            chosen = n;
      }

      if (chosen) {
         di(chosen->instr, "csp: chose (soft ready, delay=%u)", delay);
         return chosen->instr;
      }
   }

   /* Next try to find a ready leader w/ soft delay (ie. including extra
    * delay for things like tex fetch which can be synchronized w/ the
    * sync bit, but where we probably do want to schedule some other
    * instructions while we wait):
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d =
         ir3_delay_calc_postra(ctx->block, n->instr, true, ctx->v->mergedregs);

      if (d > 0)
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (soft ready)");
      return chosen->instr;
   }

   /* Next try to find a ready leader that can be scheduled without nop's,
    * which in the case of things that need (sy)/(ss) could result in
    * stalls.. but we've already decided there is not a better option.
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d =
         ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs);

      if (d > 0)
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (hard ready)");
      return chosen->instr;
   }

   /* Otherwise choose the leader with maximum cost:
    *
    * TODO should we try to balance cost and delays?  I guess it is
    * a balance between now-nop's and future-nop's?
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      if (!chosen || chosen->max_delay < n->max_delay)
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (leader)");
      return chosen->instr;
   }

   return NULL;
}

struct ir3_postsched_deps_state {
   struct ir3_postsched_ctx *ctx;

   enum { F, R } direction;

   bool merged;

   /* Track, for each register, the sched node (instruction) that last
    * wrote it (in whichever direction we are iterating the block).
    *
    * Note, this table is twice as big as the # of regs, to deal with
    * half-precision regs.  The approach differs depending on whether
    * the half and full precision register files are "merged" (conflicting,
    * ie. a6xx+), in which case we consider each full precision dep
    * as two half-precision dependencies, vs older separate (non-
    * conflicting) register files, in which case the first half of the
    * table is used for full precision and the 2nd half for half-precision.
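    *
    * For example, in the merged case a full-precision write to r0.x
    * occupies slots 0 and 1, aliasing hr0.x (slot 0) and hr0.y (slot 1),
    * so it conflicts with an access to either half-register.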
    */
   struct ir3_postsched_node *regs[2 * 256];
};

/* bounds-checking read/write accessor, since an OoB access to stuff on
 * the stack is gonna cause a bad day:
 */
#define dep_reg(state, idx)                                                    \
   *({                                                                         \
      assert((idx) < ARRAY_SIZE((state)->regs));                               \
      &(state)->regs[(idx)];                                                   \
   })

static void
add_dep(struct ir3_postsched_deps_state *state,
        struct ir3_postsched_node *before, struct ir3_postsched_node *after)
{
   if (!before || !after)
      return;

   assert(before != after);

   if (state->direction == F) {
      dag_add_edge(&before->dag, &after->dag, NULL);
   } else {
      dag_add_edge(&after->dag, &before->dag, NULL);
   }
}

static void
add_single_reg_dep(struct ir3_postsched_deps_state *state,
                   struct ir3_postsched_node *node, unsigned num, int src_n)
{
   struct ir3_postsched_node *dep = dep_reg(state, num);

   if (src_n >= 0 && dep && state->direction == F) {
      unsigned d = ir3_delayslots(dep->instr, node->instr, src_n, true);
      node->delay = MAX2(node->delay, d);
      if (is_tex_or_prefetch(dep->instr))
         node->has_tex_src = true;
      if (is_sfu(dep->instr))
         node->has_sfu_src = true;
   }

   add_dep(state, dep, node);
   if (src_n < 0) {
      dep_reg(state, num) = node;
   }
}

/* This is where we handle full vs half-precision, and the potential
 * conflicts between half and full precision that result in additional
 * dependencies.  The 'reg' arg is really just to know half vs full
 * precision.
 *
 * If src_n is non-negative, this adds a dependency on a source register,
 * and src_n is the index passed into ir3_delayslots() for calculating
 * the delay; it corresponds to node->instr->regs[src_n].  If src_n is
 * negative, then this is for a destination register.
 */
static void
add_reg_dep(struct ir3_postsched_deps_state *state,
            struct ir3_postsched_node *node, const struct ir3_register *reg,
            unsigned num, int src_n)
{
   if (state->merged) {
      /* Make sure that special registers like a0.x that are written as
       * half-registers don't alias random full registers by pretending that
       * they're full registers:
       */
      if ((reg->flags & IR3_REG_HALF) && !is_reg_special(reg)) {
         /* single conflict in half-reg space: */
         add_single_reg_dep(state, node, num, src_n);
      } else {
         /* two conflicts in half-reg space: */
         add_single_reg_dep(state, node, 2 * num + 0, src_n);
         add_single_reg_dep(state, node, 2 * num + 1, src_n);
      }
   } else {
      if (reg->flags & IR3_REG_HALF)
         num += ARRAY_SIZE(state->regs) / 2;
      add_single_reg_dep(state, node, num, src_n);
   }
}

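/* Walk one instruction's registers and record/consume dependencies.  E.g.
 * for the (hypothetical) sequence:
 *
 *    add.f r0.x, r1.y, r2.z
 *    mul.f r1.y, r0.x, r3.w
 *
 * the forward pass adds a read-after-write edge (mul.f reads the r0.x that
 * add.f wrote), and the reverse pass, iterating the block bottom-up, adds
 * the write-after-read edge (mul.f overwrites the r1.y that add.f still
 * reads), so the scheduler can never reorder the two.
 */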
static void
calculate_deps(struct ir3_postsched_deps_state *state,
               struct ir3_postsched_node *node)
{
   /* Add dependencies on instructions that previously (or next,
    * in the reverse direction) wrote any of our src registers:
    */
   foreach_src_n (reg, i, node->instr) {
      if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
         continue;

      if (reg->flags & IR3_REG_RELATIV) {
         /* mark entire array as read: */
         for (unsigned j = 0; j < reg->size; j++) {
            add_reg_dep(state, node, reg, reg->array.base + j, i);
         }
      } else {
         assert(reg->wrmask >= 1);
         u_foreach_bit (b, reg->wrmask) {
            add_reg_dep(state, node, reg, reg->num + b, i);
         }
      }
   }

   /* And then update the state with what this instruction wrote:
    */
   foreach_dst (reg, node->instr) {
      if (reg->wrmask == 0)
         continue;
      if (reg->flags & IR3_REG_RELATIV) {
         /* mark the entire array as written: */
         for (unsigned i = 0; i < reg->size; i++) {
            add_reg_dep(state, node, reg, reg->array.base + i, -1);
         }
      } else {
         assert(reg->wrmask >= 1);
         u_foreach_bit (b, reg->wrmask) {
            add_reg_dep(state, node, reg, reg->num + b, -1);
         }
      }
   }
}

static void
calculate_forward_deps(struct ir3_postsched_ctx *ctx)
{
   struct ir3_postsched_deps_state state = {
      .ctx = ctx,
      .direction = F,
      .merged = ctx->v->mergedregs,
   };

   foreach_instr (instr, &ctx->unscheduled_list) {
      calculate_deps(&state, instr->data);
   }
}

static void
calculate_reverse_deps(struct ir3_postsched_ctx *ctx)
{
   struct ir3_postsched_deps_state state = {
      .ctx = ctx,
      .direction = R,
      .merged = ctx->v->mergedregs,
   };

   foreach_instr_rev (instr, &ctx->unscheduled_list) {
      calculate_deps(&state, instr->data);
   }
}

static void
sched_node_init(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
{
   struct ir3_postsched_node *n =
      rzalloc(ctx->mem_ctx, struct ir3_postsched_node);

   dag_init_node(ctx->dag, &n->dag);

   n->instr = instr;
   instr->data = n;
}

static void
sched_dag_max_delay_cb(struct dag_node *node, void *state)
{
   struct ir3_postsched_node *n = (struct ir3_postsched_node *)node;
   uint32_t max_delay = 0;

   util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
      struct ir3_postsched_node *child =
         (struct ir3_postsched_node *)edge->child;
      max_delay = MAX2(child->max_delay, max_delay);
   }

   n->max_delay = MAX2(n->max_delay, max_delay + n->delay);
}

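/* The max_delay computed by sched_dag_max_delay_cb() is effectively the
 * length of the critical path from a node to the end of the block, and
 * choose_instr() uses it to break ties in favor of the longest path, ie.
 * classic critical-path-first list scheduling.
 */
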
static void
sched_dag_init(struct ir3_postsched_ctx *ctx)
{
   ctx->mem_ctx = ralloc_context(NULL);

   ctx->dag = dag_create(ctx->mem_ctx);

   foreach_instr (instr, &ctx->unscheduled_list)
      sched_node_init(ctx, instr);

   calculate_forward_deps(ctx);
   calculate_reverse_deps(ctx);

   /*
    * To avoid expensive texture fetches, etc, being moved ahead
    * of kills, track the kills we've seen so far, so we can add an
    * extra dependency on them for tex/mem instructions:
    */
   struct util_dynarray kills;
   util_dynarray_init(&kills, ctx->mem_ctx);

   /* The last bary.f with the (ei) flag must be scheduled before any kills,
    * or the hw gets angry.  Keep track of inputs here so we can add the
    * false dep on the kill instruction.
    */
   struct util_dynarray inputs;
   util_dynarray_init(&inputs, ctx->mem_ctx);

   /*
    * Normal srcs won't be in SSA at this point, those are dealt with in
    * calculate_forward_deps() and calculate_reverse_deps().  But we still
    * have the false-dep information in SSA form, so go ahead and add
    * dependencies for that here:
    */
   foreach_instr (instr, &ctx->unscheduled_list) {
      struct ir3_postsched_node *n = instr->data;

      foreach_ssa_src_n (src, i, instr) {
         /* don't consider dependencies in other blocks: */
         if (src->block != instr->block)
            continue;

         /* we can end up with unused false-deps.. just skip them: */
         if (src->flags & IR3_INSTR_UNUSED)
            continue;

         struct ir3_postsched_node *sn = src->data;

         dag_add_edge(&sn->dag, &n->dag, NULL);
      }

      if (is_input(instr)) {
         util_dynarray_append(&inputs, struct ir3_instruction *, instr);
      } else if (is_kill_or_demote(instr)) {
         util_dynarray_foreach (&inputs, struct ir3_instruction *, instrp) {
            struct ir3_instruction *input = *instrp;
            struct ir3_postsched_node *in = input->data;
            dag_add_edge(&in->dag, &n->dag, NULL);
         }
         util_dynarray_append(&kills, struct ir3_instruction *, instr);
      } else if (is_tex(instr) || is_mem(instr)) {
         util_dynarray_foreach (&kills, struct ir3_instruction *, instrp) {
            struct ir3_instruction *kill = *instrp;
            struct ir3_postsched_node *kn = kill->data;
            dag_add_edge(&kn->dag, &n->dag, NULL);
         }
      }
   }

   // TODO do we want to do this after reverse-dependencies?
   dag_traverse_bottom_up(ctx->dag, sched_dag_max_delay_cb, NULL);
}

static void
sched_dag_destroy(struct ir3_postsched_ctx *ctx)
{
   ralloc_free(ctx->mem_ctx);
   ctx->mem_ctx = NULL;
   ctx->dag = NULL;
}

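/* Schedule a single block: move its instructions onto the unscheduled
 * list, schedule meta:input and tex-prefetch instructions first, then
 * repeatedly pick the best ready instruction, padding with nop's where
 * needed to cover delay slots:
 */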
static void
sched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block)
{
   ctx->block = block;
   ctx->tex_delay = 0;
   ctx->sfu_delay = 0;

   /* move all instructions to the unscheduled list, and
    * empty the block's instruction list (to which we will
    * be inserting):
    */
   list_replace(&block->instr_list, &ctx->unscheduled_list);
   list_inithead(&block->instr_list);

   // TODO once we are using post-sched for everything we can
   // just not stick in NOP's prior to post-sched, and drop this.
   // for now keep this, since it makes post-sched optional:
   foreach_instr_safe (instr, &ctx->unscheduled_list) {
      switch (instr->opc) {
      case OPC_NOP:
      case OPC_B:
      case OPC_JUMP:
         list_delinit(&instr->node);
         break;
      default:
         break;
      }
   }

   sched_dag_init(ctx);

   /* First schedule all meta:input instructions, followed by
    * tex-prefetch.  We want all of the instructions that load
    * values into registers before the shader starts to go
    * before any other instructions.  But in particular we
    * want inputs to come before prefetches.  This is because
    * an FS's bary_ij input may not actually be live in the
    * shader, but it should not be scheduled on top of any
    * other input (but can be overwritten by a tex prefetch).
    */
   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_INPUT)
         schedule(ctx, instr);

   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_TEX_PREFETCH)
         schedule(ctx, instr);

   while (!list_is_empty(&ctx->unscheduled_list)) {
      struct ir3_instruction *instr = choose_instr(ctx);

      unsigned delay =
         ir3_delay_calc_postra(ctx->block, instr, false, ctx->v->mergedregs);
      d("delay=%u", delay);

      /* and if we run out of instructions that can be scheduled,
       * then it is time for nop's:
       */
      debug_assert(delay <= 6);
      while (delay > 0) {
         ir3_NOP(block);
         delay--;
      }

      schedule(ctx, instr);
   }

   sched_dag_destroy(ctx);
}

static bool
is_self_mov(struct ir3_instruction *instr)
{
   if (!is_same_type_mov(instr))
      return false;

   if (instr->dsts[0]->num != instr->srcs[0]->num)
      return false;

   if (instr->dsts[0]->flags & IR3_REG_RELATIV)
      return false;

   if (instr->cat1.round != ROUND_ZERO)
      return false;

   if (instr->srcs[0]->flags &
       (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_FNEG |
        IR3_REG_FABS | IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT))
      return false;

   return true;
}

/* Sometimes we end up with in-place mov's, ie. mov.u32u32 r1.y, r1.y,
 * as a result of places where, before RA, we are not sure that it is
 * safe to eliminate them.  We could eliminate these earlier, but
 * sometimes they are tangled up in false-dep's, etc, so it is easier
 * just to let them exist until after RA.
 */
static void
cleanup_self_movs(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         for (unsigned i = 0; i < instr->deps_count; i++) {
            if (instr->deps[i] && is_self_mov(instr->deps[i])) {
               instr->deps[i] = NULL;
            }
         }

         if (is_self_mov(instr))
            list_delinit(&instr->node);
      }
   }
}

bool
ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v)
{
   struct ir3_postsched_ctx ctx = {
      .ir = ir,
      .v = v,
   };

   ir3_remove_nops(ir);
   cleanup_self_movs(ir);

   foreach_block (block, &ir->block_list) {
      sched_block(&ctx, block);
   }

   return true;
}