1/*
2 * Copyright (C) 2019 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "util/dag.h"
28#include "util/u_math.h"
29
30#include "ir3.h"
31#include "ir3_compiler.h"
32#include "ir3_context.h"
33
/* Debug logging is compiled out entirely in release builds: */
#ifdef DEBUG
#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS)
#else
#define SCHED_DEBUG 0
#endif
/* Log a printf-style message, prefixed with "PSCHED:" (no-op unless
 * SCHED_DEBUG is enabled):
 */
#define d(fmt, ...)                                                            \
   do {                                                                        \
      if (SCHED_DEBUG) {                                                       \
         mesa_logi("PSCHED: " fmt, ##__VA_ARGS__);                             \
      }                                                                        \
   } while (0)

/* Like d(), but additionally prints the given instruction after the
 * formatted message:
 */
#define di(instr, fmt, ...)                                                    \
   do {                                                                        \
      if (SCHED_DEBUG) {                                                       \
         struct log_stream *stream = mesa_log_streami();                       \
         mesa_log_stream_printf(stream, "PSCHED: " fmt ": ", ##__VA_ARGS__);   \
         ir3_print_instr_stream(stream, instr);                                \
         mesa_log_stream_destroy(stream);                                      \
      }                                                                        \
   } while (0)
55
56/*
57 * Post RA Instruction Scheduling
58 */
59
/* Scheduler state; most fields are re-initialized per block in
 * sched_block().
 */
struct ir3_postsched_ctx {
   struct ir3 *ir;

   struct ir3_shader_variant *v;

   void *mem_ctx;           /* ralloc ctx owning the dag + nodes, freed in sched_dag_destroy() */
   struct ir3_block *block; /* the current block */
   struct dag *dag;

   struct list_head unscheduled_list; /* unscheduled instructions */

   /* Countdowns since the most recent SFU / tex instruction was
    * scheduled (see schedule()); used by would_sync() to estimate how
    * costly an (ss)/(sy) sync would be right now.
    */
   int sfu_delay;
   int tex_delay;
};
74
/* Per-instruction scheduling node; instr->data points back to this. */
struct ir3_postsched_node {
   struct dag_node dag; /* must be first for util_dynarray_foreach */
   struct ir3_instruction *instr;
   bool partially_evaluated_path;

   /* Whether any src register was produced by a tex / SFU instruction;
    * cached during forward dep calculation (add_single_reg_dep()):
    */
   bool has_tex_src, has_sfu_src;

   unsigned delay;     /* max delay slots required by any src dependency */
   unsigned max_delay; /* accumulated delay along the worst path below this node */
};

/* Iterate ir3_postsched_node's in a dag head list: */
#define foreach_sched_node(__n, __list)                                        \
   list_for_each_entry (struct ir3_postsched_node, __n, __list, dag.link)
88
89static bool
90has_tex_src(struct ir3_instruction *instr)
91{
92   struct ir3_postsched_node *node = instr->data;
93   return node->has_tex_src;
94}
95
96static bool
97has_sfu_src(struct ir3_instruction *instr)
98{
99   struct ir3_postsched_node *node = instr->data;
100   return node->has_sfu_src;
101}
102
103static void
104schedule(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
105{
106   debug_assert(ctx->block == instr->block);
107
108   /* remove from unscheduled_list:
109    */
110   list_delinit(&instr->node);
111
112   di(instr, "schedule");
113
114   list_addtail(&instr->node, &instr->block->instr_list);
115
116   struct ir3_postsched_node *n = instr->data;
117   dag_prune_head(ctx->dag, &n->dag);
118
119   if (is_meta(instr) && (instr->opc != OPC_META_TEX_PREFETCH))
120      return;
121
122   if (is_sfu(instr)) {
123      ctx->sfu_delay = 8;
124   } else if (has_sfu_src(instr)) {
125      ctx->sfu_delay = 0;
126   } else if (ctx->sfu_delay > 0) {
127      ctx->sfu_delay--;
128   }
129
130   if (is_tex_or_prefetch(instr)) {
131      ctx->tex_delay = 10;
132   } else if (has_tex_src(instr)) {
133      ctx->tex_delay = 0;
134   } else if (ctx->tex_delay > 0) {
135      ctx->tex_delay--;
136   }
137}
138
139static void
140dump_state(struct ir3_postsched_ctx *ctx)
141{
142   if (!SCHED_DEBUG)
143      return;
144
145   foreach_sched_node (n, &ctx->dag->heads) {
146      di(n->instr, "maxdel=%3d    ", n->max_delay);
147
148      util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
149         struct ir3_postsched_node *child =
150            (struct ir3_postsched_node *)edge->child;
151
152         di(child->instr, " -> (%d parents) ", child->dag.parent_count);
153      }
154   }
155}
156
157/* Determine if this is an instruction that we'd prefer not to schedule
158 * yet, in order to avoid an (ss) sync.  This is limited by the sfu_delay
159 * counter, ie. the more cycles it has been since the last SFU, the less
160 * costly a sync would be.
161 */
162static bool
163would_sync(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
164{
165   if (ctx->sfu_delay) {
166      if (has_sfu_src(instr))
167         return true;
168   }
169
170   if (ctx->tex_delay) {
171      if (has_tex_src(instr))
172         return true;
173   }
174
175   return false;
176}
177
/* find instruction to schedule:
 *
 * Walks the ready dag heads through a sequence of priority passes; the
 * first pass that yields a candidate wins.  Ties within a pass are
 * broken by max_delay (longest accumulated path first).
 */
static struct ir3_instruction *
choose_instr(struct ir3_postsched_ctx *ctx)
{
   struct ir3_postsched_node *chosen = NULL;

   dump_state(ctx);

   /* Highest priority: ready meta instructions: */
   foreach_sched_node (n, &ctx->dag->heads) {
      if (!is_meta(n->instr))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "prio: chose (meta)");
      return chosen->instr;
   }

   /* Try to schedule inputs with a higher priority, if possible, as
    * the last bary.f unlocks varying storage to unblock more VS
    * warps.
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      if (!is_input(n->instr))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "prio: chose (input)");
      return chosen->instr;
   }

   /* Next prioritize discards: */
   foreach_sched_node (n, &ctx->dag->heads) {
      /* "hard ready" (soft=false): no nop padding needed at all: */
      unsigned d =
         ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs);

      if (d > 0)
         continue;

      if (!is_kill_or_demote(n->instr))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (kill, hard ready)");
      return chosen->instr;
   }

   /* Next prioritize expensive instructions: */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d =
         ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs);

      if (d > 0)
         continue;

      if (!(is_sfu(n->instr) || is_tex(n->instr)))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (sfu/tex, hard ready)");
      return chosen->instr;
   }

   /*
    * Sometimes be better to take a nop, rather than scheduling an
    * instruction that would require an (ss) shortly after another
    * SFU..  ie. if last SFU was just one or two instr ago, and we
    * could choose between taking a nop and then scheduling
    * something else, vs scheduling the immed avail instruction that
    * would require (ss), we are better with the nop.
    *
    * Widening tolerance one cycle at a time, prefer a non-syncing
    * instruction that is at most `delay` cycles away:
    */
   for (unsigned delay = 0; delay < 4; delay++) {
      foreach_sched_node (n, &ctx->dag->heads) {
         if (would_sync(ctx, n->instr))
            continue;

         /* "soft ready" (soft=true): includes delays that could instead
          * be covered by a sync bit:
          */
         unsigned d = ir3_delay_calc_postra(ctx->block, n->instr, true,
                                            ctx->v->mergedregs);

         if (d > delay)
            continue;

         if (!chosen || (chosen->max_delay < n->max_delay))
            chosen = n;
      }

      if (chosen) {
         di(chosen->instr, "csp: chose (soft ready, delay=%u)", delay);
         return chosen->instr;
      }
   }

   /* Next try to find a ready leader w/ soft delay (ie. including extra
    * delay for things like tex fetch which can be synchronized w/ sync
    * bit (but we probably do want to schedule some other instructions
    * while we wait)
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d =
         ir3_delay_calc_postra(ctx->block, n->instr, true, ctx->v->mergedregs);

      if (d > 0)
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (soft ready)");
      return chosen->instr;
   }

   /* Next try to find a ready leader that can be scheduled without nop's,
    * which in the case of things that need (sy)/(ss) could result in
    * stalls.. but we've already decided there is not a better option.
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d =
         ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs);

      if (d > 0)
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (hard ready)");
      return chosen->instr;
   }

   /* Otherwise choose leader with maximum cost:
    *
    * TODO should we try to balance cost and delays?  I guess it is
    * a balance between now-nop's and future-nop's?
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      if (!chosen || chosen->max_delay < n->max_delay)
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (leader)");
      return chosen->instr;
   }

   /* NULL only if the dag has no heads left (caller pads with nops): */
   return NULL;
}
343
/* State for one dependency-calculation sweep over a block. */
struct ir3_postsched_deps_state {
   struct ir3_postsched_ctx *ctx;

   /* F = forward iteration over the block, R = reverse iteration;
    * add_dep() flips the edge direction accordingly.
    */
   enum { F, R } direction;

   /* True when the half and full register files conflict (see below): */
   bool merged;

   /* Track the mapping between sched node (instruction) that last
    * wrote a given register (in whichever direction we are iterating
    * the block)
    *
    * Note, this table is twice as big as the # of regs, to deal with
    * half-precision regs.  The approach differs depending on whether
    * the half and full precision register files are "merged" (conflict,
    * ie. a6xx+) in which case we consider each full precision dep
    * as two half-precision dependencies, vs older separate (non-
    * conflicting) in which case the first half of the table is used
    * for full precision and 2nd half for half-precision.
    */
   struct ir3_postsched_node *regs[2 * 256];
};
365
/* bounds checking read/write accessors, since OoB access to stuff on
 * the stack is gonna cause a bad day.  The statement expression yields
 * a dereferenced pointer, so dep_reg() is usable as an lvalue.
 */
#define dep_reg(state, idx)                                                    \
   *({                                                                         \
      assert((idx) < ARRAY_SIZE((state)->regs));                               \
      &(state)->regs[(idx)];                                                   \
   })
374
375static void
376add_dep(struct ir3_postsched_deps_state *state,
377        struct ir3_postsched_node *before, struct ir3_postsched_node *after)
378{
379   if (!before || !after)
380      return;
381
382   assert(before != after);
383
384   if (state->direction == F) {
385      dag_add_edge(&before->dag, &after->dag, NULL);
386   } else {
387      dag_add_edge(&after->dag, &before->dag, NULL);
388   }
389}
390
391static void
392add_single_reg_dep(struct ir3_postsched_deps_state *state,
393                   struct ir3_postsched_node *node, unsigned num, int src_n)
394{
395   struct ir3_postsched_node *dep = dep_reg(state, num);
396
397   if (src_n >= 0 && dep && state->direction == F) {
398      unsigned d = ir3_delayslots(dep->instr, node->instr, src_n, true);
399      node->delay = MAX2(node->delay, d);
400      if (is_tex_or_prefetch(dep->instr))
401         node->has_tex_src = true;
402      if (is_tex_or_prefetch(dep->instr))
403         node->has_sfu_src = true;
404   }
405
406   add_dep(state, dep, node);
407   if (src_n < 0) {
408      dep_reg(state, num) = node;
409   }
410}
411
412/* This is where we handled full vs half-precision, and potential conflicts
413 * between half and full precision that result in additional dependencies.
414 * The 'reg' arg is really just to know half vs full precision.
415 *
416 * If non-negative, then this adds a dependency on a source register, and
417 * src_n is the index passed into ir3_delayslots() for calculating the delay:
418 * If positive, corresponds to node->instr->regs[src_n]. If negative, then
419 * this is for a destination register.
420 */
421static void
422add_reg_dep(struct ir3_postsched_deps_state *state,
423            struct ir3_postsched_node *node, const struct ir3_register *reg,
424            unsigned num, int src_n)
425{
426   if (state->merged) {
427      /* Make sure that special registers like a0.x that are written as
428       * half-registers don't alias random full registers by pretending that
429       * they're full registers:
430       */
431      if ((reg->flags & IR3_REG_HALF) && !is_reg_special(reg)) {
432         /* single conflict in half-reg space: */
433         add_single_reg_dep(state, node, num, src_n);
434      } else {
435         /* two conflicts in half-reg space: */
436         add_single_reg_dep(state, node, 2 * num + 0, src_n);
437         add_single_reg_dep(state, node, 2 * num + 1, src_n);
438      }
439   } else {
440      if (reg->flags & IR3_REG_HALF)
441         num += ARRAY_SIZE(state->regs) / 2;
442      add_single_reg_dep(state, node, num, src_n);
443   }
444}
445
/* Add register dependencies for one instruction.  Sources are processed
 * before destinations so that a read-then-write of the same register in
 * one instruction depends on the previous writer, not on itself.
 */
static void
calculate_deps(struct ir3_postsched_deps_state *state,
               struct ir3_postsched_node *node)
{
   /* Add dependencies on instructions that previously (or next,
    * in the reverse direction) wrote any of our src registers:
    */
   foreach_src_n (reg, i, node->instr) {
      /* const/immed srcs are not register-file accesses, no dep needed: */
      if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
         continue;

      if (reg->flags & IR3_REG_RELATIV) {
         /* mark entire array as read: */
         for (unsigned j = 0; j < reg->size; j++) {
            add_reg_dep(state, node, reg, reg->array.base + j, i);
         }
      } else {
         assert(reg->wrmask >= 1);
         /* one dep per component actually read: */
         u_foreach_bit (b, reg->wrmask) {
            add_reg_dep(state, node, reg, reg->num + b, i);
         }
      }
   }

   /* And then after we update the state for what this instruction
    * wrote:
    */
   foreach_dst (reg, node->instr) {
      /* wrmask==0 means no components are actually written: */
      if (reg->wrmask == 0)
         continue;
      if (reg->flags & IR3_REG_RELATIV) {
         /* mark the entire array as written: */
         for (unsigned i = 0; i < reg->size; i++) {
            add_reg_dep(state, node, reg, reg->array.base + i, -1);
         }
      } else {
         assert(reg->wrmask >= 1);
         u_foreach_bit (b, reg->wrmask) {
            add_reg_dep(state, node, reg, reg->num + b, -1);
         }
      }
   }
}
489
490static void
491calculate_forward_deps(struct ir3_postsched_ctx *ctx)
492{
493   struct ir3_postsched_deps_state state = {
494      .ctx = ctx,
495      .direction = F,
496      .merged = ctx->v->mergedregs,
497   };
498
499   foreach_instr (instr, &ctx->unscheduled_list) {
500      calculate_deps(&state, instr->data);
501   }
502}
503
504static void
505calculate_reverse_deps(struct ir3_postsched_ctx *ctx)
506{
507   struct ir3_postsched_deps_state state = {
508      .ctx = ctx,
509      .direction = R,
510      .merged = ctx->v->mergedregs,
511   };
512
513   foreach_instr_rev (instr, &ctx->unscheduled_list) {
514      calculate_deps(&state, instr->data);
515   }
516}
517
518static void
519sched_node_init(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
520{
521   struct ir3_postsched_node *n =
522      rzalloc(ctx->mem_ctx, struct ir3_postsched_node);
523
524   dag_init_node(ctx->dag, &n->dag);
525
526   n->instr = instr;
527   instr->data = n;
528}
529
530static void
531sched_dag_max_delay_cb(struct dag_node *node, void *state)
532{
533   struct ir3_postsched_node *n = (struct ir3_postsched_node *)node;
534   uint32_t max_delay = 0;
535
536   util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
537      struct ir3_postsched_node *child =
538         (struct ir3_postsched_node *)edge->child;
539      max_delay = MAX2(child->max_delay, max_delay);
540   }
541
542   n->max_delay = MAX2(n->max_delay, max_delay + n->delay);
543}
544
545static void
546sched_dag_init(struct ir3_postsched_ctx *ctx)
547{
548   ctx->mem_ctx = ralloc_context(NULL);
549
550   ctx->dag = dag_create(ctx->mem_ctx);
551
552   foreach_instr (instr, &ctx->unscheduled_list)
553      sched_node_init(ctx, instr);
554
555   calculate_forward_deps(ctx);
556   calculate_reverse_deps(ctx);
557
558   /*
559    * To avoid expensive texture fetches, etc, from being moved ahead
560    * of kills, track the kills we've seen so far, so we can add an
561    * extra dependency on them for tex/mem instructions
562    */
563   struct util_dynarray kills;
564   util_dynarray_init(&kills, ctx->mem_ctx);
565
566   /* The last bary.f with the (ei) flag must be scheduled before any kills,
567    * or the hw gets angry. Keep track of inputs here so we can add the
568    * false dep on the kill instruction.
569    */
570   struct util_dynarray inputs;
571   util_dynarray_init(&inputs, ctx->mem_ctx);
572
573   /*
574    * Normal srcs won't be in SSA at this point, those are dealt with in
575    * calculate_forward_deps() and calculate_reverse_deps().  But we still
576    * have the false-dep information in SSA form, so go ahead and add
577    * dependencies for that here:
578    */
579   foreach_instr (instr, &ctx->unscheduled_list) {
580      struct ir3_postsched_node *n = instr->data;
581
582      foreach_ssa_src_n (src, i, instr) {
583         if (src->block != instr->block)
584            continue;
585
586         /* we can end up with unused false-deps.. just skip them: */
587         if (src->flags & IR3_INSTR_UNUSED)
588            continue;
589
590         struct ir3_postsched_node *sn = src->data;
591
592         /* don't consider dependencies in other blocks: */
593         if (src->block != instr->block)
594            continue;
595
596         dag_add_edge(&sn->dag, &n->dag, NULL);
597      }
598
599      if (is_input(instr)) {
600         util_dynarray_append(&inputs, struct ir3_instruction *, instr);
601      } else if (is_kill_or_demote(instr)) {
602         util_dynarray_foreach (&inputs, struct ir3_instruction *, instrp) {
603            struct ir3_instruction *input = *instrp;
604            struct ir3_postsched_node *in = input->data;
605            dag_add_edge(&in->dag, &n->dag, NULL);
606         }
607         util_dynarray_append(&kills, struct ir3_instruction *, instr);
608      } else if (is_tex(instr) || is_mem(instr)) {
609         util_dynarray_foreach (&kills, struct ir3_instruction *, instrp) {
610            struct ir3_instruction *kill = *instrp;
611            struct ir3_postsched_node *kn = kill->data;
612            dag_add_edge(&kn->dag, &n->dag, NULL);
613         }
614      }
615   }
616
617   // TODO do we want to do this after reverse-dependencies?
618   dag_traverse_bottom_up(ctx->dag, sched_dag_max_delay_cb, NULL);
619}
620
621static void
622sched_dag_destroy(struct ir3_postsched_ctx *ctx)
623{
624   ralloc_free(ctx->mem_ctx);
625   ctx->mem_ctx = NULL;
626   ctx->dag = NULL;
627}
628
/* Schedule one block: rebuild its instruction list from the dag in
 * priority order, inserting nops for unavoidable delay slots.
 */
static void
sched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block)
{
   ctx->block = block;
   ctx->tex_delay = 0;
   ctx->sfu_delay = 0;

   /* move all instructions to the unscheduled list, and
    * empty the block's instruction list (to which we will
    * be inserting).
    */
   list_replace(&block->instr_list, &ctx->unscheduled_list);
   list_inithead(&block->instr_list);

   // TODO once we are using post-sched for everything we can
   // just not stick in NOP's prior to post-sched, and drop this.
   // for now keep this, since it makes post-sched optional:
   //
   // NOTE(review): branches (OPC_B/OPC_JUMP) are dropped here as well --
   // presumably regenerated by a later pass; confirm before relying on it.
   foreach_instr_safe (instr, &ctx->unscheduled_list) {
      switch (instr->opc) {
      case OPC_NOP:
      case OPC_B:
      case OPC_JUMP:
         list_delinit(&instr->node);
         break;
      default:
         break;
      }
   }

   sched_dag_init(ctx);

   /* First schedule all meta:input instructions, followed by
    * tex-prefetch.  We want all of the instructions that load
    * values into registers before the shader starts to go
    * before any other instructions.  But in particular we
    * want inputs to come before prefetches.  This is because
    * a FS's bary_ij input may not actually be live in the
    * shader, but it should not be scheduled on top of any
    * other input (but can be overwritten by a tex prefetch)
    */
   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_INPUT)
         schedule(ctx, instr);

   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_TEX_PREFETCH)
         schedule(ctx, instr);

   /* Main loop: pick the best ready instruction, pad with nops as
    * needed, and commit it:
    */
   while (!list_is_empty(&ctx->unscheduled_list)) {
      struct ir3_instruction *instr = choose_instr(ctx);

      /* number of cycles instr must wait before it can legally issue: */
      unsigned delay =
         ir3_delay_calc_postra(ctx->block, instr, false, ctx->v->mergedregs);
      d("delay=%u", delay);

      /* and if we run out of instructions that can be scheduled,
       * then it is time for nop's:
       */
      debug_assert(delay <= 6);
      while (delay > 0) {
         ir3_NOP(block);
         delay--;
      }

      schedule(ctx, instr);
   }

   sched_dag_destroy(ctx);
}
698
699static bool
700is_self_mov(struct ir3_instruction *instr)
701{
702   if (!is_same_type_mov(instr))
703      return false;
704
705   if (instr->dsts[0]->num != instr->srcs[0]->num)
706      return false;
707
708   if (instr->dsts[0]->flags & IR3_REG_RELATIV)
709      return false;
710
711   if (instr->cat1.round != ROUND_ZERO)
712      return false;
713
714   if (instr->srcs[0]->flags &
715       (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_FNEG |
716        IR3_REG_FABS | IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT))
717      return false;
718
719   return true;
720}
721
722/* sometimes we end up w/ in-place mov's, ie. mov.u32u32 r1.y, r1.y
723 * as a result of places were before RA we are not sure that it is
724 * safe to eliminate.  We could eliminate these earlier, but sometimes
725 * they are tangled up in false-dep's, etc, so it is easier just to
726 * let them exist until after RA
727 */
728static void
729cleanup_self_movs(struct ir3 *ir)
730{
731   foreach_block (block, &ir->block_list) {
732      foreach_instr_safe (instr, &block->instr_list) {
733         for (unsigned i = 0; i < instr->deps_count; i++) {
734            if (instr->deps[i] && is_self_mov(instr->deps[i])) {
735               instr->deps[i] = NULL;
736            }
737         }
738
739         if (is_self_mov(instr))
740            list_delinit(&instr->node);
741      }
742   }
743}
744
745bool
746ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v)
747{
748   struct ir3_postsched_ctx ctx = {
749      .ir = ir,
750      .v = v,
751   };
752
753   ir3_remove_nops(ir);
754   cleanup_self_movs(ir);
755
756   foreach_block (block, &ir->block_list) {
757      sched_block(&ctx, block);
758   }
759
760   return true;
761}
762