1/*
2 * Copyright (C) 2021 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "util/rb_tree.h"
25#include "ir3_ra.h"
26#include "ir3_shader.h"
27
28/*
29 * This pass does two things:
30 *
31 * 1. Calculates the maximum register pressure. To do this, we need to use the
32 *    exact same technique that RA uses for combining meta_split instructions
33 *    with their sources, so that our calculation agrees with RA.
 * 2. Spills when the register pressure exceeds a limit calculated by RA.
35 *    The implementation is based on "Register Spilling and Live-Range Splitting
36 *    for SSA-Form Programs" by Braun and Hack, although again care has to be
37 *    taken to handle combining split/collect instructions.
38 */
39
/* A value a source can currently be mapped to: either an SSA def (possibly a
 * reload of the original), an immediate, or a const-file register. The
 * IR3_REG_IMMED/IR3_REG_CONST bits in "flags" select the active union member
 * (see set_src_val()).
 */
struct reg_or_immed {
   unsigned flags;
   union {
      struct ir3_register *def; /* valid when neither IMMED nor CONST set */
      uint32_t uimm;            /* valid when IR3_REG_IMMED is set */
      unsigned const_num;       /* valid when IR3_REG_CONST is set */
   };
};
48
/* Per-SSA-def live interval, wrapping the generic ir3_reg_interval with the
 * extra bookkeeping needed by the spiller.
 */
struct ra_spill_interval {
   struct ir3_reg_interval interval;

   /* Links into ctx->full_live_intervals / ctx->half_live_intervals, the
    * next-use-distance-sorted trees used to pick spill candidates.
    */
   struct rb_node node;
   struct rb_node half_node;

   /* The current SSA value/const/immed this source is mapped to. */
   struct reg_or_immed dst;

   /* When computing use distances we use the distance relative to the start
    * of the block. So, for example, a value that's defined in cycle 5 of the
    * block and used 6 cycles later will always have a next_use_distance of 11
    * until we reach that use.
    */
   unsigned next_use_distance;

   /* Whether this value was reloaded and therefore doesn't need to be
    * spilled again. Corresponds to the S set in the paper.
    */
   bool already_spilled;

   /* We need to add sources early for accounting purposes, but we have to
    * insert the reload code for them last. Keep track of whether this interval
    * needs to be reloaded later.
    */
   bool needs_reload;

   /* Keep track of whether this interval currently can't be spilled because:
    * - It or one of its children is a source and we're making space for
    *   sources.
    * - It is a destination and we're making space for destinations.
    */
   bool cant_spill;
};
83
/* Per-basic-block state for the global next-use-distance dataflow and for
 * stitching live values across block boundaries while spilling.
 */
struct ra_spill_block_state {
   /* Next-use distance of each SSA def (indexed by def name) at the end and
    * start of the block; UINT_MAX means "never used again".
    */
   unsigned *next_use_end;
   unsigned *next_use_start;

   /* Approximate total cycles of this block, per instr_cycles(). */
   unsigned cycles;

   /* Map from SSA def to reg_or_immed it is mapped to at the end of the block.
    * This map only contains values which we didn't spill, so it also serves as
    * a record of the new live-out set for this block.
    */
   struct hash_table *remap;

   /* For blocks whose successors are visited first (i.e. loop backedges), which
    * values should be live at the end.
    */
   BITSET_WORD *live_out;

   /* Whether the spilling pass has processed this block yet. */
   bool visited;
};
103
struct ra_spill_ctx {
   /* Generic interval tracking; its callbacks (interval_add etc.) keep the
    * pressure counters and candidate trees below up to date.
    */
   struct ir3_reg_ctx reg_ctx;

   /* One ra_spill_interval per SSA def, indexed by def name. */
   struct ra_spill_interval **intervals;
   unsigned intervals_count;

   /* rb tree of live intervals that we can spill, ordered by next-use distance.
    * full_live_intervals contains the full+shared intervals in the merged_regs
    * case. We use this list to determine what to spill.
    */
   struct rb_tree full_live_intervals;
   struct rb_tree half_live_intervals;

   /* Running and high-water register pressure per class. */
   struct ir3_pressure cur_pressure, max_pressure;

   /* Pressure above which limit() starts spilling. */
   struct ir3_pressure limit_pressure;

   /* When spilling, we need to reserve a register to serve as the zero'd
    * "base". For simplicity we reserve a register at the beginning so that it's
    * always available.
    */
   struct ir3_register *base_reg;

   /* Current pvtmem offset in bytes. */
   unsigned spill_slot;

   struct ir3_liveness *live;

   const struct ir3_compiler *compiler;

   /* Per-block state, indexed by block->index. */
   struct ra_spill_block_state *blocks;

   /* True when actually spilling (maintaining the candidate trees); false
    * when only measuring register pressure.
    */
   bool spilling;

   bool merged_regs;
};
140
141static void
142add_base_reg(struct ra_spill_ctx *ctx, struct ir3 *ir)
143{
144   struct ir3_block *start = ir3_start_block(ir);
145
146   /* We need to stick it after any meta instructions which need to be first. */
147   struct ir3_instruction *after = NULL;
148   foreach_instr (instr, &start->instr_list) {
149      if (instr->opc != OPC_META_INPUT &&
150          instr->opc != OPC_META_TEX_PREFETCH) {
151         after = instr;
152         break;
153      }
154   }
155
156   struct ir3_instruction *mov = create_immed(start, 0);
157
158   if (after)
159      ir3_instr_move_before(mov, after);
160
161   ctx->base_reg = mov->dsts[0];
162
163   /* We don't create an interval, etc. for the base reg, so just lower the
164    * register pressure limit to account for it. We assume it's always
165    * available for simplicity.
166    */
167   ctx->limit_pressure.full -= reg_size(ctx->base_reg);
168}
169
170
171/* Compute the number of cycles per instruction used for next-use-distance
172 * analysis. This is just approximate, obviously.
173 */
174static unsigned
175instr_cycles(struct ir3_instruction *instr)
176{
177   if (instr->opc == OPC_META_PARALLEL_COPY) {
178      unsigned cycles = 0;
179      for (unsigned i = 0; i < instr->dsts_count; i++) {
180         if (!instr->srcs[i]->def ||
181             instr->srcs[i]->def->merge_set != instr->dsts[i]->merge_set) {
182            cycles += reg_elems(instr->srcs[i]);
183         }
184      }
185
186      return cycles;
187   }
188
189   if (instr->opc == OPC_META_COLLECT) {
190      unsigned cycles = 0;
191      for (unsigned i = 0; i < instr->srcs_count; i++) {
192         if (!instr->srcs[i]->def ||
193             instr->srcs[i]->def->merge_set != instr->dsts[0]->merge_set) {
194            cycles++;
195         }
196      }
197
198      return cycles;
199   }
200
201   if (is_meta(instr))
202      return 0;
203
204   return 1 + instr->repeat;
205}
206
/* Backwards scan of one block updating per-def next-use distances, then
 * propagate the block-start distances (and phi uses) into each predecessor's
 * block-end distances. Returns true if any predecessor distance improved, so
 * the caller can iterate to a fixed point.
 */
static bool
compute_block_next_distance(struct ra_spill_ctx *ctx, struct ir3_block *block,
                            unsigned *tmp_next_use)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   memcpy(tmp_next_use, state->next_use_end,
          ctx->live->definitions_count * sizeof(*tmp_next_use));

   unsigned cycle = state->cycles;
   foreach_instr_rev (instr, &block->instr_list) {
      ra_foreach_dst (dst, instr) {
         dst->next_use = tmp_next_use[dst->name];
      }

      ra_foreach_src (src, instr) {
         src->next_use = tmp_next_use[src->def->name];
      }

      cycle -= instr_cycles(instr);

      if (instr->opc == OPC_META_PARALLEL_COPY) {
         /* A copy coalesced with its source (same merge set and offset) isn't
          * a real use; the source's next use is the copy dest's next use.
          */
         ra_foreach_src_n (src, i, instr) {
            if (src->def->merge_set == instr->dsts[i]->merge_set &&
                src->def->merge_set_offset == instr->dsts[i]->merge_set_offset) {
               tmp_next_use[src->def->name] =
                  tmp_next_use[instr->dsts[i]->name];
            } else {
               tmp_next_use[src->def->name] = cycle;
            }
         }
      } else if (instr->opc != OPC_META_PHI) {
         /* Phi "uses" happen at the end of the predecessor, handled below. */
         ra_foreach_src (src, instr) {
            tmp_next_use[src->def->name] = cycle;
         }
      }

      ra_foreach_dst (dst, instr) {
         tmp_next_use[dst->name] = UINT_MAX;
      }
   }

   memcpy(state->next_use_start, tmp_next_use,
          ctx->live->definitions_count * sizeof(*tmp_next_use));

   bool progress = false;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      const struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *pred_state = &ctx->blocks[pred->index];

      /* Add a large-enough distance in front of edges exiting the loop so that
       * variables that are live-through the loop but not used inside it are
       * prioritized for spilling, as per the paper. This just needs to be
       * larger than the longest path through the loop.
       */
      bool loop_exit = pred->loop_depth < block->loop_depth;
      unsigned block_distance = pred_state->cycles + (loop_exit ? 100000 : 0);

      for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
         if (state->next_use_start[j] < UINT_MAX &&
             state->next_use_start[j] + block_distance <
             pred_state->next_use_end[j]) {
            pred_state->next_use_end[j] = state->next_use_start[j] +
               block_distance;
            progress = true;
         }
      }

      /* Phi sources are used on the edge from predecessor i; propagate the
       * phi dest's next use as the source's next use in that predecessor.
       */
      foreach_instr (phi, &block->instr_list) {
         if (phi->opc != OPC_META_PHI)
            break;
         if (!phi->srcs[i]->def)
            continue;
         unsigned src = phi->srcs[i]->def->name;
         if (phi->dsts[0]->next_use < UINT_MAX &&
             phi->dsts[0]->next_use + block_distance <
             pred_state->next_use_end[src]) {
            pred_state->next_use_end[src] = phi->dsts[0]->next_use +
               block_distance;
            progress = true;
         }
      }
   }

   return progress;
}
292
293static void
294compute_next_distance(struct ra_spill_ctx *ctx, struct ir3 *ir)
295{
296   for (unsigned i = 0; i < ctx->live->block_count; i++) {
297      ctx->blocks[i].next_use_start =
298         ralloc_array(ctx, unsigned, ctx->live->definitions_count);
299      ctx->blocks[i].next_use_end =
300         ralloc_array(ctx, unsigned, ctx->live->definitions_count);
301
302      for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
303         ctx->blocks[i].next_use_start[j] = UINT_MAX;
304         ctx->blocks[i].next_use_end[j] = UINT_MAX;
305      }
306   }
307
308   foreach_block (block, &ir->block_list) {
309      struct ra_spill_block_state *state = &ctx->blocks[block->index];
310      state->cycles = 0;
311      foreach_instr (instr, &block->instr_list) {
312         state->cycles += instr_cycles(instr);
313         foreach_dst (dst, instr) {
314            dst->spill_slot = ~0;
315         }
316      }
317   }
318
319   unsigned *tmp_next_use =
320      ralloc_array(ctx, unsigned, ctx->live->definitions_count);
321
322   bool progress = true;
323   while (progress) {
324      progress = false;
325      foreach_block_rev (block, &ir->block_list) {
326         progress |= compute_block_next_distance(ctx, block, tmp_next_use);
327      }
328   }
329}
330
331static void
332ra_spill_interval_init(struct ra_spill_interval *interval,
333                       struct ir3_register *reg)
334{
335   ir3_reg_interval_init(&interval->interval, reg);
336   interval->dst.flags = reg->flags;
337   interval->dst.def = reg;
338   interval->already_spilled = false;
339   interval->needs_reload = false;
340   interval->cant_spill = false;
341}
342
/* Recover the containing ra_spill_interval from its embedded
 * ir3_reg_interval member.
 */
static struct ra_spill_interval *
ir3_reg_interval_to_interval(struct ir3_reg_interval *interval)
{
   return rb_node_data(struct ra_spill_interval, interval, interval);
}
348
349static struct ra_spill_interval *
350ra_spill_interval_root(struct ra_spill_interval *interval)
351{
352   struct ir3_reg_interval *ir3_interval = &interval->interval;
353   while (ir3_interval->parent)
354      ir3_interval = ir3_interval->parent;
355   return ir3_reg_interval_to_interval(ir3_interval);
356}
357
/* Recover the containing ra_spill_ctx from its embedded ir3_reg_ctx. */
static struct ra_spill_ctx *
ir3_reg_ctx_to_ctx(struct ir3_reg_ctx *ctx)
{
   return rb_node_data(struct ra_spill_ctx, ctx, reg_ctx);
}
363
364static int
365ra_spill_interval_cmp(const struct rb_node *_a, const struct rb_node *_b)
366{
367   const struct ra_spill_interval *a =
368      rb_node_data(const struct ra_spill_interval, _a, node);
369   const struct ra_spill_interval *b =
370      rb_node_data(const struct ra_spill_interval, _b, node);
371   return a->next_use_distance - b->next_use_distance;
372}
373
374static int
375ra_spill_interval_half_cmp(const struct rb_node *_a, const struct rb_node *_b)
376{
377   const struct ra_spill_interval *a =
378      rb_node_data(const struct ra_spill_interval, _a, half_node);
379   const struct ra_spill_interval *b =
380      rb_node_data(const struct ra_spill_interval, _b, half_node);
381   return a->next_use_distance - b->next_use_distance;
382}
383
/* ir3_reg_ctx callback: a top-level interval went live. Bump register
 * pressure and, when spilling, insert it into the distance-sorted candidate
 * trees.
 */
static void
interval_add(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
   struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
   struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);

   unsigned size = reg_size(interval->interval.reg);
   if (interval->interval.reg->flags & IR3_REG_SHARED) {
      ctx->cur_pressure.shared += size;
   } else {
      if (interval->interval.reg->flags & IR3_REG_HALF) {
         ctx->cur_pressure.half += size;
         if (ctx->spilling) {
            rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
                           ra_spill_interval_half_cmp);
         }
      }
      /* With merged registers, half regs also count against full pressure. */
      if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
         ctx->cur_pressure.full += size;
         if (ctx->spilling) {
            rb_tree_insert(&ctx->full_live_intervals, &interval->node,
                           ra_spill_interval_cmp);
         }
      }
   }
}
410
/* ir3_reg_ctx callback: a top-level interval died. Exact mirror of
 * interval_add(): drop register pressure and, when spilling, remove it from
 * the candidate trees.
 */
static void
interval_delete(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
   struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
   struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);

   unsigned size = reg_size(interval->interval.reg);
   if (interval->interval.reg->flags & IR3_REG_SHARED) {
      ctx->cur_pressure.shared -= size;
   } else {
      if (interval->interval.reg->flags & IR3_REG_HALF) {
         ctx->cur_pressure.half -= size;
         if (ctx->spilling) {
            rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
         }
      }
      if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
         ctx->cur_pressure.full -= size;
         if (ctx->spilling) {
            rb_tree_remove(&ctx->full_live_intervals, &interval->node);
         }
      }
   }
}
435
/* ir3_reg_ctx callback: a child interval is re-inserted after its parent was
 * removed; for pressure accounting this is the same as a fresh add.
 */
static void
interval_readd(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_parent,
               struct ir3_reg_interval *_child)
{
   interval_add(_ctx, _child);
}
442
/* Initialize the state shared by the pressure-measurement and spilling
 * passes: one ra_spill_interval per SSA def plus the generic interval
 * tracking callbacks.
 */
static void
spill_ctx_init(struct ra_spill_ctx *ctx, struct ir3_shader_variant *v,
               struct ir3_liveness *live)
{
   ctx->live = live;
   ctx->intervals = ralloc_array(ctx, struct ra_spill_interval *,
                                 ctx->live->definitions_count);
   struct ra_spill_interval *intervals =
      rzalloc_array(ctx, struct ra_spill_interval,
                    ctx->live->definitions_count);
   for (unsigned i = 0; i < ctx->live->definitions_count; i++)
      ctx->intervals[i] = &intervals[i];

   ctx->intervals_count = ctx->live->definitions_count;
   ctx->compiler = v->shader->compiler;
   ctx->merged_regs = v->mergedregs;

   rb_tree_init(&ctx->reg_ctx.intervals);
   ctx->reg_ctx.interval_add = interval_add;
   ctx->reg_ctx.interval_delete = interval_delete;
   ctx->reg_ctx.interval_readd = interval_readd;
}
465
/* Mark an interval live; pressure/tree bookkeeping happens via the
 * interval_add() callback.
 */
static void
ra_spill_ctx_insert(struct ra_spill_ctx *ctx,
                    struct ra_spill_interval *interval)
{
   ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);
}
472
/* Mark an interval dead; pressure/tree bookkeeping happens via the
 * interval_delete() callback.
 */
static void
ra_spill_ctx_remove(struct ra_spill_ctx *ctx,
                    struct ra_spill_interval *interval)
{
   ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval);
}
479
480static void
481init_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
482{
483   struct ra_spill_interval *interval = ctx->intervals[dst->name];
484   ra_spill_interval_init(interval, dst);
485   if (ctx->spilling)
486      interval->next_use_distance = dst->next_use;
487}
488
/* Make a destination's interval live before its instruction executes, and
 * protect it from being spilled while we make room for destinations.
 */
static void
insert_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   if (interval->interval.inserted)
      return;

   ra_spill_ctx_insert(ctx, interval);
   interval->cant_spill = true;

   /* For precolored inputs, make sure we leave enough registers to allow for
    * holes in the inputs. It can happen that the binning shader has a lower
    * register pressure than the main shader, but the main shader decided to
    * add holes between the inputs which means that the binning shader has a
    * higher register demand.
    */
   if (dst->instr->opc == OPC_META_INPUT && dst->num != INVALID_REG) {
      physreg_t physreg = ra_reg_get_physreg(dst);
      physreg_t max = physreg + reg_size(dst);

      if (interval->interval.reg->flags & IR3_REG_SHARED)
         ctx->max_pressure.shared = MAX2(ctx->max_pressure.shared, max);
      else if (interval->interval.reg->flags & IR3_REG_HALF)
         ctx->max_pressure.half = MAX2(ctx->max_pressure.half, max);
      else
         ctx->max_pressure.full = MAX2(ctx->max_pressure.full, max);
   }
}
517
518static void
519insert_src(struct ra_spill_ctx *ctx, struct ir3_register *src)
520{
521   struct ra_spill_interval *interval = ctx->intervals[src->def->name];
522
523   if (!interval->interval.inserted) {
524      ra_spill_ctx_insert(ctx, interval);
525      interval->needs_reload = true;
526      interval->already_spilled = true;
527   }
528
529   ra_spill_interval_root(interval)->cant_spill = true;
530
531}
532
533static void
534remove_src_early(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
535                 struct ir3_register *src)
536{
537   struct ra_spill_interval *interval = ctx->intervals[src->def->name];
538
539   if (!interval->interval.inserted || interval->interval.parent ||
540       !rb_tree_is_empty(&interval->interval.children))
541      return;
542
543   ra_spill_ctx_remove(ctx, interval);
544}
545
546static void
547remove_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
548           struct ir3_register *src)
549{
550   struct ra_spill_interval *interval = ctx->intervals[src->def->name];
551
552   if (!interval->interval.inserted)
553      return;
554
555   ra_spill_ctx_remove(ctx, interval);
556}
557
558static void
559finish_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
560{
561   struct ra_spill_interval *interval = ctx->intervals[dst->name];
562   interval->cant_spill = false;
563}
564
565static void
566remove_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
567{
568   struct ra_spill_interval *interval = ctx->intervals[dst->name];
569
570   if (!interval->interval.inserted)
571      return;
572
573   ra_spill_ctx_remove(ctx, interval);
574}
575
/* After a source is consumed its next-use distance changes, which is the sort
 * key of the candidate trees, so the interval has to be removed and
 * re-inserted to keep the trees ordered.
 */
static void
update_src_next_use(struct ra_spill_ctx *ctx, struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   assert(interval->interval.inserted);

   interval->next_use_distance = src->next_use;

   /* If this node is inserted in one of the trees, then it needs to be resorted
    * as its key has changed.
    */
   if (!interval->interval.parent && !(src->flags & IR3_REG_SHARED)) {
      if (src->flags & IR3_REG_HALF) {
         rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
         rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
                        ra_spill_interval_half_cmp);
      }
      if (ctx->merged_regs || !(src->flags & IR3_REG_HALF)) {
         rb_tree_remove(&ctx->full_live_intervals, &interval->node);
         rb_tree_insert(&ctx->full_live_intervals, &interval->node,
                        ra_spill_interval_cmp);
      }
   }
}
601
/* Lazily assign a byte offset in pvtmem for reg. Defs in a merge set share a
 * single slot allocation so that overlapping values spill to consistent
 * offsets.
 */
static unsigned
get_spill_slot(struct ra_spill_ctx *ctx, struct ir3_register *reg)
{
   if (reg->merge_set) {
      if (reg->merge_set->spill_slot == ~0) {
         /* NOTE(review): the * 2 factors appear to convert merge-set
          * size/offset units to bytes -- confirm against the merge-set
          * definition.
          */
         reg->merge_set->spill_slot = ALIGN_POT(ctx->spill_slot,
                                                reg->merge_set->alignment);
         ctx->spill_slot = reg->merge_set->spill_slot + reg->merge_set->size * 2;
      }
      return reg->merge_set->spill_slot + reg->merge_set_offset * 2;
   } else {
      if (reg->spill_slot == ~0) {
         reg->spill_slot = ALIGN_POT(ctx->spill_slot, reg_elem_size(reg));
         ctx->spill_slot = reg->spill_slot + reg_size(reg) * 2;
      }
      return reg->spill_slot;
   }
}
620
621static void
622set_src_val(struct ir3_register *src, const struct reg_or_immed *val)
623{
624   if (val->flags & IR3_REG_IMMED) {
625      src->flags = IR3_REG_IMMED | (val->flags & IR3_REG_HALF);
626      src->uim_val = val->uimm;
627      src->def = NULL;
628   } else if (val->flags & IR3_REG_CONST) {
629      src->flags = IR3_REG_CONST | (val->flags & IR3_REG_HALF);
630      src->num = val->const_num;
631      src->def = NULL;
632   } else {
633      src->def = val->def;
634   }
635}
636
/* Turn an immed/const value into a real register with a mov so that it can
 * be spilled. The mov is inserted before "instr" when non-NULL, otherwise
 * appended to "block". Returns the mov's destination.
 */
static struct ir3_register *
materialize_pcopy_src(const struct reg_or_immed *src,
                      struct ir3_instruction *instr,
                      struct ir3_block *block)
{
   struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   struct ir3_register *dst = __ssa_dst(mov);
   dst->flags |= src->flags & IR3_REG_HALF;
   struct ir3_register *mov_src = ir3_src_create(mov, INVALID_REG, src->flags);
   set_src_val(mov_src, src);
   mov->cat1.src_type = mov->cat1.dst_type =
      (src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   if (instr)
      ir3_instr_move_before(mov, instr);
   return dst;
}
654
/* Emit a store of a value to its pvtmem spill slot, addressed off the
 * reserved zero base register. The spill is inserted before "instr" when
 * non-NULL, otherwise appended to "block".
 */
static void
spill(struct ra_spill_ctx *ctx, const struct reg_or_immed *val,
      unsigned spill_slot, struct ir3_instruction *instr, struct ir3_block *block)
{
   struct ir3_register *reg;

   /* If spilling an immed/const pcopy src, we need to actually materialize it
    * first with a mov.
    */
   if (val->flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
      reg = materialize_pcopy_src(val, instr, block);
   } else {
      reg = val->def;
   }

   d("spilling ssa_%u:%u to %u", reg->instr->serialno, reg->name,
     spill_slot);

   unsigned elems = reg_elems(reg);
   struct ir3_instruction *spill =
      ir3_instr_create(block, OPC_SPILL_MACRO, 0, 3);
   /* Sources: base register, value to store, element count. */
   ir3_src_create(spill, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
   unsigned src_flags = reg->flags & (IR3_REG_HALF | IR3_REG_IMMED |
                                      IR3_REG_CONST | IR3_REG_SSA |
                                      IR3_REG_ARRAY);
   struct ir3_register *src = ir3_src_create(spill, INVALID_REG, src_flags);
   ir3_src_create(spill, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
   spill->cat6.dst_offset = spill_slot;
   spill->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   src->def = reg;
   if (reg->flags & IR3_REG_ARRAY) {
      src->size = reg->size;
      src->array.id = reg->array.id;
      src->array.offset = 0;
   } else {
      src->wrmask = reg->wrmask;
   }

   if (instr)
      ir3_instr_move_before(spill, instr);
}
697
/* Spill an interval's current value to its (lazily assigned) spill slot. */
static void
spill_interval(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
               struct ir3_instruction *instr, struct ir3_block *block)
{
   spill(ctx, &interval->dst, get_spill_slot(ctx, interval->interval.reg),
         instr, block);
}
705
/* This is similar to "limit" in the paper: spill candidate intervals, in the
 * order given by the candidate trees, until current pressure fits under the
 * limit again. Intervals marked cant_spill (in-flight sources/dests) are
 * skipped; already_spilled values need no store, only removal.
 */
static void
limit(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
      d("cur half pressure %u exceeds %u", ctx->cur_pressure.half,
        ctx->limit_pressure.half);
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->half_live_intervals, half_node) {
         d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
           interval->interval.reg->name);
         if (!interval->cant_spill) {
            if (!interval->already_spilled)
               spill_interval(ctx, interval, instr, instr->block);
            ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
            if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
               break;
         }
      }

      assert(ctx->cur_pressure.half <= ctx->limit_pressure.half);
   }

   if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
      d("cur full pressure %u exceeds %u", ctx->cur_pressure.full,
        ctx->limit_pressure.full);
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->full_live_intervals, node) {
         d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
           interval->interval.reg->name);
         if (!interval->cant_spill) {
            if (!interval->already_spilled)
               spill_interval(ctx, interval, instr, instr->block);
            ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
            if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
               break;
         } else {
            d("can't spill");
         }
      }

      assert(ctx->cur_pressure.full <= ctx->limit_pressure.full);
   }
}
750
751/* There's a corner case where we reload a value which has overlapping live
752 * values already reloaded, either because it's the child of some other interval
753 * that was already reloaded or some of its children have already been
754 * reloaded. Because RA only expects overlapping source/dest intervals for meta
755 * instructions (split/collect), and we don't want to add register pressure by
756 * creating an entirely separate value, we need to add splits and collects to
757 * deal with this case. These splits/collects have to also have correct merge
758 * set information, so that it doesn't result in any actual code or register
759 * pressure in practice.
760 */
761
762static void
763add_to_merge_set(struct ir3_merge_set *set, struct ir3_register *def,
764                 unsigned offset)
765{
766   def->merge_set = set;
767   def->merge_set_offset = offset;
768   def->interval_start = set->interval_start + offset;
769   def->interval_end = set->interval_start + offset + reg_size(def);
770}
771
772static struct ir3_register *
773split(struct ir3_register *def, unsigned offset,
774      struct ir3_instruction *after, struct ir3_block *block)
775{
776   if (reg_elems(def) == 1) {
777      assert(offset == 0);
778      return def;
779   }
780
781   assert(!(def->flags & IR3_REG_ARRAY));
782   assert(def->merge_set);
783   struct ir3_instruction *split =
784      ir3_instr_create(after->block, OPC_META_SPLIT, 1, 1);
785   struct ir3_register *dst = __ssa_dst(split);
786   dst->flags |= def->flags & IR3_REG_HALF;
787   struct ir3_register *src = ir3_src_create(split, INVALID_REG, def->flags);
788   src->wrmask = def->wrmask;
789   src->def = def;
790   add_to_merge_set(def->merge_set, dst,
791                    def->merge_set_offset + offset * reg_elem_size(def));
792   if (after)
793      ir3_instr_move_before(split, after);
794   return dst;
795}
796
797static struct ir3_register *
798extract(struct ir3_register *parent_def, unsigned offset, unsigned elems,
799        struct ir3_instruction *after, struct ir3_block *block)
800{
801   if (offset == 0 && elems == reg_elems(parent_def))
802      return parent_def;
803
804   struct ir3_instruction *collect =
805      ir3_instr_create(after->block, OPC_META_COLLECT, 1, elems);
806   struct ir3_register *dst = __ssa_dst(collect);
807   dst->flags |= parent_def->flags & IR3_REG_HALF;
808   dst->wrmask = MASK(elems);
809   add_to_merge_set(parent_def->merge_set, dst, parent_def->merge_set_offset);
810
811   for (unsigned i = 0; i < elems; i++) {
812      ir3_src_create(collect, INVALID_REG, parent_def->flags)->def =
813         split(parent_def, offset + i, after, block);
814   }
815
816   if (after)
817      ir3_instr_move_before(collect, after);
818   return dst;
819}
820
/* Emit a load of reg from its spill slot, inserted before "after" when
 * non-NULL, otherwise appended to "block". The new def inherits reg's
 * merge-set and interval info so RA treats it as occupying the same place.
 */
static struct ir3_register *
reload(struct ra_spill_ctx *ctx, struct ir3_register *reg,
       struct ir3_instruction *after, struct ir3_block *block)
{
   unsigned spill_slot = get_spill_slot(ctx, reg);

   d("reloading ssa_%u:%u from %u", reg->instr->serialno, reg->name,
     spill_slot);

   unsigned elems = reg_elems(reg);
   struct ir3_instruction *reload =
      ir3_instr_create(block, OPC_RELOAD_MACRO, 1, 3);
   struct ir3_register *dst = __ssa_dst(reload);
   dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
   /* Sources: base register, byte offset, element count. */
   ir3_src_create(reload, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
   struct ir3_register *offset_reg =
      ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED);
   offset_reg->uim_val = spill_slot;
   ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
   reload->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   if (reg->flags & IR3_REG_ARRAY) {
      dst->array.offset = 0;
      dst->array.id = reg->array.id;
      dst->size = reg->size;
   } else {
      dst->wrmask = MASK(elems);
   }

   dst->merge_set = reg->merge_set;
   dst->merge_set_offset = reg->merge_set_offset;
   dst->interval_start = reg->interval_start;
   dst->interval_end = reg->interval_end;

   if (after)
      ir3_instr_move_before(reload, after);

   return dst;
}
860
/* After reloading (or extracting) an interval, point it -- and recursively
 * all of its live child intervals, via extract() -- at the new def.
 */
static void
rewrite_src_interval(struct ra_spill_ctx *ctx,
                    struct ra_spill_interval *interval,
                    struct ir3_register *def,
                    struct ir3_instruction *instr,
                    struct ir3_block *block)
{
   interval->dst.flags = def->flags;
   interval->dst.def = def;
   interval->needs_reload = false;

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      struct ir3_register *child_reg = child->interval.reg;
      struct ir3_register *child_def =
         extract(def, (child_reg->interval_start -
                       interval->interval.reg->interval_start) / reg_elem_size(def),
                 reg_elems(child_reg), instr, block);
      rewrite_src_interval(ctx, child, child_def, instr, block);
   }
}
882
/* Make def available again: if its enclosing parent interval was already
 * reloaded, extract the value from the parent instead of touching memory;
 * otherwise reload it from its spill slot and fix up any child intervals.
 */
static void
reload_def(struct ra_spill_ctx *ctx, struct ir3_register *def,
           struct ir3_instruction *instr, struct ir3_block *block)
{
   unsigned elems = reg_elems(def);
   struct ra_spill_interval *interval = ctx->intervals[def->name];

   struct ir3_reg_interval *ir3_parent = interval->interval.parent;

   if (ir3_parent) {
      struct ra_spill_interval *parent =
         ir3_reg_interval_to_interval(ir3_parent);
      if (!parent->needs_reload) {
         interval->dst.flags = def->flags;
         interval->dst.def = extract(
            parent->dst.def, (def->interval_start - parent->dst.def->interval_start) /
            reg_elem_size(def), elems, instr, block);
         return;
      }
   }

   struct ir3_register *dst = reload(ctx, def, instr, block);

   rewrite_src_interval(ctx, interval, dst, instr, block);
}
908
909static void
910reload_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
911            struct ir3_register *src)
912{
913   struct ra_spill_interval *interval = ctx->intervals[src->def->name];
914
915   if (interval->needs_reload) {
916      reload_def(ctx, src->def, instr, instr->block);
917   }
918
919   ra_spill_interval_root(interval)->cant_spill = false;
920}
921
922static void
923rewrite_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
924            struct ir3_register *src)
925{
926   struct ra_spill_interval *interval = ctx->intervals[src->def->name];
927
928   set_src_val(src, &interval->dst);
929}
930
931static void
932update_max_pressure(struct ra_spill_ctx *ctx)
933{
934   d("pressure:");
935   d("\tfull: %u", ctx->cur_pressure.full);
936   d("\thalf: %u", ctx->cur_pressure.half);
937   d("\tshared: %u", ctx->cur_pressure.shared);
938
939   ctx->max_pressure.full =
940      MAX2(ctx->max_pressure.full, ctx->cur_pressure.full);
941   ctx->max_pressure.half =
942      MAX2(ctx->max_pressure.half, ctx->cur_pressure.half);
943   ctx->max_pressure.shared =
944      MAX2(ctx->max_pressure.shared, ctx->cur_pressure.shared);
945}
946
/* Process one ordinary instruction: account for its sources and destinations
 * in the live-interval set, enforce the pressure limit (when spilling), and
 * rewrite spilled sources to their reloaded values. The order of the phases
 * below matters and mirrors what RA itself does.
 */
static void
handle_instr(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   ra_foreach_dst (dst, instr) {
      init_dst(ctx, dst);
   }

   /* Mark all sources live (and cant_spill) so limit() won't evict them. */
   if (ctx->spilling) {
      ra_foreach_src (src, instr)
         insert_src(ctx, src);
   }

   /* Handle tied destinations. If a destination is tied to a source and that
    * source is live-through, then we need to allocate a new register for the
    * destination which is live-through itself and cannot overlap the
    * sources.
    */

   ra_foreach_dst (dst, instr) {
      struct ir3_register *tied_src = dst->tied;
      if (tied_src && !(tied_src->flags & IR3_REG_FIRST_KILL))
         insert_dst(ctx, dst);
   }

   if (ctx->spilling)
      limit(ctx, instr);
   else
      update_max_pressure(ctx);

   /* Now that pressure is under the limit, reload any spilled sources. */
   if (ctx->spilling) {
      ra_foreach_src (src, instr) {
         reload_src(ctx, instr, src);
         update_src_next_use(ctx, src);
      }
   }

   /* Killed sources die before the destinations are written. */
   ra_foreach_src (src, instr) {
      if (src->flags & IR3_REG_FIRST_KILL)
         remove_src_early(ctx, instr, src);
   }

   ra_foreach_dst (dst, instr) {
      insert_dst(ctx, dst);
   }

   if (ctx->spilling)
      limit(ctx, instr);
   else
      update_max_pressure(ctx);

   /* We have to remove sources before rewriting them so that we can lookup the
    * interval to remove before the source itself is changed.
    */
   ra_foreach_src (src, instr) {
      if (src->flags & IR3_REG_FIRST_KILL)
         remove_src(ctx, instr, src);
   }

   if (ctx->spilling) {
      ra_foreach_src (src, instr) {
         rewrite_src(ctx, instr, src);
      }
   }

   ra_foreach_dst (dst, instr) {
      finish_dst(ctx, dst);
   }

   /* Destinations that are never read can be dropped immediately. */
   for (unsigned i = 0; i < instr->dsts_count; i++) {
      if (ra_reg_is_dst(instr->dsts[i]) &&
          (instr->dsts[i]->flags & IR3_REG_UNUSED))
         remove_dst(ctx, instr->dsts[i]);
   }
}
1021
1022static struct ra_spill_interval *
1023create_temp_interval(struct ra_spill_ctx *ctx, struct ir3_register *def)
1024{
1025   unsigned name = ctx->intervals_count++;
1026   unsigned offset = ctx->live->interval_offset;
1027
1028   /* This is kinda hacky, but we need to create a fake SSA def here that is
1029    * only used as part of the pcopy accounting. See below.
1030    */
1031   struct ir3_register *reg = rzalloc(ctx, struct ir3_register);
1032   *reg = *def;
1033   reg->name = name;
1034   reg->interval_start = offset;
1035   reg->interval_end = offset + reg_size(def);
1036   reg->merge_set = NULL;
1037
1038   ctx->intervals = reralloc(ctx, ctx->intervals, struct ra_spill_interval *,
1039                             ctx->intervals_count);
1040   struct ra_spill_interval *interval = rzalloc(ctx, struct ra_spill_interval);
1041   ra_spill_interval_init(interval, reg);
1042   ctx->intervals[name] = interval;
1043   ctx->live->interval_offset += reg_size(def);
1044   return interval;
1045}
1046
1047/* In the sequence of copies generated (see below), would this source be killed?
1048 */
1049static bool
1050is_last_pcopy_src(struct ir3_instruction *pcopy, unsigned src_n)
1051{
1052   struct ir3_register *src = pcopy->srcs[src_n];
1053   if (!(src->flags & IR3_REG_KILL))
1054      return false;
1055   for (unsigned j = src_n + 1; j < pcopy->srcs_count; j++) {
1056      if (pcopy->srcs[j]->def == src->def)
1057         return false;
1058   }
1059   return true;
1060}
1061
1062/* Parallel copies are different from normal instructions. The sources together
1063 * may be larger than the entire register file, so we cannot just reload every
1064 * source like normal, and indeed that probably wouldn't be a great idea.
1065 * Instead we essentially need to lower the parallel copy to "copies," just like
1066 * in the normal CSSA construction, although we implement the copies by
1067 * reloading and then possibly spilling values. We essentially just shuffle
1068 * around the sources until each source either (a) is live or (b) has the same
1069 * spill slot as its corresponding destination. We do this by decomposing the
1070 * copy into a series of copies, so:
1071 *
1072 * a, b, c = d, e, f
1073 *
1074 * becomes:
1075 *
1076 * d' = d
1077 * e' = e
1078 * f' = f
1079 * a = d'
1080 * b = e'
1081 * c = f'
1082 *
1083 * the temporary SSA values d', e', and f' never actually show up in the result.
1084 * They are only used for our internal accounting. They may, however, have their
1085 * own spill slot created for them. Similarly, we don't actually emit any copy
1086 * instructions, although we emit the spills/reloads that *would've* been
1087 * required if those copies were there.
1088 *
1089 * TODO: in order to reduce the number of temporaries and therefore spill slots,
1090 * we could instead do a more complicated analysis that considers the location
1091 * transfer graph.
1092 *
1093 * In addition, we actually remove the parallel copy and rewrite all its uses
1094 * (in the phi nodes) rather than rewrite its sources at the end. Recreating it
1095 * later turns out to be easier than keeping it up-to-date throughout this pass,
1096 * since we may have to remove entries for phi sources that are spilled and add
1097 * entries for live-outs that are spilled and reloaded, which can happen here
1098 * and then possibly be undone or done again when processing live-ins of the
1099 * successor block.
1100 */
1101
/* Lower a parallel copy to a sequence of per-entry copies for spilling
 * purposes. See the big comment above: each source is first copied into a
 * temporary interval, then each destination is filled from its temporary, so
 * that pressure never spikes to the full width of the pcopy.
 */
static void
handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
{
   foreach_dst (dst, pcopy) {
      struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
      ra_spill_interval_init(dst_interval, dst);
   }

   /* First half: for each entry, "copy" the source into a temporary. */
   foreach_src_n (src, i, pcopy) {
      d("processing src %u", i);
      struct ir3_register *dst = pcopy->dsts[i];

      /* Skip the intermediate copy for cases where the source is merged with
       * the destination. Crucially this means that we also don't reload/spill
       * it if it's been spilled, because it shares the same spill slot.
       */
      if (src->def && src->def->merge_set &&
          src->def->merge_set == dst->merge_set &&
          src->def->merge_set_offset == dst->merge_set_offset) {
         struct ra_spill_interval *src_interval = ctx->intervals[src->def->name];
         struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
         if (src_interval->interval.inserted) {
            update_src_next_use(ctx, src);
            if (is_last_pcopy_src(pcopy, i))
               ra_spill_ctx_remove(ctx, src_interval);
            /* Protect the dst while limit() may spill other intervals. */
            dst_interval->cant_spill = true;
            ra_spill_ctx_insert(ctx, dst_interval);
            limit(ctx, pcopy);
            dst_interval->cant_spill = false;
            dst_interval->dst = src_interval->dst;
         }
      } else if (src->def) {
         /* General case: route the value through a fake temporary def that
          * only exists for accounting (it may get its own spill slot).
          */
         struct ra_spill_interval *temp_interval =
            create_temp_interval(ctx, dst);
         struct ir3_register *temp = temp_interval->interval.reg;
         temp_interval->next_use_distance = src->next_use;

         insert_src(ctx, src);
         limit(ctx, pcopy);
         reload_src(ctx, pcopy, src);
         update_src_next_use(ctx, src);
         if (is_last_pcopy_src(pcopy, i))
            remove_src(ctx, pcopy, src);
         struct ra_spill_interval *src_interval =
            ctx->intervals[src->def->name];
         temp_interval->dst = src_interval->dst;

         temp_interval->cant_spill = true;
         ra_spill_ctx_insert(ctx, temp_interval);
         limit(ctx, pcopy);
         temp_interval->cant_spill = false;

         /* Redirect the pcopy source at the temporary for the second half. */
         src->flags = temp->flags;
         src->def = temp;
      }
   }

   d("done with pcopy srcs");

   /* Second half: fill each destination from its temporary (or immediate). */
   foreach_src_n (src, i, pcopy) {
      struct ir3_register *dst = pcopy->dsts[i];

      /* Merged entries were fully handled above. */
      if (src->def && src->def->merge_set &&
          src->def->merge_set == dst->merge_set &&
          src->def->merge_set_offset == dst->merge_set_offset)
         continue;

      struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];

      if (!src->def) {
         dst_interval->cant_spill = true;
         ra_spill_ctx_insert(ctx, dst_interval);
         limit(ctx, pcopy);
         dst_interval->cant_spill = false;

         /* Non-SSA sources are consts or immediates; record them directly. */
         assert(src->flags & (IR3_REG_CONST | IR3_REG_IMMED));
         if (src->flags & IR3_REG_CONST) {
            dst_interval->dst.flags = src->flags;
            dst_interval->dst.const_num = src->num;
         } else {
            dst_interval->dst.flags = src->flags;
            dst_interval->dst.uimm = src->uim_val;
         }
      } else {
         /* src->def now points at the temporary created in the first half. */
         struct ra_spill_interval *temp_interval = ctx->intervals[src->def->name];

         insert_src(ctx, src);
         limit(ctx, pcopy);
         reload_src(ctx, pcopy, src);
         remove_src(ctx, pcopy, src);

         dst_interval->dst = temp_interval->dst;
         ra_spill_ctx_insert(ctx, dst_interval);
      }
   }

   /* The pcopy itself is deleted and recreated later; see comment above. */
   pcopy->flags |= IR3_INSTR_UNUSED;
}
1200
1201static void
1202handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
1203{
1204   init_dst(ctx, instr->dsts[0]);
1205   insert_dst(ctx, instr->dsts[0]);
1206   finish_dst(ctx, instr->dsts[0]);
1207}
1208
1209static void
1210remove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
1211{
1212   if (instr->opc == OPC_META_TEX_PREFETCH) {
1213      ra_foreach_src (src, instr)
1214         remove_src(ctx, instr, src);
1215   }
1216   if (instr->dsts[0]->flags & IR3_REG_UNUSED)
1217      remove_dst(ctx, instr->dsts[0]);
1218}
1219
1220static void
1221handle_live_in(struct ra_spill_ctx *ctx, struct ir3_block *block,
1222               struct ir3_register *def)
1223{
1224   struct ra_spill_interval *interval = ctx->intervals[def->name];
1225   ra_spill_interval_init(interval, def);
1226   if (ctx->spilling) {
1227      interval->next_use_distance =
1228         ctx->blocks[block->index].next_use_start[def->name];
1229   }
1230
1231   ra_spill_ctx_insert(ctx, interval);
1232}
1233
1234static bool
1235is_live_in_phi(struct ir3_register *def, struct ir3_block *block)
1236{
1237   return def->instr->opc == OPC_META_PHI && def->instr->block == block;
1238}
1239
1240static bool
1241is_live_in_pred(struct ra_spill_ctx *ctx, struct ir3_register *def,
1242                struct ir3_block *block, unsigned pred_idx)
1243{
1244   struct ir3_block *pred = block->predecessors[pred_idx];
1245   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1246   if (is_live_in_phi(def, block)) {
1247      def = def->instr->srcs[pred_idx]->def;
1248      if (!def)
1249         return false;
1250   }
1251
1252   return _mesa_hash_table_search(state->remap, def);
1253}
1254
1255static bool
1256is_live_in_undef(struct ir3_register *def,
1257                 struct ir3_block *block, unsigned pred_idx)
1258{
1259   if (!is_live_in_phi(def, block))
1260      return false;
1261
1262   return !def->instr->srcs[pred_idx]->def;
1263}
1264
1265static struct reg_or_immed *
1266read_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
1267             struct ir3_block *block, unsigned pred_idx)
1268{
1269   struct ir3_block *pred = block->predecessors[pred_idx];
1270   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1271
1272   if (is_live_in_phi(def, block)) {
1273      def = def->instr->srcs[pred_idx]->def;
1274      if (!def)
1275         return NULL;
1276   }
1277
1278   struct hash_entry *entry = _mesa_hash_table_search(state->remap, def);
1279   if (entry)
1280      return entry->data;
1281   else
1282      return NULL;
1283}
1284
1285static bool
1286is_live_in_all_preds(struct ra_spill_ctx *ctx, struct ir3_register *def,
1287                     struct ir3_block *block)
1288{
1289   for (unsigned i = 0; i < block->predecessors_count; i++) {
1290      if (!is_live_in_pred(ctx, def, block, i))
1291         return false;
1292   }
1293
1294   return true;
1295}
1296
1297static void
1298spill_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
1299              struct ir3_block *block)
1300{
1301   for (unsigned i = 0; i < block->predecessors_count; i++) {
1302      struct ir3_block *pred = block->predecessors[i];
1303      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1304
1305      if (!state->visited)
1306         continue;
1307
1308      struct reg_or_immed *pred_def = read_live_in(ctx, def, block, i);
1309      if (pred_def) {
1310         spill(ctx, pred_def, get_spill_slot(ctx, def), NULL, pred);
1311      }
1312   }
1313}
1314
/* Reduce the live-in set of "block" to fit the pressure limit by spilling
 * live-in values (preferring those with the furthest next use, which is the
 * rb-tree order of the half/full live-interval trees).
 */
static void
spill_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   bool all_preds_visited = true;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (!state->visited) {
         all_preds_visited = false;
         break;
      }
   }

   /* Note: in the paper they explicitly spill live-through values first, but we
    * should be doing that automatically by virtue of picking the largest
    * distance due to the extra distance added to edges out of loops.
    *
    * TODO: Keep track of pressure in each block and preemptively spill
    * live-through values as described in the paper to avoid spilling them
    * inside the loop.
    */

   /* Half-register pressure: spill live-ins until under the limit. Values
    * already live in every visited predecessor are kept when possible.
    */
   if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->half_live_intervals, half_node) {
         if (all_preds_visited &&
             is_live_in_all_preds(ctx, interval->interval.reg, block))
            continue;
         spill_live_in(ctx, interval->interval.reg, block);
         ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
         if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
            break;
      }
   }

   /* Same again for full-register pressure. */
   if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->full_live_intervals, node) {
         if (all_preds_visited &&
             is_live_in_all_preds(ctx, interval->interval.reg, block))
            continue;
         spill_live_in(ctx, interval->interval.reg, block);
         ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
         if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
            break;
      }
   }
}
1363
/* Record in the predecessor's remap table that "interval"'s value along this
 * edge is "new_val", and recursively do the same for child intervals by
 * extracting their slices in the predecessor.
 */
static void
live_in_rewrite(struct ra_spill_ctx *ctx,
                struct ra_spill_interval *interval,
                struct reg_or_immed *new_val,
                struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
   struct ir3_register *def = interval->interval.reg;
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
   }

   if (def)
      _mesa_hash_table_insert(state->remap, def, new_val);

   /* NOTE(review): if the phi source on this edge is undef, def is NULL here;
    * the extract() below dereferences def — presumably such intervals never
    * have children. TODO confirm.
    */
   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      assert(new_val->flags & IR3_REG_SSA);
      struct ir3_register *child_def =
         extract(new_val->def,
                 (child->interval.reg->interval_start - def->interval_start) /
                 reg_elem_size(def), reg_elems(child->interval.reg),
                 NULL, pred);
      struct reg_or_immed *child_val = ralloc(ctx, struct reg_or_immed);
      child_val->def = child_def;
      child_val->flags = child_def->flags;
      live_in_rewrite(ctx, child, child_val, block, pred_idx);
   }
}
1394
1395static void
1396reload_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
1397               struct ir3_block *block)
1398{
1399   struct ra_spill_interval *interval = ctx->intervals[def->name];
1400   for (unsigned i = 0; i < block->predecessors_count; i++) {
1401      struct ir3_block *pred = block->predecessors[i];
1402      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1403      if (!state->visited)
1404         continue;
1405
1406      if (is_live_in_undef(def, block, i))
1407         continue;
1408
1409      struct reg_or_immed *new_val = read_live_in(ctx, def, block, i);
1410
1411      if (!new_val) {
1412         new_val = ralloc(ctx, struct reg_or_immed);
1413         new_val->def = reload(ctx, def, NULL, pred);
1414         new_val->flags = new_val->def->flags;
1415      }
1416      live_in_rewrite(ctx, interval, new_val, block, i);
1417   }
1418}
1419
1420static void
1421reload_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
1422{
1423   rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
1424                    interval.node) {
1425      reload_live_in(ctx, interval->interval.reg, block);
1426   }
1427}
1428
/* For a resident live-in "def", either forward the single predecessor value
 * directly or build a phi merging the per-predecessor values. A phi is also
 * needed when some predecessor hasn't been visited yet (e.g. a loop back
 * edge), in which case that source temporarily points at def itself and is
 * fixed up later.
 */
static void
add_live_in_phi(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   if (!interval->interval.inserted)
      return;

   /* Scan predecessors: if every visited pred supplies the same SSA def, no
    * phi is required.
    */
   bool needs_phi = false;
   struct ir3_register *cur_def = NULL;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited) {
         needs_phi = true;
         break;
      }

      struct hash_entry *entry =
         _mesa_hash_table_search(state->remap, def);
      assert(entry);
      struct reg_or_immed *pred_val = entry->data;
      if ((pred_val->flags & (IR3_REG_IMMED | IR3_REG_CONST)) ||
          !pred_val->def ||
          (cur_def && cur_def != pred_val->def)) {
         needs_phi = true;
         break;
      }
      cur_def = pred_val->def;
   }

   if (!needs_phi) {
      interval->dst.def = cur_def;
      interval->dst.flags = cur_def->flags;
      return;
   }

   /* Build the phi, copying the original def's shape and merge/interval info
    * so RA treats it the same way.
    */
   struct ir3_instruction *phi =
      ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
   struct ir3_register *dst = __ssa_dst(phi);
   dst->flags |= def->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
   dst->size = def->size;
   dst->wrmask = def->wrmask;

   dst->interval_start = def->interval_start;
   dst->interval_end = def->interval_end;
   dst->merge_set = def->merge_set;
   dst->merge_set_offset = def->merge_set_offset;

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      struct ir3_register *src = ir3_src_create(phi, INVALID_REG, dst->flags);
      src->size = def->size;
      src->wrmask = def->wrmask;

      if (state->visited) {
         struct hash_entry *entry =
            _mesa_hash_table_search(state->remap, def);
         assert(entry);
         struct reg_or_immed *new_val = entry->data;
         set_src_val(src, new_val);
      } else {
         /* Unvisited pred (back edge): provisionally reference def; this
          * source is rewritten once the pred's live-outs are known.
          */
         src->def = def;
      }
   }

   interval->dst.def = dst;
   interval->dst.flags = dst->flags;

   /* Phis must come before any other instruction in the block. */
   ir3_instr_move_before_block(phi, block);
}
1502
1503/* When spilling a block with a single predecessors, the pred may have other
1504 * successors so we can't choose what's live in and we can't spill/restore
1505 * anything. Just make the inserted intervals exactly match the predecessor. If
1506 * it wasn't live in the predecessor then it must've already been spilled. Also,
1507 * there are no phi nodes and no live-ins.
1508 */
1509static void
1510spill_single_pred_live_in(struct ra_spill_ctx *ctx,
1511                          struct ir3_block *block)
1512{
1513   unsigned name;
1514   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
1515                       ctx->live->definitions_count) {
1516      struct ir3_register *reg = ctx->live->definitions[name];
1517      struct ra_spill_interval *interval = ctx->intervals[reg->name];
1518      struct reg_or_immed *val = read_live_in(ctx, reg, block, 0);
1519      if (val)
1520         interval->dst = *val;
1521      else
1522         ra_spill_ctx_remove(ctx, interval);
1523   }
1524}
1525
1526static void
1527rewrite_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *phi,
1528            struct ir3_block *block)
1529{
1530   if (!ctx->intervals[phi->dsts[0]->name]->interval.inserted) {
1531      phi->flags |= IR3_INSTR_UNUSED;
1532      return;
1533   }
1534
1535   for (unsigned i = 0; i < block->predecessors_count; i++) {
1536      struct ir3_block *pred = block->predecessors[i];
1537      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1538
1539      if (!state->visited)
1540         continue;
1541
1542      struct ir3_register *src = phi->srcs[i];
1543      if (!src->def)
1544         continue;
1545
1546      struct hash_entry *entry =
1547         _mesa_hash_table_search(state->remap, src->def);
1548      assert(entry);
1549      struct reg_or_immed *new_val = entry->data;
1550      set_src_val(src, new_val);
1551   }
1552}
1553
1554static void
1555spill_live_out(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
1556               struct ir3_block *block)
1557{
1558   struct ir3_register *def = interval->interval.reg;
1559
1560   spill(ctx, &interval->dst, get_spill_slot(ctx, def), NULL, block);
1561   ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
1562}
1563
1564static void
1565spill_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
1566{
1567   struct ra_spill_block_state *state = &ctx->blocks[block->index];
1568   rb_tree_foreach_safe (struct ra_spill_interval, interval,
1569                         &ctx->reg_ctx.intervals, interval.node) {
1570      if (!BITSET_TEST(state->live_out, interval->interval.reg->name)) {
1571         spill_live_out(ctx, interval, block);
1572      }
1573   }
1574}
1575
1576static void
1577reload_live_out(struct ra_spill_ctx *ctx, struct ir3_register *def,
1578                struct ir3_block *block)
1579{
1580   struct ra_spill_interval *interval = ctx->intervals[def->name];
1581   ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);
1582
1583   reload_def(ctx, def, NULL, block);
1584}
1585
1586static void
1587reload_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
1588{
1589   struct ra_spill_block_state *state = &ctx->blocks[block->index];
1590   unsigned name;
1591   BITSET_FOREACH_SET (name, state->live_out, ctx->live->definitions_count) {
1592      struct ir3_register *reg = ctx->live->definitions[name];
1593      struct ra_spill_interval *interval = ctx->intervals[name];
1594      if (!interval->interval.inserted)
1595         reload_live_out(ctx, reg, block);
1596   }
1597}
1598
1599static void
1600update_live_out_phis(struct ra_spill_ctx *ctx, struct ir3_block *block)
1601{
1602   assert(!block->successors[1]);
1603   struct ir3_block *succ = block->successors[0];
1604   unsigned pred_idx = ir3_block_get_pred_index(succ, block);
1605
1606   foreach_instr (instr, &succ->instr_list) {
1607      if (instr->opc != OPC_META_PHI)
1608         break;
1609
1610      struct ir3_register *def = instr->srcs[pred_idx]->def;
1611      if (!def)
1612         continue;
1613
1614      struct ra_spill_interval *interval = ctx->intervals[def->name];
1615      if (!interval->interval.inserted)
1616         continue;
1617      set_src_val(instr->srcs[pred_idx], &interval->dst);
1618   }
1619}
1620
1621static void
1622record_pred_live_out(struct ra_spill_ctx *ctx,
1623                     struct ra_spill_interval *interval,
1624                     struct ir3_block *block, unsigned pred_idx)
1625{
1626   struct ir3_block *pred = block->predecessors[pred_idx];
1627   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1628
1629   struct ir3_register *def = interval->interval.reg;
1630   if (is_live_in_phi(def, block)) {
1631      def = def->instr->srcs[pred_idx]->def;
1632   }
1633   BITSET_SET(state->live_out, def->name);
1634
1635   rb_tree_foreach (struct ra_spill_interval, child,
1636                    &interval->interval.children, interval.node) {
1637      record_pred_live_out(ctx, child, block, pred_idx);
1638   }
1639}
1640
1641static void
1642record_pred_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
1643{
1644   for (unsigned i = 0; i < block->predecessors_count; i++) {
1645      struct ir3_block *pred = block->predecessors[i];
1646      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1647      if (state->visited)
1648         continue;
1649
1650      state->live_out = rzalloc_array(ctx, BITSET_WORD,
1651                                      BITSET_WORDS(ctx->live->definitions_count));
1652
1653
1654      rb_tree_foreach (struct ra_spill_interval, interval,
1655                       &ctx->reg_ctx.intervals, interval.node) {
1656         record_pred_live_out(ctx, interval, block, i);
1657      }
1658   }
1659}
1660
1661static void
1662record_live_out(struct ra_spill_ctx *ctx,
1663                struct ra_spill_block_state *state,
1664                struct ra_spill_interval *interval)
1665{
1666   if (!(interval->dst.flags & IR3_REG_SSA) ||
1667       interval->dst.def) {
1668      struct reg_or_immed *val = ralloc(ctx, struct reg_or_immed);
1669      *val = interval->dst;
1670      _mesa_hash_table_insert(state->remap, interval->interval.reg, val);
1671   }
1672   rb_tree_foreach (struct ra_spill_interval, child,
1673                    &interval->interval.children, interval.node) {
1674      record_live_out(ctx, state, child);
1675   }
1676}
1677
1678static void
1679record_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
1680{
1681   struct ra_spill_block_state *state = &ctx->blocks[block->index];
1682   state->remap = _mesa_pointer_hash_table_create(ctx);
1683
1684   rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
1685                    interval.node) {
1686      record_live_out(ctx, state, interval);
1687   }
1688}
1689
/* Process one block in dominance order: seed the resident set from live-ins,
 * reconcile with predecessors (spilling/reloading/adding phis as needed),
 * walk the instructions, then reconcile with any already-visited successor
 * and record our own live-out values.
 */
static void
handle_block(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   /* Per-block state: pressure and the interval trees start empty. */
   memset(&ctx->cur_pressure, 0, sizeof(ctx->cur_pressure));
   rb_tree_init(&ctx->reg_ctx.intervals);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   unsigned name;
   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                       ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      handle_live_in(ctx, block, reg);
   }

   /* Phis/inputs/tex-prefetches cluster at the top of the block. */
   foreach_instr (instr, &block->instr_list) {
      if (instr->opc != OPC_META_PHI && instr->opc != OPC_META_INPUT &&
          instr->opc != OPC_META_TEX_PREFETCH)
         break;
      handle_input_phi(ctx, instr);
   }

   if (ctx->spilling) {
      if (block->predecessors_count == 1) {
         spill_single_pred_live_in(ctx, block);
      } else {
         /* Multi-pred: trim live-ins to the limit, reload what stays, and
          * stitch values across the edges with phis where needed.
          */
         spill_live_ins(ctx, block);
         reload_live_ins(ctx, block);
         record_pred_live_outs(ctx, block);
         foreach_instr (instr, &block->instr_list) {
            if (instr->opc != OPC_META_PHI)
               break;
            rewrite_phi(ctx, instr, block);
         }
         BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                             ctx->live->definitions_count) {
            struct ir3_register *reg = ctx->live->definitions[name];
            add_live_in_phi(ctx, reg, block);
         }
      }
   } else {
      update_max_pressure(ctx);
   }

   foreach_instr (instr, &block->instr_list) {
      di(instr, "processing");

      if (instr->opc == OPC_META_PHI || instr->opc == OPC_META_INPUT ||
          instr->opc == OPC_META_TEX_PREFETCH)
         remove_input_phi(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_META_PARALLEL_COPY)
         handle_pcopy(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_MOV &&
               instr->dsts[0] == ctx->base_reg)
         /* skip */;
      else
         handle_instr(ctx, instr);
   }

   /* Back edge into an already-visited successor: match its expectations. */
   if (ctx->spilling && block->successors[0]) {
      struct ra_spill_block_state *state =
         &ctx->blocks[block->successors[0]->index];
      if (state->visited) {
         assert(!block->successors[1]);

         spill_live_outs(ctx, block);
         reload_live_outs(ctx, block);
         update_live_out_phis(ctx, block);
      }
   }

   if (ctx->spilling) {
      record_live_outs(ctx, block);
      ctx->blocks[block->index].visited = true;
   }
}
1766
1767static bool
1768simplify_phi_node(struct ir3_instruction *phi)
1769{
1770   struct ir3_register *def = NULL;
1771   foreach_src (src, phi) {
1772      /* Ignore phi sources which point to the phi itself. */
1773      if (src->def == phi->dsts[0])
1774         continue;
1775      /* If it's undef or it doesn't match the previous sources, bail */
1776      if (!src->def || (def && def != src->def))
1777         return false;
1778      def = src->def;
1779   }
1780
1781   phi->data = def;
1782   phi->flags |= IR3_INSTR_UNUSED;
1783   return true;
1784}
1785
1786static struct ir3_register *
1787simplify_phi_def(struct ir3_register *def)
1788{
1789   if (def->instr->opc == OPC_META_PHI) {
1790      struct ir3_instruction *phi = def->instr;
1791
1792      /* Note: this function is always called at least once after visiting the
1793       * phi, so either there has been a simplified phi in the meantime, in
1794       * which case we will set progress=true and visit the definition again, or
1795       * phi->data already has the most up-to-date value. Therefore we don't
1796       * have to recursively check phi->data.
1797       */
1798      if (phi->data)
1799         return phi->data;
1800   }
1801
1802   return def;
1803}
1804
1805static void
1806simplify_phi_srcs(struct ir3_instruction *instr)
1807{
1808   foreach_src (src, instr) {
1809      if (src->def)
1810         src->def = simplify_phi_def(src->def);
1811   }
1812}
1813
1814/* We insert phi nodes for all live-ins of loops in case we need to split the
1815 * live range. This pass cleans that up for the case where the live range didn't
1816 * actually need to be split.
1817 */
1818static void
1819simplify_phi_nodes(struct ir3 *ir)
1820{
1821   foreach_block (block, &ir->block_list) {
1822      foreach_instr (instr, &block->instr_list) {
1823         if (instr->opc != OPC_META_PHI)
1824            break;
1825         instr->data = NULL;
1826      }
1827   }
1828
1829   bool progress;
1830   do {
1831      progress = false;
1832      foreach_block (block, &ir->block_list) {
1833         foreach_instr (instr, &block->instr_list) {
1834            if (instr->opc == OPC_META_PHI || (instr->flags & IR3_INSTR_UNUSED))
1835               continue;
1836
1837            simplify_phi_srcs(instr);
1838         }
1839
1840         /* Visit phi nodes in the sucessors to make sure that phi sources are
1841          * always visited at least once after visiting the definition they
1842          * point to. See note in simplify_phi_def() for why this is necessary.
1843          */
1844         for (unsigned i = 0; i < 2; i++) {
1845            struct ir3_block *succ = block->successors[i];
1846            if (!succ)
1847               continue;
1848            foreach_instr (instr, &succ->instr_list) {
1849               if (instr->opc != OPC_META_PHI)
1850                  break;
1851               if (instr->flags & IR3_INSTR_UNUSED) {
1852                  if (instr->data)
1853                     instr->data = simplify_phi_def(instr->data);
1854               } else {
1855                  simplify_phi_srcs(instr);
1856                  progress |= simplify_phi_node(instr);
1857               }
1858            }
1859         }
1860      }
1861   } while (progress);
1862}
1863
1864static void
1865unmark_dead(struct ir3 *ir)
1866{
1867   foreach_block (block, &ir->block_list) {
1868      foreach_instr (instr, &block->instr_list) {
1869         instr->flags &= ~IR3_INSTR_UNUSED;
1870      }
1871   }
1872}
1873
1874/* Simple pass to remove now-dead phi nodes and pcopy instructions. We mark
1875 * which ones are dead along the way, so there's nothing to compute here.
1876 */
1877static void
1878cleanup_dead(struct ir3 *ir)
1879{
1880   foreach_block (block, &ir->block_list) {
1881      foreach_instr_safe (instr, &block->instr_list) {
1882         if (instr->flags & IR3_INSTR_UNUSED)
1883            list_delinit(&instr->node);
1884      }
1885   }
1886}
1887
1888/* Deal with merge sets after spilling. Spilling generally leaves the merge sets
1889 * in a mess, and even if we properly cleaned up after ourselves, we would want
 * to recompute the merge sets afterward anyway. That's because
1891 * spilling/reloading can "break up" phi webs and split/collect webs so that
1892 * allocating them to the same register no longer gives any benefit. For
1893 * example, imagine we have this:
1894 *
1895 * if (...) {
1896 *    foo = ...
1897 * } else {
1898 *    bar = ...
1899 * }
1900 * baz = phi(foo, bar)
1901 *
1902 * and we spill "baz":
1903 *
1904 * if (...) {
1905 *    foo = ...
1906 *    spill(foo)
1907 * } else {
1908 *    bar = ...
1909 *    spill(bar)
1910 * }
1911 * baz = reload()
1912 *
1913 * now foo, bar, and baz don't have to be allocated to the same register. How
1914 * exactly the merge sets change can be complicated, so it's easier just to
1915 * recompute them.
1916 *
1917 * However, there's a wrinkle in this: those same merge sets determine the
1918 * register pressure, due to multiple values inhabiting the same register! And
1919 * we assume that this sharing happens when spilling. Therefore we need a
1920 * three-step procedure:
1921 *
1922 * 1. Drop the original merge sets.
1923 * 2. Calculate which values *must* be merged, being careful to only use the
1924 *    interval information which isn't trashed by spilling, and forcibly merge
1925 *    them.
1926 * 3. Let ir3_merge_regs() finish the job, including recalculating the
1927 *    intervals.
1928 */
1929
1930static void
1931fixup_merge_sets(struct ir3_liveness *live, struct ir3 *ir)
1932{
1933   foreach_block (block, &ir->block_list) {
1934      foreach_instr (instr, &block->instr_list) {
1935         ra_foreach_dst (dst, instr) {
1936            dst->merge_set = NULL;
1937            dst->merge_set_offset = 0;
1938         }
1939      }
1940   }
1941
1942   foreach_block (block, &ir->block_list) {
1943      foreach_instr (instr, &block->instr_list) {
1944         if (instr->opc != OPC_META_SPLIT &&
1945             instr->opc != OPC_META_COLLECT)
1946            continue;
1947
1948         struct ir3_register *dst = instr->dsts[0];
1949         ra_foreach_src (src, instr) {
1950            if (!(src->flags & IR3_REG_KILL) &&
1951                src->def->interval_start < dst->interval_end &&
1952                dst->interval_start < src->def->interval_end) {
1953               ir3_force_merge(dst, src->def,
1954                               src->def->interval_start - dst->interval_start);
1955            }
1956         }
1957      }
1958   }
1959
1960   ir3_merge_regs(live, ir);
1961}
1962
1963void
1964ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
1965                  struct ir3_pressure *max_pressure)
1966{
1967   struct ra_spill_ctx *ctx = rzalloc(NULL, struct ra_spill_ctx);
1968   spill_ctx_init(ctx, v, live);
1969
1970   foreach_block (block, &v->ir->block_list) {
1971      handle_block(ctx, block);
1972   }
1973
1974   assert(ctx->cur_pressure.full == 0);
1975   assert(ctx->cur_pressure.half == 0);
1976   assert(ctx->cur_pressure.shared == 0);
1977
1978   *max_pressure = ctx->max_pressure;
1979   ralloc_free(ctx);
1980}
1981
/* Entry point of the spilling pass: rewrites the IR so that register
 * pressure stays under *limit_pressure, inserting spill/reload code backed
 * by private memory. Liveness is recomputed and handed back through *live;
 * v->pvtmem_size grows by the spill space used. Always returns true.
 */
bool
ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
          struct ir3_liveness **live,
          const struct ir3_pressure *limit_pressure)
{
   /* Allocate the new liveness from the same parent as the old one, since
    * the old object is freed and replaced below.
    */
   void *mem_ctx = ralloc_parent(*live);
   struct ra_spill_ctx *ctx = rzalloc(mem_ctx, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, *live);

   ctx->spilling = true;

   ctx->blocks = rzalloc_array(ctx, struct ra_spill_block_state,
                               ctx->live->block_count);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   ctx->limit_pressure = *limit_pressure;
   /* Spill slots are carved out after any private memory already in use. */
   ctx->spill_slot = v->pvtmem_size;

   add_base_reg(ctx, ir);
   compute_next_distance(ctx, ir);

   /* Clear stale IR3_INSTR_UNUSED flags; this pass uses them to mark
    * instructions for cleanup_dead() below.
    */
   unmark_dead(ir);

   foreach_block (block, &ir->block_list) {
      handle_block(ctx, block);
   }

   simplify_phi_nodes(ir);

   cleanup_dead(ir);

   ir3_create_parallel_copies(ir);

   /* After this point, we're done mutating the IR. Liveness has been trashed,
    * so recalculate it. We'll need it for recalculating the merge sets.
    */
   ralloc_free(ctx->live);
   *live = ir3_calc_liveness(mem_ctx, ir);

   fixup_merge_sets(*live, ir);

   /* Record how much private memory the spill slots consumed. */
   v->pvtmem_size = ctx->spill_slot;
   ralloc_free(ctx);

   return true;
}
2029