/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/rb_tree.h"
#include "ir3_ra.h"
#include "ir3_shader.h"

/*
 * This pass does two things:
 *
 * 1. Calculates the maximum register pressure. To do this, we need to use the
 *    exact same technique that RA uses for combining meta_split instructions
 *    with their sources, so that our calculation agrees with RA.
 * 2. Spills when the register pressure exceeds a limit calculated by RA.
 *    The implementation is based on "Register Spilling and Live-Range Splitting
 *    for SSA-Form Programs" by Braun and Hack, although again care has to be
 *    taken to handle combining split/collect instructions.
 */

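/* A value a source can currently be remapped to: an SSA def, an immediate, or
 * a const register. Which union member is valid is determined by the
 * IR3_REG_IMMED/IR3_REG_CONST bits in flags (see set_src_val() below).
 */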
struct reg_or_immed {
   unsigned flags;
   union {
      struct ir3_register *def;
      uint32_t uimm;
      unsigned const_num;
   };
};

struct ra_spill_interval {
   struct ir3_reg_interval interval;

   struct rb_node node;
   struct rb_node half_node;

   /* The current SSA value/const/immed this source is mapped to. */
   struct reg_or_immed dst;

   /* When computing use distances we use the distance relative to the start
    * of the block. So, for example, a value that's defined in cycle 5 of the
    * block and used 6 cycles later will always have a next_use_distance of 11
    * until we reach that use.
    */
   unsigned next_use_distance;

   /* Whether this value was reloaded and therefore doesn't need to be
    * spilled again. Corresponds to the S set in the paper.
    */
   bool already_spilled;

   /* We need to add sources early for accounting purposes, but we have to
    * insert the reload code for them last. Keep track of whether this interval
    * needs to be reloaded later.
    */
   bool needs_reload;

   /* Keep track of whether this interval currently can't be spilled because:
    * - It or one of its children is a source and we're making space for
    *   sources.
    * - It is a destination and we're making space for destinations.
    */
   bool cant_spill;
};

struct ra_spill_block_state {
   unsigned *next_use_end;
   unsigned *next_use_start;

   unsigned cycles;

   /* Map from SSA def to reg_or_immed it is mapped to at the end of the block.
    * This map only contains values which we didn't spill, so it also serves as
    * a record of the new live-out set for this block.
    */
   struct hash_table *remap;

   /* For blocks whose successors are visited first (i.e. loop backedges), which
    * values should be live at the end.
    */
   BITSET_WORD *live_out;

   bool visited;
};

struct ra_spill_ctx {
   struct ir3_reg_ctx reg_ctx;

   struct ra_spill_interval **intervals;
   unsigned intervals_count;

   /* rb trees of live intervals that we can spill, ordered by next-use
    * distance. In the merged_regs case, full_live_intervals also contains the
    * half intervals, since they compete for the same registers (see
    * interval_add() below). We use these trees to determine what to spill.
    */
   struct rb_tree full_live_intervals;
   struct rb_tree half_live_intervals;

   struct ir3_pressure cur_pressure, max_pressure;

   struct ir3_pressure limit_pressure;

   /* When spilling, we need to reserve a register to serve as the zero'd
    * "base". For simplicity we reserve a register at the beginning so that it's
    * always available.
    */
   struct ir3_register *base_reg;

   /* Current pvtmem offset in bytes. */
   unsigned spill_slot;

   struct ir3_liveness *live;

   const struct ir3_compiler *compiler;

   struct ra_spill_block_state *blocks;

   bool spilling;

   bool merged_regs;
};

static void
add_base_reg(struct ra_spill_ctx *ctx, struct ir3 *ir)
{
   struct ir3_block *start = ir3_start_block(ir);

   /* We need to stick it after any meta instructions which need to be first. */
   struct ir3_instruction *after = NULL;
   foreach_instr (instr, &start->instr_list) {
      if (instr->opc != OPC_META_INPUT &&
          instr->opc != OPC_META_TEX_PREFETCH) {
         after = instr;
         break;
      }
   }

   struct ir3_instruction *mov = create_immed(start, 0);

   if (after)
      ir3_instr_move_before(mov, after);

   ctx->base_reg = mov->dsts[0];

   /* We don't create an interval, etc. for the base reg, so just lower the
    * register pressure limit to account for it. We assume it's always
    * available for simplicity.
    */
   ctx->limit_pressure.full -= reg_size(ctx->base_reg);
}


/* Compute the number of cycles per instruction used for next-use-distance
 * analysis. This is just approximate, obviously.
 */
static unsigned
instr_cycles(struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_PARALLEL_COPY) {
      unsigned cycles = 0;
      for (unsigned i = 0; i < instr->dsts_count; i++) {
         if (!instr->srcs[i]->def ||
             instr->srcs[i]->def->merge_set != instr->dsts[i]->merge_set) {
            cycles += reg_elems(instr->srcs[i]);
         }
      }

      return cycles;
   }

   if (instr->opc == OPC_META_COLLECT) {
      unsigned cycles = 0;
      for (unsigned i = 0; i < instr->srcs_count; i++) {
         if (!instr->srcs[i]->def ||
             instr->srcs[i]->def->merge_set != instr->dsts[0]->merge_set) {
            cycles++;
         }
      }

      return cycles;
   }

   if (is_meta(instr))
      return 0;

   return 1 + instr->repeat;
}

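/* A small worked example of the backwards walk below (a sketch, not real ir3
 * syntax), assuming every instruction costs one cycle:
 *
 *    cycle 0: x = ...
 *    cycle 1: y = ...
 *    cycle 2: ... = x
 *
 * Here state->cycles is 3. Walking bottom-up, the use of x sets
 * tmp_next_use[x] = 2, which the def of x then records as dst->next_use = 2.
 * y has no use in the block, so it keeps whatever next_use_end says (UINT_MAX
 * if it is dead). Distances stay relative to the block start, as described
 * for next_use_distance above.
 */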
static bool
compute_block_next_distance(struct ra_spill_ctx *ctx, struct ir3_block *block,
                            unsigned *tmp_next_use)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   memcpy(tmp_next_use, state->next_use_end,
          ctx->live->definitions_count * sizeof(*tmp_next_use));

   unsigned cycle = state->cycles;
   foreach_instr_rev (instr, &block->instr_list) {
      ra_foreach_dst (dst, instr) {
         dst->next_use = tmp_next_use[dst->name];
      }

      ra_foreach_src (src, instr) {
         src->next_use = tmp_next_use[src->def->name];
      }

      cycle -= instr_cycles(instr);

      if (instr->opc == OPC_META_PARALLEL_COPY) {
         ra_foreach_src_n (src, i, instr) {
            if (src->def->merge_set == instr->dsts[i]->merge_set &&
                src->def->merge_set_offset == instr->dsts[i]->merge_set_offset) {
               tmp_next_use[src->def->name] =
                  tmp_next_use[instr->dsts[i]->name];
            } else {
               tmp_next_use[src->def->name] = cycle;
            }
         }
      } else if (instr->opc != OPC_META_PHI) {
         ra_foreach_src (src, instr) {
            tmp_next_use[src->def->name] = cycle;
         }
      }

      ra_foreach_dst (dst, instr) {
         tmp_next_use[dst->name] = UINT_MAX;
      }
   }

   memcpy(state->next_use_start, tmp_next_use,
          ctx->live->definitions_count * sizeof(*tmp_next_use));

   bool progress = false;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      const struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *pred_state = &ctx->blocks[pred->index];

      /* Add a large-enough distance in front of edges exiting the loop so that
       * variables that are live-through the loop but not used inside it are
       * prioritized for spilling, as per the paper. This just needs to be
       * larger than the longest path through the loop.
       */
      bool loop_exit = pred->loop_depth < block->loop_depth;
      unsigned block_distance = pred_state->cycles + (loop_exit ? 100000 : 0);

      for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
         if (state->next_use_start[j] < UINT_MAX &&
             state->next_use_start[j] + block_distance <
             pred_state->next_use_end[j]) {
            pred_state->next_use_end[j] = state->next_use_start[j] +
               block_distance;
            progress = true;
         }
      }

      foreach_instr (phi, &block->instr_list) {
         if (phi->opc != OPC_META_PHI)
            break;
         if (!phi->srcs[i]->def)
            continue;
         unsigned src = phi->srcs[i]->def->name;
         if (phi->dsts[0]->next_use < UINT_MAX &&
             phi->dsts[0]->next_use + block_distance <
             pred_state->next_use_end[src]) {
            pred_state->next_use_end[src] = phi->dsts[0]->next_use +
               block_distance;
            progress = true;
         }
      }
   }

   return progress;
}

static void
compute_next_distance(struct ra_spill_ctx *ctx, struct ir3 *ir)
{
   for (unsigned i = 0; i < ctx->live->block_count; i++) {
      ctx->blocks[i].next_use_start =
         ralloc_array(ctx, unsigned, ctx->live->definitions_count);
      ctx->blocks[i].next_use_end =
         ralloc_array(ctx, unsigned, ctx->live->definitions_count);

      for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
         ctx->blocks[i].next_use_start[j] = UINT_MAX;
         ctx->blocks[i].next_use_end[j] = UINT_MAX;
      }
   }

   foreach_block (block, &ir->block_list) {
      struct ra_spill_block_state *state = &ctx->blocks[block->index];
      state->cycles = 0;
      foreach_instr (instr, &block->instr_list) {
         state->cycles += instr_cycles(instr);
         foreach_dst (dst, instr) {
            dst->spill_slot = ~0;
         }
      }
   }

   unsigned *tmp_next_use =
      ralloc_array(ctx, unsigned, ctx->live->definitions_count);

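   /* Iterate backwards over the blocks until we reach a fixed point. A single
    * reverse pass should be enough for acyclic control flow; extra iterations
    * are only needed when a loop backedge propagates a shorter distance into
    * a block that has already been processed.
    */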
   bool progress = true;
   while (progress) {
      progress = false;
      foreach_block_rev (block, &ir->block_list) {
         progress |= compute_block_next_distance(ctx, block, tmp_next_use);
      }
   }
}

static void
ra_spill_interval_init(struct ra_spill_interval *interval,
                       struct ir3_register *reg)
{
   ir3_reg_interval_init(&interval->interval, reg);
   interval->dst.flags = reg->flags;
   interval->dst.def = reg;
   interval->already_spilled = false;
   interval->needs_reload = false;
   interval->cant_spill = false;
}

static struct ra_spill_interval *
ir3_reg_interval_to_interval(struct ir3_reg_interval *interval)
{
   return rb_node_data(struct ra_spill_interval, interval, interval);
}

static struct ra_spill_interval *
ra_spill_interval_root(struct ra_spill_interval *interval)
{
   struct ir3_reg_interval *ir3_interval = &interval->interval;
   while (ir3_interval->parent)
      ir3_interval = ir3_interval->parent;
   return ir3_reg_interval_to_interval(ir3_interval);
}

static struct ra_spill_ctx *
ir3_reg_ctx_to_ctx(struct ir3_reg_ctx *ctx)
{
   return rb_node_data(struct ra_spill_ctx, ctx, reg_ctx);
}

/* Sort the live-interval trees with the furthest next use first, so that
 * limit() below evicts the value whose next use is furthest away, per the
 * heuristic from the paper.
 */
static int
ra_spill_interval_cmp(const struct rb_node *_a, const struct rb_node *_b)
{
   const struct ra_spill_interval *a =
      rb_node_data(const struct ra_spill_interval, _a, node);
   const struct ra_spill_interval *b =
      rb_node_data(const struct ra_spill_interval, _b, node);
   return b->next_use_distance - a->next_use_distance;
}

static int
ra_spill_interval_half_cmp(const struct rb_node *_a, const struct rb_node *_b)
{
   const struct ra_spill_interval *a =
      rb_node_data(const struct ra_spill_interval, _a, half_node);
   const struct ra_spill_interval *b =
      rb_node_data(const struct ra_spill_interval, _b, half_node);
   return b->next_use_distance - a->next_use_distance;
}

static void
interval_add(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
   struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
   struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);

   unsigned size = reg_size(interval->interval.reg);
   if (interval->interval.reg->flags & IR3_REG_SHARED) {
      ctx->cur_pressure.shared += size;
   } else {
      if (interval->interval.reg->flags & IR3_REG_HALF) {
         ctx->cur_pressure.half += size;
         if (ctx->spilling) {
            rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
                           ra_spill_interval_half_cmp);
         }
      }
      if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
         ctx->cur_pressure.full += size;
         if (ctx->spilling) {
            rb_tree_insert(&ctx->full_live_intervals, &interval->node,
                           ra_spill_interval_cmp);
         }
      }
   }
}

static void
interval_delete(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
   struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
   struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);

   unsigned size = reg_size(interval->interval.reg);
   if (interval->interval.reg->flags & IR3_REG_SHARED) {
      ctx->cur_pressure.shared -= size;
   } else {
      if (interval->interval.reg->flags & IR3_REG_HALF) {
         ctx->cur_pressure.half -= size;
         if (ctx->spilling) {
            rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
         }
      }
      if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
         ctx->cur_pressure.full -= size;
         if (ctx->spilling) {
            rb_tree_remove(&ctx->full_live_intervals, &interval->node);
         }
      }
   }
}

static void
interval_readd(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_parent,
               struct ir3_reg_interval *_child)
{
   interval_add(_ctx, _child);
}

static void
spill_ctx_init(struct ra_spill_ctx *ctx, struct ir3_shader_variant *v,
               struct ir3_liveness *live)
{
   ctx->live = live;
   ctx->intervals = ralloc_array(ctx, struct ra_spill_interval *,
                                 ctx->live->definitions_count);
   struct ra_spill_interval *intervals =
      rzalloc_array(ctx, struct ra_spill_interval,
                    ctx->live->definitions_count);
   for (unsigned i = 0; i < ctx->live->definitions_count; i++)
      ctx->intervals[i] = &intervals[i];

   ctx->intervals_count = ctx->live->definitions_count;
   ctx->compiler = v->shader->compiler;
   ctx->merged_regs = v->mergedregs;

   rb_tree_init(&ctx->reg_ctx.intervals);
   ctx->reg_ctx.interval_add = interval_add;
   ctx->reg_ctx.interval_delete = interval_delete;
   ctx->reg_ctx.interval_readd = interval_readd;
}

static void
ra_spill_ctx_insert(struct ra_spill_ctx *ctx,
                    struct ra_spill_interval *interval)
{
   ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);
}

static void
ra_spill_ctx_remove(struct ra_spill_ctx *ctx,
                    struct ra_spill_interval *interval)
{
   ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval);
}

static void
init_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   ra_spill_interval_init(interval, dst);
   if (ctx->spilling)
      interval->next_use_distance = dst->next_use;
}

static void
insert_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   if (interval->interval.inserted)
      return;

   ra_spill_ctx_insert(ctx, interval);
   interval->cant_spill = true;

   /* For precolored inputs, make sure we leave enough registers to allow for
    * holes in the inputs. It can happen that the binning shader has a lower
    * register pressure than the main shader, but the main shader decided to
    * add holes between the inputs which means that the binning shader has a
    * higher register demand.
    */
   if (dst->instr->opc == OPC_META_INPUT && dst->num != INVALID_REG) {
      physreg_t physreg = ra_reg_get_physreg(dst);
      physreg_t max = physreg + reg_size(dst);

      if (interval->interval.reg->flags & IR3_REG_SHARED)
         ctx->max_pressure.shared = MAX2(ctx->max_pressure.shared, max);
      else if (interval->interval.reg->flags & IR3_REG_HALF)
         ctx->max_pressure.half = MAX2(ctx->max_pressure.half, max);
      else
         ctx->max_pressure.full = MAX2(ctx->max_pressure.full, max);
   }
}

static void
insert_src(struct ra_spill_ctx *ctx, struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (!interval->interval.inserted) {
      ra_spill_ctx_insert(ctx, interval);
      interval->needs_reload = true;
      interval->already_spilled = true;
   }

   ra_spill_interval_root(interval)->cant_spill = true;
}

static void
remove_src_early(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
                 struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (!interval->interval.inserted || interval->interval.parent ||
       !rb_tree_is_empty(&interval->interval.children))
      return;

   ra_spill_ctx_remove(ctx, interval);
}

static void
remove_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
           struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (!interval->interval.inserted)
      return;

   ra_spill_ctx_remove(ctx, interval);
}

static void
finish_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   interval->cant_spill = false;
}

static void
remove_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];

   if (!interval->interval.inserted)
      return;

   ra_spill_ctx_remove(ctx, interval);
}

static void
update_src_next_use(struct ra_spill_ctx *ctx, struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   assert(interval->interval.inserted);

   interval->next_use_distance = src->next_use;

   /* If this node is inserted in one of the trees, then it needs to be resorted
    * as its key has changed.
    */
   if (!interval->interval.parent && !(src->flags & IR3_REG_SHARED)) {
      if (src->flags & IR3_REG_HALF) {
         rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
         rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
                        ra_spill_interval_half_cmp);
      }
      if (ctx->merged_regs || !(src->flags & IR3_REG_HALF)) {
         rb_tree_remove(&ctx->full_live_intervals, &interval->node);
         rb_tree_insert(&ctx->full_live_intervals, &interval->node,
                        ra_spill_interval_cmp);
      }
   }
}

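/* Note that spill slots are byte offsets into pvtmem (see
 * ra_spill_ctx::spill_slot above), while reg_size(), merge_set->size, and
 * merge_set_offset count 16-bit half-register units, which is presumably why
 * the sizes and offsets below are scaled by 2.
 */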
static unsigned
get_spill_slot(struct ra_spill_ctx *ctx, struct ir3_register *reg)
{
   if (reg->merge_set) {
      if (reg->merge_set->spill_slot == ~0) {
         reg->merge_set->spill_slot = ALIGN_POT(ctx->spill_slot,
                                                reg->merge_set->alignment);
         ctx->spill_slot = reg->merge_set->spill_slot + reg->merge_set->size * 2;
      }
      return reg->merge_set->spill_slot + reg->merge_set_offset * 2;
   } else {
      if (reg->spill_slot == ~0) {
         reg->spill_slot = ALIGN_POT(ctx->spill_slot, reg_elem_size(reg));
         ctx->spill_slot = reg->spill_slot + reg_size(reg) * 2;
      }
      return reg->spill_slot;
   }
}

static void
set_src_val(struct ir3_register *src, const struct reg_or_immed *val)
{
   if (val->flags & IR3_REG_IMMED) {
      src->flags = IR3_REG_IMMED | (val->flags & IR3_REG_HALF);
      src->uim_val = val->uimm;
      src->def = NULL;
   } else if (val->flags & IR3_REG_CONST) {
      src->flags = IR3_REG_CONST | (val->flags & IR3_REG_HALF);
      src->num = val->const_num;
      src->def = NULL;
   } else {
      src->def = val->def;
   }
}

static struct ir3_register *
materialize_pcopy_src(const struct reg_or_immed *src,
                      struct ir3_instruction *instr,
                      struct ir3_block *block)
{
   struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   struct ir3_register *dst = __ssa_dst(mov);
   dst->flags |= src->flags & IR3_REG_HALF;
   struct ir3_register *mov_src = ir3_src_create(mov, INVALID_REG, src->flags);
   set_src_val(mov_src, src);
   mov->cat1.src_type = mov->cat1.dst_type =
      (src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   if (instr)
      ir3_instr_move_before(mov, instr);
   return dst;
}

static void
spill(struct ra_spill_ctx *ctx, const struct reg_or_immed *val,
      unsigned spill_slot, struct ir3_instruction *instr, struct ir3_block *block)
{
   struct ir3_register *reg;

   /* If spilling an immed/const pcopy src, we need to actually materialize it
    * first with a mov.
    */
   if (val->flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
      reg = materialize_pcopy_src(val, instr, block);
   } else {
      reg = val->def;
   }

   d("spilling ssa_%u:%u to %u", reg->instr->serialno, reg->name,
     spill_slot);

   unsigned elems = reg_elems(reg);
   struct ir3_instruction *spill =
      ir3_instr_create(block, OPC_SPILL_MACRO, 0, 3);
   ir3_src_create(spill, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
   unsigned src_flags = reg->flags & (IR3_REG_HALF | IR3_REG_IMMED |
                                      IR3_REG_CONST | IR3_REG_SSA |
                                      IR3_REG_ARRAY);
   struct ir3_register *src = ir3_src_create(spill, INVALID_REG, src_flags);
   ir3_src_create(spill, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
   spill->cat6.dst_offset = spill_slot;
   spill->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   src->def = reg;
   if (reg->flags & IR3_REG_ARRAY) {
      src->size = reg->size;
      src->array.id = reg->array.id;
      src->array.offset = 0;
   } else {
      src->wrmask = reg->wrmask;
   }

   if (instr)
      ir3_instr_move_before(spill, instr);
}

static void
spill_interval(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
               struct ir3_instruction *instr, struct ir3_block *block)
{
   spill(ctx, &interval->dst, get_spill_slot(ctx, interval->interval.reg),
         instr, block);
}

/* This is similar to "limit" in the paper: when the current pressure exceeds
 * the limit computed by RA, walk the live intervals in next-use order and
 * evict spillable ones (emitting a spill first unless the value was already
 * spilled) until the pressure fits again.
 */
static void
limit(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
      d("cur half pressure %u exceeds %u", ctx->cur_pressure.half,
        ctx->limit_pressure.half);
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->half_live_intervals, half_node) {
         d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
           interval->interval.reg->name);
         if (!interval->cant_spill) {
            if (!interval->already_spilled)
               spill_interval(ctx, interval, instr, instr->block);
            ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
            if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
               break;
         }
      }

      assert(ctx->cur_pressure.half <= ctx->limit_pressure.half);
   }

   if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
      d("cur full pressure %u exceeds %u", ctx->cur_pressure.full,
        ctx->limit_pressure.full);
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->full_live_intervals, node) {
         d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
           interval->interval.reg->name);
         if (!interval->cant_spill) {
            if (!interval->already_spilled)
               spill_interval(ctx, interval, instr, instr->block);
            ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
            if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
               break;
         } else {
            d("can't spill");
         }
      }

      assert(ctx->cur_pressure.full <= ctx->limit_pressure.full);
   }
}

/* There's a corner case where we reload a value which has overlapping live
 * values already reloaded, either because it's the child of some other interval
 * that was already reloaded or some of its children have already been
 * reloaded. Because RA only expects overlapping source/dest intervals for meta
 * instructions (split/collect), and we don't want to add register pressure by
 * creating an entirely separate value, we need to add splits and collects to
 * deal with this case. These splits/collects have to also have correct merge
 * set information, so that it doesn't result in any actual code or register
 * pressure in practice.
 */

static void
add_to_merge_set(struct ir3_merge_set *set, struct ir3_register *def,
                 unsigned offset)
{
   def->merge_set = set;
   def->merge_set_offset = offset;
   def->interval_start = set->interval_start + offset;
   def->interval_end = set->interval_start + offset + reg_size(def);
}

static struct ir3_register *
split(struct ir3_register *def, unsigned offset,
      struct ir3_instruction *after, struct ir3_block *block)
{
   if (reg_elems(def) == 1) {
      assert(offset == 0);
      return def;
   }

   assert(!(def->flags & IR3_REG_ARRAY));
   assert(def->merge_set);
   /* Note: "after" may be NULL (see the guarded move below), so create the
    * instruction in the explicitly-passed block.
    */
   struct ir3_instruction *split =
      ir3_instr_create(block, OPC_META_SPLIT, 1, 1);
   struct ir3_register *dst = __ssa_dst(split);
   dst->flags |= def->flags & IR3_REG_HALF;
   struct ir3_register *src = ir3_src_create(split, INVALID_REG, def->flags);
   src->wrmask = def->wrmask;
   src->def = def;
   add_to_merge_set(def->merge_set, dst,
                    def->merge_set_offset + offset * reg_elem_size(def));
   if (after)
      ir3_instr_move_before(split, after);
   return dst;
}

static struct ir3_register *
extract(struct ir3_register *parent_def, unsigned offset, unsigned elems,
        struct ir3_instruction *after, struct ir3_block *block)
{
   if (offset == 0 && elems == reg_elems(parent_def))
      return parent_def;

   /* As in split(), "after" may be NULL, so use the passed-in block. */
   struct ir3_instruction *collect =
      ir3_instr_create(block, OPC_META_COLLECT, 1, elems);
   struct ir3_register *dst = __ssa_dst(collect);
   dst->flags |= parent_def->flags & IR3_REG_HALF;
   dst->wrmask = MASK(elems);
   add_to_merge_set(parent_def->merge_set, dst, parent_def->merge_set_offset);

   for (unsigned i = 0; i < elems; i++) {
      ir3_src_create(collect, INVALID_REG, parent_def->flags)->def =
         split(parent_def, offset + i, after, block);
   }

   if (after)
      ir3_instr_move_before(collect, after);
   return dst;
}

static struct ir3_register *
reload(struct ra_spill_ctx *ctx, struct ir3_register *reg,
       struct ir3_instruction *after, struct ir3_block *block)
{
   unsigned spill_slot = get_spill_slot(ctx, reg);

   d("reloading ssa_%u:%u from %u", reg->instr->serialno, reg->name,
     spill_slot);

   unsigned elems = reg_elems(reg);
   struct ir3_instruction *reload =
      ir3_instr_create(block, OPC_RELOAD_MACRO, 1, 3);
   struct ir3_register *dst = __ssa_dst(reload);
   dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
   ir3_src_create(reload, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
   struct ir3_register *offset_reg =
      ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED);
   offset_reg->uim_val = spill_slot;
   ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
   reload->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   if (reg->flags & IR3_REG_ARRAY) {
      dst->array.offset = 0;
      dst->array.id = reg->array.id;
      dst->size = reg->size;
   } else {
      dst->wrmask = MASK(elems);
   }

   dst->merge_set = reg->merge_set;
   dst->merge_set_offset = reg->merge_set_offset;
   dst->interval_start = reg->interval_start;
   dst->interval_end = reg->interval_end;

   if (after)
      ir3_instr_move_before(reload, after);

   return dst;
}

static void
rewrite_src_interval(struct ra_spill_ctx *ctx,
                     struct ra_spill_interval *interval,
                     struct ir3_register *def,
                     struct ir3_instruction *instr,
                     struct ir3_block *block)
{
   interval->dst.flags = def->flags;
   interval->dst.def = def;
   interval->needs_reload = false;

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      struct ir3_register *child_reg = child->interval.reg;
      struct ir3_register *child_def =
         extract(def, (child_reg->interval_start -
                       interval->interval.reg->interval_start) / reg_elem_size(def),
                 reg_elems(child_reg), instr, block);
      rewrite_src_interval(ctx, child, child_def, instr, block);
   }
}

static void
reload_def(struct ra_spill_ctx *ctx, struct ir3_register *def,
           struct ir3_instruction *instr, struct ir3_block *block)
{
   unsigned elems = reg_elems(def);
   struct ra_spill_interval *interval = ctx->intervals[def->name];

   struct ir3_reg_interval *ir3_parent = interval->interval.parent;

   if (ir3_parent) {
      struct ra_spill_interval *parent =
         ir3_reg_interval_to_interval(ir3_parent);
      if (!parent->needs_reload) {
         interval->dst.flags = def->flags;
         interval->dst.def = extract(
            parent->dst.def, (def->interval_start - parent->dst.def->interval_start) /
            reg_elem_size(def), elems, instr, block);
         return;
      }
   }

   struct ir3_register *dst = reload(ctx, def, instr, block);

   rewrite_src_interval(ctx, interval, dst, instr, block);
}

static void
reload_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
           struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (interval->needs_reload) {
      reload_def(ctx, src->def, instr, instr->block);
   }

   ra_spill_interval_root(interval)->cant_spill = false;
}

static void
rewrite_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
            struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   set_src_val(src, &interval->dst);
}

static void
update_max_pressure(struct ra_spill_ctx *ctx)
{
   d("pressure:");
   d("\tfull: %u", ctx->cur_pressure.full);
   d("\thalf: %u", ctx->cur_pressure.half);
   d("\tshared: %u", ctx->cur_pressure.shared);

   ctx->max_pressure.full =
      MAX2(ctx->max_pressure.full, ctx->cur_pressure.full);
   ctx->max_pressure.half =
      MAX2(ctx->max_pressure.half, ctx->cur_pressure.half);
   ctx->max_pressure.shared =
      MAX2(ctx->max_pressure.shared, ctx->cur_pressure.shared);
}

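/* The main per-instruction walk. Roughly, for each instruction we:
 *  1. initialize the destination intervals and, when spilling, insert every
 *     source interval (reserving space for anything that must be reloaded),
 *  2. limit() to make room for the sources, then emit the actual reloads,
 *  3. remove killed sources, insert the destinations, and limit() again to
 *     make room for them,
 *  4. rewrite the sources to whatever their intervals are currently mapped
 *     to, and drop unused destinations.
 * When only computing pressure (!ctx->spilling), the limit() steps become
 * update_max_pressure() instead.
 */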
9477ec681f3Smrgstatic void
9487ec681f3Smrghandle_instr(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
9497ec681f3Smrg{
9507ec681f3Smrg   ra_foreach_dst (dst, instr) {
9517ec681f3Smrg      init_dst(ctx, dst);
9527ec681f3Smrg   }
9537ec681f3Smrg
9547ec681f3Smrg   if (ctx->spilling) {
9557ec681f3Smrg      ra_foreach_src (src, instr)
9567ec681f3Smrg         insert_src(ctx, src);
9577ec681f3Smrg   }
9587ec681f3Smrg
9597ec681f3Smrg   /* Handle tied destinations. If a destination is tied to a source and that
9607ec681f3Smrg    * source is live-through, then we need to allocate a new register for the
9617ec681f3Smrg    * destination which is live-through itself and cannot overlap the
9627ec681f3Smrg    * sources.
9637ec681f3Smrg    */
9647ec681f3Smrg
9657ec681f3Smrg   ra_foreach_dst (dst, instr) {
9667ec681f3Smrg      struct ir3_register *tied_src = dst->tied;
9677ec681f3Smrg      if (tied_src && !(tied_src->flags & IR3_REG_FIRST_KILL))
9687ec681f3Smrg         insert_dst(ctx, dst);
9697ec681f3Smrg   }
9707ec681f3Smrg
9717ec681f3Smrg   if (ctx->spilling)
9727ec681f3Smrg      limit(ctx, instr);
9737ec681f3Smrg   else
9747ec681f3Smrg      update_max_pressure(ctx);
9757ec681f3Smrg
9767ec681f3Smrg   if (ctx->spilling) {
9777ec681f3Smrg      ra_foreach_src (src, instr) {
9787ec681f3Smrg         reload_src(ctx, instr, src);
9797ec681f3Smrg         update_src_next_use(ctx, src);
9807ec681f3Smrg      }
9817ec681f3Smrg   }
9827ec681f3Smrg
9837ec681f3Smrg   ra_foreach_src (src, instr) {
9847ec681f3Smrg      if (src->flags & IR3_REG_FIRST_KILL)
9857ec681f3Smrg         remove_src_early(ctx, instr, src);
9867ec681f3Smrg   }
9877ec681f3Smrg
9887ec681f3Smrg   ra_foreach_dst (dst, instr) {
9897ec681f3Smrg      insert_dst(ctx, dst);
9907ec681f3Smrg   }
9917ec681f3Smrg
9927ec681f3Smrg   if (ctx->spilling)
9937ec681f3Smrg      limit(ctx, instr);
9947ec681f3Smrg   else
9957ec681f3Smrg      update_max_pressure(ctx);
9967ec681f3Smrg
9977ec681f3Smrg   /* We have to remove sources before rewriting them so that we can lookup the
9987ec681f3Smrg    * interval to remove before the source itself is changed.
9997ec681f3Smrg    */
10007ec681f3Smrg   ra_foreach_src (src, instr) {
10017ec681f3Smrg      if (src->flags & IR3_REG_FIRST_KILL)
10027ec681f3Smrg         remove_src(ctx, instr, src);
10037ec681f3Smrg   }
10047ec681f3Smrg
10057ec681f3Smrg   if (ctx->spilling) {
10067ec681f3Smrg      ra_foreach_src (src, instr) {
10077ec681f3Smrg         rewrite_src(ctx, instr, src);
10087ec681f3Smrg      }
10097ec681f3Smrg   }
10107ec681f3Smrg
10117ec681f3Smrg   ra_foreach_dst (dst, instr) {
10127ec681f3Smrg      finish_dst(ctx, dst);
10137ec681f3Smrg   }
10147ec681f3Smrg
10157ec681f3Smrg   for (unsigned i = 0; i < instr->dsts_count; i++) {
10167ec681f3Smrg      if (ra_reg_is_dst(instr->dsts[i]) &&
10177ec681f3Smrg          (instr->dsts[i]->flags & IR3_REG_UNUSED))
10187ec681f3Smrg         remove_dst(ctx, instr->dsts[i]);
10197ec681f3Smrg   }
10207ec681f3Smrg}
10217ec681f3Smrg
10227ec681f3Smrgstatic struct ra_spill_interval *
10237ec681f3Smrgcreate_temp_interval(struct ra_spill_ctx *ctx, struct ir3_register *def)
10247ec681f3Smrg{
10257ec681f3Smrg   unsigned name = ctx->intervals_count++;
10267ec681f3Smrg   unsigned offset = ctx->live->interval_offset;
10277ec681f3Smrg
10287ec681f3Smrg   /* This is kinda hacky, but we need to create a fake SSA def here that is
10297ec681f3Smrg    * only used as part of the pcopy accounting. See below.
10307ec681f3Smrg    */
10317ec681f3Smrg   struct ir3_register *reg = rzalloc(ctx, struct ir3_register);
10327ec681f3Smrg   *reg = *def;
10337ec681f3Smrg   reg->name = name;
10347ec681f3Smrg   reg->interval_start = offset;
10357ec681f3Smrg   reg->interval_end = offset + reg_size(def);
10367ec681f3Smrg   reg->merge_set = NULL;
10377ec681f3Smrg
10387ec681f3Smrg   ctx->intervals = reralloc(ctx, ctx->intervals, struct ra_spill_interval *,
10397ec681f3Smrg                             ctx->intervals_count);
10407ec681f3Smrg   struct ra_spill_interval *interval = rzalloc(ctx, struct ra_spill_interval);
10417ec681f3Smrg   ra_spill_interval_init(interval, reg);
10427ec681f3Smrg   ctx->intervals[name] = interval;
10437ec681f3Smrg   ctx->live->interval_offset += reg_size(def);
10447ec681f3Smrg   return interval;
10457ec681f3Smrg}
10467ec681f3Smrg
10477ec681f3Smrg/* In the sequence of copies generated (see below), would this source be killed?
10487ec681f3Smrg */
10497ec681f3Smrgstatic bool
10507ec681f3Smrgis_last_pcopy_src(struct ir3_instruction *pcopy, unsigned src_n)
10517ec681f3Smrg{
10527ec681f3Smrg   struct ir3_register *src = pcopy->srcs[src_n];
10537ec681f3Smrg   if (!(src->flags & IR3_REG_KILL))
10547ec681f3Smrg      return false;
10557ec681f3Smrg   for (unsigned j = src_n + 1; j < pcopy->srcs_count; j++) {
10567ec681f3Smrg      if (pcopy->srcs[j]->def == src->def)
10577ec681f3Smrg         return false;
10587ec681f3Smrg   }
10597ec681f3Smrg   return true;
10607ec681f3Smrg}
10617ec681f3Smrg
10627ec681f3Smrg/* Parallel copies are different from normal instructions. The sources together
10637ec681f3Smrg * may be larger than the entire register file, so we cannot just reload every
10647ec681f3Smrg * source like normal, and indeed that probably wouldn't be a great idea.
10657ec681f3Smrg * Instead we essentially need to lower the parallel copy to "copies," just like
10667ec681f3Smrg * in the normal CSSA construction, although we implement the copies by
10677ec681f3Smrg * reloading and then possibly spilling values. We essentially just shuffle
10687ec681f3Smrg * around the sources until each source either (a) is live or (b) has the same
10697ec681f3Smrg * spill slot as its corresponding destination. We do this by decomposing the
10707ec681f3Smrg * copy into a series of copies, so:
10717ec681f3Smrg *
10727ec681f3Smrg * a, b, c = d, e, f
10737ec681f3Smrg *
10747ec681f3Smrg * becomes:
10757ec681f3Smrg *
10767ec681f3Smrg * d' = d
10777ec681f3Smrg * e' = e
10787ec681f3Smrg * f' = f
10797ec681f3Smrg * a = d'
10807ec681f3Smrg * b = e'
10817ec681f3Smrg * c = f'
10827ec681f3Smrg *
10837ec681f3Smrg * the temporary SSA values d', e', and f' never actually show up in the result.
10847ec681f3Smrg * They are only used for our internal accounting. They may, however, have their
10857ec681f3Smrg * own spill slot created for them. Similarly, we don't actually emit any copy
10867ec681f3Smrg * instructions, although we emit the spills/reloads that *would've* been
10877ec681f3Smrg * required if those copies were there.
10887ec681f3Smrg *
10897ec681f3Smrg * TODO: in order to reduce the number of temporaries and therefore spill slots,
10907ec681f3Smrg * we could instead do a more complicated analysis that considers the location
10917ec681f3Smrg * transfer graph.
10927ec681f3Smrg *
10937ec681f3Smrg * In addition, we actually remove the parallel copy and rewrite all its uses
10947ec681f3Smrg * (in the phi nodes) rather than rewrite its sources at the end. Recreating it
10957ec681f3Smrg * later turns out to be easier than keeping it up-to-date throughout this pass,
10967ec681f3Smrg * since we may have to remove entries for phi sources that are spilled and add
10977ec681f3Smrg * entries for live-outs that are spilled and reloaded, which can happen here
10987ec681f3Smrg * and then possibly be undone or done again when processing live-ins of the
10997ec681f3Smrg * successor block.
11007ec681f3Smrg */
11017ec681f3Smrg
11027ec681f3Smrgstatic void
11037ec681f3Smrghandle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
11047ec681f3Smrg{
11057ec681f3Smrg   foreach_dst (dst, pcopy) {
11067ec681f3Smrg      struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
11077ec681f3Smrg      ra_spill_interval_init(dst_interval, dst);
11087ec681f3Smrg   }
11097ec681f3Smrg
11107ec681f3Smrg   foreach_src_n (src, i, pcopy) {
11117ec681f3Smrg      d("processing src %u", i);
11127ec681f3Smrg      struct ir3_register *dst = pcopy->dsts[i];
11137ec681f3Smrg
11147ec681f3Smrg      /* Skip the intermediate copy for cases where the source is merged with
       * the destination. Crucially this means that we also don't reload/spill
       * it if it's been spilled, because it shares the same spill slot.
       */
      if (src->def && src->def->merge_set &&
          src->def->merge_set == dst->merge_set &&
          src->def->merge_set_offset == dst->merge_set_offset) {
         struct ra_spill_interval *src_interval = ctx->intervals[src->def->name];
         struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
         if (src_interval->interval.inserted) {
            update_src_next_use(ctx, src);
            if (is_last_pcopy_src(pcopy, i))
               ra_spill_ctx_remove(ctx, src_interval);
            dst_interval->cant_spill = true;
            ra_spill_ctx_insert(ctx, dst_interval);
            limit(ctx, pcopy);
            dst_interval->cant_spill = false;
            dst_interval->dst = src_interval->dst;
         }
      } else if (src->def) {
         struct ra_spill_interval *temp_interval =
            create_temp_interval(ctx, dst);
         struct ir3_register *temp = temp_interval->interval.reg;
         temp_interval->next_use_distance = src->next_use;

         insert_src(ctx, src);
         limit(ctx, pcopy);
         reload_src(ctx, pcopy, src);
         update_src_next_use(ctx, src);
         if (is_last_pcopy_src(pcopy, i))
            remove_src(ctx, pcopy, src);
         struct ra_spill_interval *src_interval =
            ctx->intervals[src->def->name];
         temp_interval->dst = src_interval->dst;

         temp_interval->cant_spill = true;
         ra_spill_ctx_insert(ctx, temp_interval);
         limit(ctx, pcopy);
         temp_interval->cant_spill = false;

         src->flags = temp->flags;
         src->def = temp;
      }
   }

   d("done with pcopy srcs");

   foreach_src_n (src, i, pcopy) {
      struct ir3_register *dst = pcopy->dsts[i];

      if (src->def && src->def->merge_set &&
          src->def->merge_set == dst->merge_set &&
          src->def->merge_set_offset == dst->merge_set_offset)
         continue;

      struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];

      if (!src->def) {
         dst_interval->cant_spill = true;
         ra_spill_ctx_insert(ctx, dst_interval);
         limit(ctx, pcopy);
         dst_interval->cant_spill = false;

         assert(src->flags & (IR3_REG_CONST | IR3_REG_IMMED));
         if (src->flags & IR3_REG_CONST) {
            dst_interval->dst.flags = src->flags;
            dst_interval->dst.const_num = src->num;
         } else {
            dst_interval->dst.flags = src->flags;
            dst_interval->dst.uimm = src->uim_val;
         }
      } else {
         struct ra_spill_interval *temp_interval = ctx->intervals[src->def->name];

         insert_src(ctx, src);
         limit(ctx, pcopy);
         reload_src(ctx, pcopy, src);
         remove_src(ctx, pcopy, src);

         dst_interval->dst = temp_interval->dst;
         ra_spill_ctx_insert(ctx, dst_interval);
      }
   }

   pcopy->flags |= IR3_INSTR_UNUSED;
}

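/* Phis, inputs, and tex prefetches define their values at the top of the
 * block, so insert their destination intervals before walking the block body.
 */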
static void
handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   init_dst(ctx, instr->dsts[0]);
   insert_dst(ctx, instr->dsts[0]);
   finish_dst(ctx, instr->dsts[0]);
}

static void
remove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_TEX_PREFETCH) {
      ra_foreach_src (src, instr)
         remove_src(ctx, instr, src);
   }
   if (instr->dsts[0]->flags & IR3_REG_UNUSED)
      remove_dst(ctx, instr->dsts[0]);
}

static void
handle_live_in(struct ra_spill_ctx *ctx, struct ir3_block *block,
               struct ir3_register *def)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   ra_spill_interval_init(interval, def);
   if (ctx->spilling) {
      interval->next_use_distance =
         ctx->blocks[block->index].next_use_start[def->name];
   }

   ra_spill_ctx_insert(ctx, interval);
}

static bool
is_live_in_phi(struct ir3_register *def, struct ir3_block *block)
{
   return def->instr->opc == OPC_META_PHI && def->instr->block == block;
}

static bool
is_live_in_pred(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
      if (!def)
         return false;
   }

   return _mesa_hash_table_search(state->remap, def);
}

static bool
is_live_in_undef(struct ir3_register *def,
                 struct ir3_block *block, unsigned pred_idx)
{
   if (!is_live_in_phi(def, block))
      return false;

   return !def->instr->srcs[pred_idx]->def;
}

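/* Look up the value def carries on the edge from the given predecessor,
 * following phi sources, or return NULL if it isn't in a register there
 * (i.e. it was spilled, or the phi source is undef).
 */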
static struct reg_or_immed *
read_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
             struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];

   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
      if (!def)
         return NULL;
   }

   struct hash_entry *entry = _mesa_hash_table_search(state->remap, def);
   if (entry)
      return entry->data;
   else
      return NULL;
}

static bool
is_live_in_all_preds(struct ra_spill_ctx *ctx, struct ir3_register *def,
                     struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      if (!is_live_in_pred(ctx, def, block, i))
         return false;
   }

   return true;
}

static void
spill_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
              struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited)
         continue;

      struct reg_or_immed *pred_def = read_live_in(ctx, def, block, i);
      if (pred_def) {
         spill(ctx, pred_def, get_spill_slot(ctx, def), NULL, pred);
      }
   }
}

static void
spill_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   bool all_preds_visited = true;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (!state->visited) {
         all_preds_visited = false;
         break;
      }
   }

   /* Note: in the paper they explicitly spill live-through values first, but
    * we should be doing that automatically by virtue of picking the largest
    * distance due to the extra distance added to edges out of loops.
    *
    * TODO: Keep track of pressure in each block and preemptively spill
    * live-through values as described in the paper to avoid spilling them
    * inside the loop.
    */
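   /* A sketch of why that works (the numbers here are invented): if a value
    * is used inside a 10-instruction loop body, its next-use distance at the
    * loop header is at most ~10, while a value used only after the loop has
    * the large loop-exit penalty added to its distance. Sorted by distance,
    * the live-through value is therefore always picked for spilling first.
    */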

   if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->half_live_intervals, half_node) {
         if (all_preds_visited &&
             is_live_in_all_preds(ctx, interval->interval.reg, block))
            continue;
         spill_live_in(ctx, interval->interval.reg, block);
         ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
         if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
            break;
      }
   }

   if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->full_live_intervals, node) {
         if (all_preds_visited &&
             is_live_in_all_preds(ctx, interval->interval.reg, block))
            continue;
         spill_live_in(ctx, interval->interval.reg, block);
         ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
         if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
            break;
      }
   }
}

static void
live_in_rewrite(struct ra_spill_ctx *ctx,
                struct ra_spill_interval *interval,
                struct reg_or_immed *new_val,
                struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
   struct ir3_register *def = interval->interval.reg;
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
   }

   if (def)
      _mesa_hash_table_insert(state->remap, def, new_val);

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      assert(new_val->flags & IR3_REG_SSA);
      struct ir3_register *child_def =
         extract(new_val->def,
                 (child->interval.reg->interval_start - def->interval_start) /
                 reg_elem_size(def), reg_elems(child->interval.reg),
                 NULL, pred);
      struct reg_or_immed *child_val = ralloc(ctx, struct reg_or_immed);
      child_val->def = child_def;
      child_val->flags = child_def->flags;
      live_in_rewrite(ctx, child, child_val, block, pred_idx);
   }
}

static void
reload_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
               struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (!state->visited)
         continue;

      if (is_live_in_undef(def, block, i))
         continue;

      struct reg_or_immed *new_val = read_live_in(ctx, def, block, i);

      if (!new_val) {
         new_val = ralloc(ctx, struct reg_or_immed);
         new_val->def = reload(ctx, def, NULL, pred);
         new_val->flags = new_val->def->flags;
      }
      live_in_rewrite(ctx, interval, new_val, block, i);
   }
}

static void
reload_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
                    interval.node) {
      reload_live_in(ctx, interval->interval.reg, block);
   }
}

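/* If a live-in value ended up with different defs in different predecessors
 * (for example, it was reloaded in one and stayed in a register in another),
 * we have to insert a phi to merge them; otherwise just point the interval
 * at the common def.
 */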
static void
add_live_in_phi(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   if (!interval->interval.inserted)
      return;

   bool needs_phi = false;
   struct ir3_register *cur_def = NULL;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited) {
         needs_phi = true;
         break;
      }

      struct hash_entry *entry =
         _mesa_hash_table_search(state->remap, def);
      assert(entry);
      struct reg_or_immed *pred_val = entry->data;
      if ((pred_val->flags & (IR3_REG_IMMED | IR3_REG_CONST)) ||
          !pred_val->def ||
          (cur_def && cur_def != pred_val->def)) {
         needs_phi = true;
         break;
      }
      cur_def = pred_val->def;
   }

   if (!needs_phi) {
      interval->dst.def = cur_def;
      interval->dst.flags = cur_def->flags;
      return;
   }

   struct ir3_instruction *phi =
      ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
   struct ir3_register *dst = __ssa_dst(phi);
   dst->flags |= def->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
   dst->size = def->size;
   dst->wrmask = def->wrmask;

   dst->interval_start = def->interval_start;
   dst->interval_end = def->interval_end;
   dst->merge_set = def->merge_set;
   dst->merge_set_offset = def->merge_set_offset;

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      struct ir3_register *src = ir3_src_create(phi, INVALID_REG, dst->flags);
      src->size = def->size;
      src->wrmask = def->wrmask;

      if (state->visited) {
         struct hash_entry *entry =
            _mesa_hash_table_search(state->remap, def);
         assert(entry);
         struct reg_or_immed *new_val = entry->data;
         set_src_val(src, new_val);
      } else {
         src->def = def;
      }
   }

   interval->dst.def = dst;
   interval->dst.flags = dst->flags;

   ir3_instr_move_before_block(phi, block);
}

/* When spilling a block with a single predecessor, the predecessor may have
 * other successors, so we can't choose what's live in and we can't
 * spill/restore anything. Just make the inserted intervals exactly match the
 * predecessor. If a value wasn't live in the predecessor then it must
 * already have been spilled. Also, there are no phi nodes to handle and no
 * live-ins to reload.
 */
static void
spill_single_pred_live_in(struct ra_spill_ctx *ctx,
                          struct ir3_block *block)
{
   unsigned name;
   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                       ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      struct ra_spill_interval *interval = ctx->intervals[reg->name];
      struct reg_or_immed *val = read_live_in(ctx, reg, block, 0);
      if (val)
         interval->dst = *val;
      else
         ra_spill_ctx_remove(ctx, interval);
   }
}

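/* Rewrite the sources of a phi to whatever value each already-visited
 * predecessor currently holds, or mark the phi unused if its destination
 * was spilled.
 */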
static void
rewrite_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *phi,
            struct ir3_block *block)
{
   if (!ctx->intervals[phi->dsts[0]->name]->interval.inserted) {
      phi->flags |= IR3_INSTR_UNUSED;
      return;
   }

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited)
         continue;

      struct ir3_register *src = phi->srcs[i];
      if (!src->def)
         continue;

      struct hash_entry *entry =
         _mesa_hash_table_search(state->remap, src->def);
      assert(entry);
      struct reg_or_immed *new_val = entry->data;
      set_src_val(src, new_val);
   }
}

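/* These are used at the bottom of a loop, where the header has already been
 * visited and has therefore fixed which values it expects to find in
 * registers: anything still live that the header doesn't expect must be
 * spilled, and anything it expects that we dropped must be reloaded.
 */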
static void
spill_live_out(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
               struct ir3_block *block)
{
   struct ir3_register *def = interval->interval.reg;

   spill(ctx, &interval->dst, get_spill_slot(ctx, def), NULL, block);
   ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
}

static void
spill_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   rb_tree_foreach_safe (struct ra_spill_interval, interval,
                         &ctx->reg_ctx.intervals, interval.node) {
      if (!BITSET_TEST(state->live_out, interval->interval.reg->name)) {
         spill_live_out(ctx, interval, block);
      }
   }
}

static void
reload_live_out(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);

   reload_def(ctx, def, NULL, block);
}

static void
reload_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   unsigned name;
   BITSET_FOREACH_SET (name, state->live_out, ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      struct ra_spill_interval *interval = ctx->intervals[name];
      if (!interval->interval.inserted)
         reload_live_out(ctx, reg, block);
   }
}

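/* At a backedge, phi sources in the loop header that refer to values we
 * remapped while processing the loop body must be rewritten to point at the
 * current def.
 */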
static void
update_live_out_phis(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   assert(!block->successors[1]);
   struct ir3_block *succ = block->successors[0];
   unsigned pred_idx = ir3_block_get_pred_index(succ, block);

   foreach_instr (instr, &succ->instr_list) {
      if (instr->opc != OPC_META_PHI)
         break;

      struct ir3_register *def = instr->srcs[pred_idx]->def;
      if (!def)
         continue;

      struct ra_spill_interval *interval = ctx->intervals[def->name];
      if (!interval->interval.inserted)
         continue;
      set_src_val(instr->srcs[pred_idx], &interval->dst);
   }
}

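/* Record in each not-yet-visited predecessor (i.e. the bottom of a loop)
 * which values we expect to be live at its end, so that when we get there we
 * know what to keep in registers and what to reload.
 */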
static void
record_pred_live_out(struct ra_spill_ctx *ctx,
                     struct ra_spill_interval *interval,
                     struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];

   struct ir3_register *def = interval->interval.reg;
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
   }
   BITSET_SET(state->live_out, def->name);

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      record_pred_live_out(ctx, child, block, pred_idx);
   }
}

static void
record_pred_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (state->visited)
         continue;

      state->live_out = rzalloc_array(ctx, BITSET_WORD,
                                      BITSET_WORDS(ctx->live->definitions_count));

      rb_tree_foreach (struct ra_spill_interval, interval,
                       &ctx->reg_ctx.intervals, interval.node) {
         record_pred_live_out(ctx, interval, block, i);
      }
   }
}

static void
record_live_out(struct ra_spill_ctx *ctx,
                struct ra_spill_block_state *state,
                struct ra_spill_interval *interval)
{
   if (!(interval->dst.flags & IR3_REG_SSA) ||
       interval->dst.def) {
      struct reg_or_immed *val = ralloc(ctx, struct reg_or_immed);
      *val = interval->dst;
      _mesa_hash_table_insert(state->remap, interval->interval.reg, val);
   }
   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      record_live_out(ctx, state, child);
   }
}

static void
record_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   state->remap = _mesa_pointer_hash_table_create(ctx);

   rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
                    interval.node) {
      record_live_out(ctx, state, interval);
   }
}

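/* Process a single block: set up the live-in intervals, handle phis and
 * inputs, then walk the instructions either just tracking register pressure
 * or actually spilling/reloading, and finally record the state that
 * successors and unvisited predecessors will need.
 */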
static void
handle_block(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   memset(&ctx->cur_pressure, 0, sizeof(ctx->cur_pressure));
   rb_tree_init(&ctx->reg_ctx.intervals);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   unsigned name;
   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                       ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      handle_live_in(ctx, block, reg);
   }

   foreach_instr (instr, &block->instr_list) {
      if (instr->opc != OPC_META_PHI && instr->opc != OPC_META_INPUT &&
          instr->opc != OPC_META_TEX_PREFETCH)
         break;
      handle_input_phi(ctx, instr);
   }

   if (ctx->spilling) {
      if (block->predecessors_count == 1) {
         spill_single_pred_live_in(ctx, block);
      } else {
         spill_live_ins(ctx, block);
         reload_live_ins(ctx, block);
         record_pred_live_outs(ctx, block);
         foreach_instr (instr, &block->instr_list) {
            if (instr->opc != OPC_META_PHI)
               break;
            rewrite_phi(ctx, instr, block);
         }
         BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                             ctx->live->definitions_count) {
            struct ir3_register *reg = ctx->live->definitions[name];
            add_live_in_phi(ctx, reg, block);
         }
      }
   } else {
      update_max_pressure(ctx);
   }

   foreach_instr (instr, &block->instr_list) {
      di(instr, "processing");

      if (instr->opc == OPC_META_PHI || instr->opc == OPC_META_INPUT ||
          instr->opc == OPC_META_TEX_PREFETCH)
         remove_input_phi(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_META_PARALLEL_COPY)
         handle_pcopy(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_MOV &&
               instr->dsts[0] == ctx->base_reg)
         /* skip */;
      else
         handle_instr(ctx, instr);
   }

   if (ctx->spilling && block->successors[0]) {
      struct ra_spill_block_state *state =
         &ctx->blocks[block->successors[0]->index];
      if (state->visited) {
         assert(!block->successors[1]);

         spill_live_outs(ctx, block);
         reload_live_outs(ctx, block);
         update_live_out_phis(ctx, block);
      }
   }

   if (ctx->spilling) {
      record_live_outs(ctx, block);
      ctx->blocks[block->index].visited = true;
   }
}

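/* A phi is redundant if, ignoring sources that point back at the phi itself,
 * all of its sources resolve to one and the same definition.
 */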
static bool
simplify_phi_node(struct ir3_instruction *phi)
{
   struct ir3_register *def = NULL;
   foreach_src (src, phi) {
      /* Ignore phi sources which point to the phi itself. */
      if (src->def == phi->dsts[0])
         continue;
      /* If it's undef or it doesn't match the previous sources, bail */
      if (!src->def || (def && def != src->def))
         return false;
      def = src->def;
   }

   phi->data = def;
   phi->flags |= IR3_INSTR_UNUSED;
   return true;
}

static struct ir3_register *
simplify_phi_def(struct ir3_register *def)
{
   if (def->instr->opc == OPC_META_PHI) {
      struct ir3_instruction *phi = def->instr;

      /* Note: this function is always called at least once after visiting the
       * phi, so either there has been a simplified phi in the meantime, in
       * which case we will set progress=true and visit the definition again, or
       * phi->data already has the most up-to-date value. Therefore we don't
       * have to recursively check phi->data.
       */
      if (phi->data)
         return phi->data;
   }

   return def;
}

static void
simplify_phi_srcs(struct ir3_instruction *instr)
{
   foreach_src (src, instr) {
      if (src->def)
         src->def = simplify_phi_def(src->def);
   }
}

/* We insert phi nodes for all live-ins of loops in case we need to split the
 * live range. This pass cleans that up for the case where the live range didn't
 * actually need to be split.
 */
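/* For example (illustrative SSA), a loop live-in whose range never needed
 * splitting ends up as a phi whose only non-self source is the original def:
 *
 *    x2 = phi(x1, x2)
 *
 * simplify_phi_node() records x1 in phi->data and marks the phi
 * IR3_INSTR_UNUSED, and simplify_phi_srcs() then rewrites users of x2 to use
 * x1 directly.
 */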
static void
simplify_phi_nodes(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_PHI)
            break;
         instr->data = NULL;
      }
   }

   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         foreach_instr (instr, &block->instr_list) {
            if (instr->opc == OPC_META_PHI || (instr->flags & IR3_INSTR_UNUSED))
               continue;

            simplify_phi_srcs(instr);
         }

         /* Visit phi nodes in the successors to make sure that phi sources
          * are always visited at least once after visiting the definition
          * they point to. See the note in simplify_phi_def() for why this is
          * necessary.
          */
         for (unsigned i = 0; i < 2; i++) {
            struct ir3_block *succ = block->successors[i];
            if (!succ)
               continue;
            foreach_instr (instr, &succ->instr_list) {
               if (instr->opc != OPC_META_PHI)
                  break;
               if (instr->flags & IR3_INSTR_UNUSED) {
                  if (instr->data)
                     instr->data = simplify_phi_def(instr->data);
               } else {
                  simplify_phi_srcs(instr);
                  progress |= simplify_phi_node(instr);
               }
            }
         }
      }
   } while (progress);
}

static void
unmark_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         instr->flags &= ~IR3_INSTR_UNUSED;
      }
   }
}

/* Simple pass to remove now-dead phi nodes and pcopy instructions. We mark
 * which ones are dead along the way, so there's nothing to compute here.
 */
static void
cleanup_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->flags & IR3_INSTR_UNUSED)
            list_delinit(&instr->node);
      }
   }
}

/* Deal with merge sets after spilling. Spilling generally leaves the merge
 * sets in a mess, and even if we properly cleaned up after ourselves, we
 * would want to recompute the merge sets afterward anyway. That's because
 * spilling/reloading can "break up" phi webs and split/collect webs so that
 * allocating them to the same register no longer gives any benefit. For
 * example, imagine we have this:
 *
 * if (...) {
 *    foo = ...
 * } else {
 *    bar = ...
 * }
 * baz = phi(foo, bar)
 *
 * and we spill "baz":
 *
 * if (...) {
 *    foo = ...
 *    spill(foo)
 * } else {
 *    bar = ...
 *    spill(bar)
 * }
 * baz = reload()
 *
 * now foo, bar, and baz don't have to be allocated to the same register. How
 * exactly the merge sets change can be complicated, so it's easier just to
 * recompute them.
 *
 * However, there's a wrinkle in this: those same merge sets determine the
 * register pressure, due to multiple values inhabiting the same register! And
 * we assume that this sharing happens when spilling. Therefore we need a
 * three-step procedure:
 *
 * 1. Drop the original merge sets.
 * 2. Calculate which values *must* be merged, being careful to only use the
 *    interval information which isn't trashed by spilling, and forcibly merge
 *    them.
 * 3. Let ir3_merge_regs() finish the job, including recalculating the
 *    intervals.
 */

static void
fixup_merge_sets(struct ir3_liveness *live, struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         ra_foreach_dst (dst, instr) {
            dst->merge_set = NULL;
            dst->merge_set_offset = 0;
         }
      }
   }

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_SPLIT &&
             instr->opc != OPC_META_COLLECT)
            continue;

         struct ir3_register *dst = instr->dsts[0];
         ra_foreach_src (src, instr) {
            if (!(src->flags & IR3_REG_KILL) &&
                src->def->interval_start < dst->interval_end &&
                dst->interval_start < src->def->interval_end) {
               ir3_force_merge(dst, src->def,
                               src->def->interval_start - dst->interval_start);
            }
         }
      }
   }

   ir3_merge_regs(live, ir);
}

void
ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
                  struct ir3_pressure *max_pressure)
{
   struct ra_spill_ctx *ctx = rzalloc(NULL, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, live);

   foreach_block (block, &v->ir->block_list) {
      handle_block(ctx, block);
   }

   assert(ctx->cur_pressure.full == 0);
   assert(ctx->cur_pressure.half == 0);
   assert(ctx->cur_pressure.shared == 0);

   *max_pressure = ctx->max_pressure;
   ralloc_free(ctx);
}

bool
ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
          struct ir3_liveness **live,
          const struct ir3_pressure *limit_pressure)
{
   void *mem_ctx = ralloc_parent(*live);
   struct ra_spill_ctx *ctx = rzalloc(mem_ctx, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, *live);

   ctx->spilling = true;

   ctx->blocks = rzalloc_array(ctx, struct ra_spill_block_state,
                               ctx->live->block_count);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   ctx->limit_pressure = *limit_pressure;
   ctx->spill_slot = v->pvtmem_size;

   add_base_reg(ctx, ir);
   compute_next_distance(ctx, ir);

   unmark_dead(ir);

   foreach_block (block, &ir->block_list) {
      handle_block(ctx, block);
   }

   simplify_phi_nodes(ir);

   cleanup_dead(ir);

   ir3_create_parallel_copies(ir);

   /* After this point, we're done mutating the IR. Liveness has been trashed,
    * so recalculate it. We'll need it for recalculating the merge sets.
    */
   ralloc_free(ctx->live);
   *live = ir3_calc_liveness(mem_ctx, ir);

   fixup_merge_sets(*live, ir);

   v->pvtmem_size = ctx->spill_slot;
   ralloc_free(ctx);

   return true;
}