17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2021 Valve Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3Smrg * SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include "ir3_ra.h"
257ec681f3Smrg
267ec681f3Smrg/* The spilling pass leaves out a few details required to successfully operate
277ec681f3Smrg * ldp/stp:
287ec681f3Smrg *
297ec681f3Smrg * 1. ldp/stp can only load/store 4 components at a time, but spilling ignores
307ec681f3Smrg *    that and just spills/restores entire values, including arrays and values
317ec681f3Smrg *    created for texture setup which can be more than 4 components.
327ec681f3Smrg * 2. The spiller doesn't add barrier dependencies needed for post-RA
337ec681f3Smrg *    scheduling.
347ec681f3Smrg *
357ec681f3Smrg * The first one, in particular, is much easier to handle after RA because
367ec681f3Smrg * arrays and normal values can be treated the same way. Therefore this pass
377ec681f3Smrg * runs after RA, and handles both issues. This keeps the complexity out of the
387ec681f3Smrg * spiller.
397ec681f3Smrg */
407ec681f3Smrg
417ec681f3Smrgstatic void
427ec681f3Smrgsplit_spill(struct ir3_instruction *spill)
437ec681f3Smrg{
447ec681f3Smrg   unsigned orig_components = spill->srcs[2]->uim_val;
457ec681f3Smrg
467ec681f3Smrg   /* We don't handle splitting dependencies. */
477ec681f3Smrg   assert(spill->deps_count == 0);
487ec681f3Smrg
497ec681f3Smrg   if (orig_components <= 4) {
507ec681f3Smrg      if (spill->srcs[1]->flags & IR3_REG_ARRAY) {
517ec681f3Smrg         spill->srcs[1]->wrmask = MASK(orig_components);
527ec681f3Smrg         spill->srcs[1]->num = spill->srcs[1]->array.base;
537ec681f3Smrg         spill->srcs[1]->flags &= ~IR3_REG_ARRAY;
547ec681f3Smrg      }
557ec681f3Smrg      return;
567ec681f3Smrg   }
577ec681f3Smrg
587ec681f3Smrg   for (unsigned comp = 0; comp < orig_components; comp += 4) {
597ec681f3Smrg      unsigned components = MIN2(orig_components - comp, 4);
607ec681f3Smrg      struct ir3_instruction *clone = ir3_instr_clone(spill);
617ec681f3Smrg      ir3_instr_move_before(clone, spill);
627ec681f3Smrg
637ec681f3Smrg      clone->srcs[1]->wrmask = MASK(components);
647ec681f3Smrg      if (clone->srcs[1]->flags & IR3_REG_ARRAY) {
657ec681f3Smrg         clone->srcs[1]->num = clone->srcs[1]->array.base + comp;
667ec681f3Smrg         clone->srcs[1]->flags &= ~IR3_REG_ARRAY;
677ec681f3Smrg      }
687ec681f3Smrg
697ec681f3Smrg      clone->srcs[2]->uim_val = components;
707ec681f3Smrg      clone->cat6.dst_offset +=
717ec681f3Smrg         comp * ((spill->srcs[1]->flags & IR3_REG_HALF) ? 2 : 4);
727ec681f3Smrg   }
737ec681f3Smrg
747ec681f3Smrg   list_delinit(&spill->node);
757ec681f3Smrg}
767ec681f3Smrg
777ec681f3Smrgstatic void
787ec681f3Smrgsplit_reload(struct ir3_instruction *reload)
797ec681f3Smrg{
807ec681f3Smrg   unsigned orig_components = reload->srcs[2]->uim_val;
817ec681f3Smrg
827ec681f3Smrg   assert(reload->deps_count == 0);
837ec681f3Smrg
847ec681f3Smrg   if (orig_components <= 4) {
857ec681f3Smrg      if (reload->dsts[0]->flags & IR3_REG_ARRAY) {
867ec681f3Smrg         reload->dsts[0]->wrmask = MASK(orig_components);
877ec681f3Smrg         reload->dsts[0]->num = reload->dsts[0]->array.base;
887ec681f3Smrg         reload->dsts[0]->flags &= ~IR3_REG_ARRAY;
897ec681f3Smrg      }
907ec681f3Smrg      return;
917ec681f3Smrg   }
927ec681f3Smrg
937ec681f3Smrg   for (unsigned comp = 0; comp < orig_components; comp += 4) {
947ec681f3Smrg      unsigned components = MIN2(orig_components - comp, 4);
957ec681f3Smrg      struct ir3_instruction *clone = ir3_instr_clone(reload);
967ec681f3Smrg      ir3_instr_move_before(clone, reload);
977ec681f3Smrg
987ec681f3Smrg      clone->dsts[0]->wrmask = MASK(components);
997ec681f3Smrg      if (clone->dsts[0]->flags & IR3_REG_ARRAY) {
1007ec681f3Smrg         clone->dsts[0]->num = clone->dsts[0]->array.base + comp;
1017ec681f3Smrg         clone->dsts[0]->flags &= ~IR3_REG_ARRAY;
1027ec681f3Smrg      }
1037ec681f3Smrg
1047ec681f3Smrg      clone->srcs[2]->uim_val = components;
1057ec681f3Smrg      clone->srcs[1]->uim_val +=
1067ec681f3Smrg         comp * ((reload->dsts[0]->flags & IR3_REG_HALF) ? 2 : 4);
1077ec681f3Smrg   }
1087ec681f3Smrg
1097ec681f3Smrg   list_delinit(&reload->node);
1107ec681f3Smrg}
1117ec681f3Smrg
1127ec681f3Smrgstatic void
1137ec681f3Smrgadd_spill_reload_deps(struct ir3_block *block)
1147ec681f3Smrg{
1157ec681f3Smrg   struct ir3_instruction *last_spill = NULL;
1167ec681f3Smrg
1177ec681f3Smrg   foreach_instr (instr, &block->instr_list) {
1187ec681f3Smrg      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
1197ec681f3Smrg          last_spill) {
1207ec681f3Smrg         ir3_instr_add_dep(instr, last_spill);
1217ec681f3Smrg      }
1227ec681f3Smrg
1237ec681f3Smrg      if (instr->opc == OPC_SPILL_MACRO)
1247ec681f3Smrg         last_spill = instr;
1257ec681f3Smrg   }
1267ec681f3Smrg
1277ec681f3Smrg
1287ec681f3Smrg   last_spill = NULL;
1297ec681f3Smrg
1307ec681f3Smrg   foreach_instr_rev (instr, &block->instr_list) {
1317ec681f3Smrg      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
1327ec681f3Smrg          last_spill) {
1337ec681f3Smrg         ir3_instr_add_dep(last_spill, instr);
1347ec681f3Smrg      }
1357ec681f3Smrg
1367ec681f3Smrg      if (instr->opc == OPC_SPILL_MACRO)
1377ec681f3Smrg         last_spill = instr;
1387ec681f3Smrg   }
1397ec681f3Smrg}
1407ec681f3Smrg
1417ec681f3Smrgbool
1427ec681f3Smrgir3_lower_spill(struct ir3 *ir)
1437ec681f3Smrg{
1447ec681f3Smrg   foreach_block (block, &ir->block_list) {
1457ec681f3Smrg      foreach_instr_safe (instr, &block->instr_list) {
1467ec681f3Smrg         if (instr->opc == OPC_SPILL_MACRO)
1477ec681f3Smrg            split_spill(instr);
1487ec681f3Smrg         else if (instr->opc == OPC_RELOAD_MACRO)
1497ec681f3Smrg            split_reload(instr);
1507ec681f3Smrg      }
1517ec681f3Smrg
1527ec681f3Smrg      add_spill_reload_deps(block);
1537ec681f3Smrg
1547ec681f3Smrg      foreach_instr (instr, &block->instr_list) {
1557ec681f3Smrg         if (instr->opc == OPC_SPILL_MACRO)
1567ec681f3Smrg            instr->opc = OPC_STP;
1577ec681f3Smrg         else if (instr->opc == OPC_RELOAD_MACRO)
1587ec681f3Smrg            instr->opc = OPC_LDP;
1597ec681f3Smrg      }
1607ec681f3Smrg   }
1617ec681f3Smrg
1627ec681f3Smrg   return true;
1637ec681f3Smrg}
164