17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2021 Valve Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include "ir3_ra.h" 257ec681f3Smrg 267ec681f3Smrg/* The spilling pass leaves out a few details required to successfully operate 277ec681f3Smrg * ldp/stp: 287ec681f3Smrg * 297ec681f3Smrg * 1. ldp/stp can only load/store 4 components at a time, but spilling ignores 307ec681f3Smrg * that and just spills/restores entire values, including arrays and values 317ec681f3Smrg * created for texture setup which can be more than 4 components. 327ec681f3Smrg * 2. The spiller doesn't add barrier dependencies needed for post-RA 337ec681f3Smrg * scheduling. 347ec681f3Smrg * 357ec681f3Smrg * The first one, in particular, is much easier to handle after RA because 367ec681f3Smrg * arrays and normal values can be treated the same way. Therefore this pass 377ec681f3Smrg * runs after RA, and handles both issues. This keeps the complexity out of the 387ec681f3Smrg * spiller. 397ec681f3Smrg */ 407ec681f3Smrg 417ec681f3Smrgstatic void 427ec681f3Smrgsplit_spill(struct ir3_instruction *spill) 437ec681f3Smrg{ 447ec681f3Smrg unsigned orig_components = spill->srcs[2]->uim_val; 457ec681f3Smrg 467ec681f3Smrg /* We don't handle splitting dependencies. */ 477ec681f3Smrg assert(spill->deps_count == 0); 487ec681f3Smrg 497ec681f3Smrg if (orig_components <= 4) { 507ec681f3Smrg if (spill->srcs[1]->flags & IR3_REG_ARRAY) { 517ec681f3Smrg spill->srcs[1]->wrmask = MASK(orig_components); 527ec681f3Smrg spill->srcs[1]->num = spill->srcs[1]->array.base; 537ec681f3Smrg spill->srcs[1]->flags &= ~IR3_REG_ARRAY; 547ec681f3Smrg } 557ec681f3Smrg return; 567ec681f3Smrg } 577ec681f3Smrg 587ec681f3Smrg for (unsigned comp = 0; comp < orig_components; comp += 4) { 597ec681f3Smrg unsigned components = MIN2(orig_components - comp, 4); 607ec681f3Smrg struct ir3_instruction *clone = ir3_instr_clone(spill); 617ec681f3Smrg ir3_instr_move_before(clone, spill); 627ec681f3Smrg 637ec681f3Smrg clone->srcs[1]->wrmask = MASK(components); 647ec681f3Smrg if (clone->srcs[1]->flags & IR3_REG_ARRAY) { 657ec681f3Smrg clone->srcs[1]->num = clone->srcs[1]->array.base + comp; 667ec681f3Smrg clone->srcs[1]->flags &= ~IR3_REG_ARRAY; 677ec681f3Smrg } 687ec681f3Smrg 697ec681f3Smrg clone->srcs[2]->uim_val = components; 707ec681f3Smrg clone->cat6.dst_offset += 717ec681f3Smrg comp * ((spill->srcs[1]->flags & IR3_REG_HALF) ? 2 : 4); 727ec681f3Smrg } 737ec681f3Smrg 747ec681f3Smrg list_delinit(&spill->node); 757ec681f3Smrg} 767ec681f3Smrg 777ec681f3Smrgstatic void 787ec681f3Smrgsplit_reload(struct ir3_instruction *reload) 797ec681f3Smrg{ 807ec681f3Smrg unsigned orig_components = reload->srcs[2]->uim_val; 817ec681f3Smrg 827ec681f3Smrg assert(reload->deps_count == 0); 837ec681f3Smrg 847ec681f3Smrg if (orig_components <= 4) { 857ec681f3Smrg if (reload->dsts[0]->flags & IR3_REG_ARRAY) { 867ec681f3Smrg reload->dsts[0]->wrmask = MASK(orig_components); 877ec681f3Smrg reload->dsts[0]->num = reload->dsts[0]->array.base; 887ec681f3Smrg reload->dsts[0]->flags &= ~IR3_REG_ARRAY; 897ec681f3Smrg } 907ec681f3Smrg return; 917ec681f3Smrg } 927ec681f3Smrg 937ec681f3Smrg for (unsigned comp = 0; comp < orig_components; comp += 4) { 947ec681f3Smrg unsigned components = MIN2(orig_components - comp, 4); 957ec681f3Smrg struct ir3_instruction *clone = ir3_instr_clone(reload); 967ec681f3Smrg ir3_instr_move_before(clone, reload); 977ec681f3Smrg 987ec681f3Smrg clone->dsts[0]->wrmask = MASK(components); 997ec681f3Smrg if (clone->dsts[0]->flags & IR3_REG_ARRAY) { 1007ec681f3Smrg clone->dsts[0]->num = clone->dsts[0]->array.base + comp; 1017ec681f3Smrg clone->dsts[0]->flags &= ~IR3_REG_ARRAY; 1027ec681f3Smrg } 1037ec681f3Smrg 1047ec681f3Smrg clone->srcs[2]->uim_val = components; 1057ec681f3Smrg clone->srcs[1]->uim_val += 1067ec681f3Smrg comp * ((reload->dsts[0]->flags & IR3_REG_HALF) ? 2 : 4); 1077ec681f3Smrg } 1087ec681f3Smrg 1097ec681f3Smrg list_delinit(&reload->node); 1107ec681f3Smrg} 1117ec681f3Smrg 1127ec681f3Smrgstatic void 1137ec681f3Smrgadd_spill_reload_deps(struct ir3_block *block) 1147ec681f3Smrg{ 1157ec681f3Smrg struct ir3_instruction *last_spill = NULL; 1167ec681f3Smrg 1177ec681f3Smrg foreach_instr (instr, &block->instr_list) { 1187ec681f3Smrg if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) && 1197ec681f3Smrg last_spill) { 1207ec681f3Smrg ir3_instr_add_dep(instr, last_spill); 1217ec681f3Smrg } 1227ec681f3Smrg 1237ec681f3Smrg if (instr->opc == OPC_SPILL_MACRO) 1247ec681f3Smrg last_spill = instr; 1257ec681f3Smrg } 1267ec681f3Smrg 1277ec681f3Smrg 1287ec681f3Smrg last_spill = NULL; 1297ec681f3Smrg 1307ec681f3Smrg foreach_instr_rev (instr, &block->instr_list) { 1317ec681f3Smrg if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) && 1327ec681f3Smrg last_spill) { 1337ec681f3Smrg ir3_instr_add_dep(last_spill, instr); 1347ec681f3Smrg } 1357ec681f3Smrg 1367ec681f3Smrg if (instr->opc == OPC_SPILL_MACRO) 1377ec681f3Smrg last_spill = instr; 1387ec681f3Smrg } 1397ec681f3Smrg} 1407ec681f3Smrg 1417ec681f3Smrgbool 1427ec681f3Smrgir3_lower_spill(struct ir3 *ir) 1437ec681f3Smrg{ 1447ec681f3Smrg foreach_block (block, &ir->block_list) { 1457ec681f3Smrg foreach_instr_safe (instr, &block->instr_list) { 1467ec681f3Smrg if (instr->opc == OPC_SPILL_MACRO) 1477ec681f3Smrg split_spill(instr); 1487ec681f3Smrg else if (instr->opc == OPC_RELOAD_MACRO) 1497ec681f3Smrg split_reload(instr); 1507ec681f3Smrg } 1517ec681f3Smrg 1527ec681f3Smrg add_spill_reload_deps(block); 1537ec681f3Smrg 1547ec681f3Smrg foreach_instr (instr, &block->instr_list) { 1557ec681f3Smrg if (instr->opc == OPC_SPILL_MACRO) 1567ec681f3Smrg instr->opc = OPC_STP; 1577ec681f3Smrg else if (instr->opc == OPC_RELOAD_MACRO) 1587ec681f3Smrg instr->opc = OPC_LDP; 1597ec681f3Smrg } 1607ec681f3Smrg } 1617ec681f3Smrg 1627ec681f3Smrg return true; 1637ec681f3Smrg} 164