1/* 2 * Copyright (C) 2021 Valve Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "ir3_ra.h" 25 26/* The spilling pass leaves out a few details required to successfully operate 27 * ldp/stp: 28 * 29 * 1. ldp/stp can only load/store 4 components at a time, but spilling ignores 30 * that and just spills/restores entire values, including arrays and values 31 * created for texture setup which can be more than 4 components. 32 * 2. The spiller doesn't add barrier dependencies needed for post-RA 33 * scheduling. 34 * 35 * The first one, in particular, is much easier to handle after RA because 36 * arrays and normal values can be treated the same way. Therefore this pass 37 * runs after RA, and handles both issues. This keeps the complexity out of the 38 * spiller. 39 */ 40 41static void 42split_spill(struct ir3_instruction *spill) 43{ 44 unsigned orig_components = spill->srcs[2]->uim_val; 45 46 /* We don't handle splitting dependencies. */ 47 assert(spill->deps_count == 0); 48 49 if (orig_components <= 4) { 50 if (spill->srcs[1]->flags & IR3_REG_ARRAY) { 51 spill->srcs[1]->wrmask = MASK(orig_components); 52 spill->srcs[1]->num = spill->srcs[1]->array.base; 53 spill->srcs[1]->flags &= ~IR3_REG_ARRAY; 54 } 55 return; 56 } 57 58 for (unsigned comp = 0; comp < orig_components; comp += 4) { 59 unsigned components = MIN2(orig_components - comp, 4); 60 struct ir3_instruction *clone = ir3_instr_clone(spill); 61 ir3_instr_move_before(clone, spill); 62 63 clone->srcs[1]->wrmask = MASK(components); 64 if (clone->srcs[1]->flags & IR3_REG_ARRAY) { 65 clone->srcs[1]->num = clone->srcs[1]->array.base + comp; 66 clone->srcs[1]->flags &= ~IR3_REG_ARRAY; 67 } 68 69 clone->srcs[2]->uim_val = components; 70 clone->cat6.dst_offset += 71 comp * ((spill->srcs[1]->flags & IR3_REG_HALF) ? 2 : 4); 72 } 73 74 list_delinit(&spill->node); 75} 76 77static void 78split_reload(struct ir3_instruction *reload) 79{ 80 unsigned orig_components = reload->srcs[2]->uim_val; 81 82 assert(reload->deps_count == 0); 83 84 if (orig_components <= 4) { 85 if (reload->dsts[0]->flags & IR3_REG_ARRAY) { 86 reload->dsts[0]->wrmask = MASK(orig_components); 87 reload->dsts[0]->num = reload->dsts[0]->array.base; 88 reload->dsts[0]->flags &= ~IR3_REG_ARRAY; 89 } 90 return; 91 } 92 93 for (unsigned comp = 0; comp < orig_components; comp += 4) { 94 unsigned components = MIN2(orig_components - comp, 4); 95 struct ir3_instruction *clone = ir3_instr_clone(reload); 96 ir3_instr_move_before(clone, reload); 97 98 clone->dsts[0]->wrmask = MASK(components); 99 if (clone->dsts[0]->flags & IR3_REG_ARRAY) { 100 clone->dsts[0]->num = clone->dsts[0]->array.base + comp; 101 clone->dsts[0]->flags &= ~IR3_REG_ARRAY; 102 } 103 104 clone->srcs[2]->uim_val = components; 105 clone->srcs[1]->uim_val += 106 comp * ((reload->dsts[0]->flags & IR3_REG_HALF) ? 2 : 4); 107 } 108 109 list_delinit(&reload->node); 110} 111 112static void 113add_spill_reload_deps(struct ir3_block *block) 114{ 115 struct ir3_instruction *last_spill = NULL; 116 117 foreach_instr (instr, &block->instr_list) { 118 if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) && 119 last_spill) { 120 ir3_instr_add_dep(instr, last_spill); 121 } 122 123 if (instr->opc == OPC_SPILL_MACRO) 124 last_spill = instr; 125 } 126 127 128 last_spill = NULL; 129 130 foreach_instr_rev (instr, &block->instr_list) { 131 if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) && 132 last_spill) { 133 ir3_instr_add_dep(last_spill, instr); 134 } 135 136 if (instr->opc == OPC_SPILL_MACRO) 137 last_spill = instr; 138 } 139} 140 141bool 142ir3_lower_spill(struct ir3 *ir) 143{ 144 foreach_block (block, &ir->block_list) { 145 foreach_instr_safe (instr, &block->instr_list) { 146 if (instr->opc == OPC_SPILL_MACRO) 147 split_spill(instr); 148 else if (instr->opc == OPC_RELOAD_MACRO) 149 split_reload(instr); 150 } 151 152 add_spill_reload_deps(block); 153 154 foreach_instr (instr, &block->instr_list) { 155 if (instr->opc == OPC_SPILL_MACRO) 156 instr->opc = OPC_STP; 157 else if (instr->opc == OPC_RELOAD_MACRO) 158 instr->opc = OPC_LDP; 159 } 160 } 161 162 return true; 163} 164