/*
 * Copyright © 2018 Red Hat
 * Copyright © 2019 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Rob Clark (robdclark@gmail.com)
 *    Daniel Schürmann (daniel.schuermann@campus.tu-berlin.de)
 *    Rhys Perry (pendingchaos02@gmail.com)
 *
 */

#include "nir.h"


/*
 * A simple pass that moves some instructions into the least common
 * ancestor of consuming instructions.
37 */ 38 39bool 40nir_can_move_instr(nir_instr *instr, nir_move_options options) 41{ 42 switch (instr->type) { 43 case nir_instr_type_load_const: 44 case nir_instr_type_ssa_undef: { 45 return options & nir_move_const_undef; 46 } 47 case nir_instr_type_alu: { 48 if (nir_op_is_vec(nir_instr_as_alu(instr)->op) || 49 nir_instr_as_alu(instr)->op == nir_op_b2i32) 50 return options & nir_move_copies; 51 if (nir_alu_instr_is_comparison(nir_instr_as_alu(instr))) 52 return options & nir_move_comparisons; 53 return false; 54 } 55 case nir_instr_type_intrinsic: { 56 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 57 switch (intrin->intrinsic) { 58 case nir_intrinsic_load_ubo: 59 return options & nir_move_load_ubo; 60 case nir_intrinsic_load_ssbo: 61 return (options & nir_move_load_ssbo) && nir_intrinsic_can_reorder(intrin); 62 case nir_intrinsic_load_input: 63 case nir_intrinsic_load_interpolated_input: 64 case nir_intrinsic_load_per_vertex_input: 65 return options & nir_move_load_input; 66 default: 67 return false; 68 } 69 } 70 default: 71 return false; 72 } 73} 74 75static nir_loop * 76get_innermost_loop(nir_cf_node *node) 77{ 78 for (; node != NULL; node = node->parent) { 79 if (node->type == nir_cf_node_loop) 80 return (nir_loop*)node; 81 } 82 return NULL; 83} 84 85static bool 86loop_contains_block(nir_loop *loop, nir_block *block) 87{ 88 nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); 89 nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); 90 91 return block->index > before->index && block->index < after->index; 92} 93 94/* Given the LCA of all uses and the definition, find a block on the path 95 * between them in the dominance tree that is outside of as many loops as 96 * possible. If "sink_out_of_loops" is false, then we disallow sinking the 97 * definition outside of the loop it's defined in (if any). 
98 */ 99 100static nir_block * 101adjust_block_for_loops(nir_block *use_block, nir_block *def_block, 102 bool sink_out_of_loops) 103{ 104 nir_loop *def_loop = NULL; 105 if (!sink_out_of_loops) 106 def_loop = get_innermost_loop(&def_block->cf_node); 107 108 for (nir_block *cur_block = use_block; cur_block != def_block->imm_dom; 109 cur_block = cur_block->imm_dom) { 110 if (!sink_out_of_loops && def_loop && 111 !loop_contains_block(def_loop, use_block)) { 112 use_block = cur_block; 113 continue; 114 } 115 116 nir_cf_node *next = nir_cf_node_next(&cur_block->cf_node); 117 if (next && next->type == nir_cf_node_loop) { 118 nir_loop *following_loop = nir_cf_node_as_loop(next); 119 if (loop_contains_block(following_loop, use_block)) { 120 use_block = cur_block; 121 continue; 122 } 123 } 124 } 125 126 return use_block; 127} 128 129/* iterate a ssa def's use's and try to find a more optimal block to 130 * move it to, using the dominance tree. In short, if all of the uses 131 * are contained in a single block, the load will be moved there, 132 * otherwise it will be move to the least common ancestor block of all 133 * the uses 134 */ 135static nir_block * 136get_preferred_block(nir_ssa_def *def, bool sink_out_of_loops) 137{ 138 nir_block *lca = NULL; 139 140 nir_foreach_use(use, def) { 141 nir_instr *instr = use->parent_instr; 142 nir_block *use_block = instr->block; 143 144 /* 145 * Kind of an ugly special-case, but phi instructions 146 * need to appear first in the block, so by definition 147 * we can't move an instruction into a block where it is 148 * consumed by a phi instruction. We could conceivably 149 * move it into a dominator block. 
150 */ 151 if (instr->type == nir_instr_type_phi) { 152 nir_phi_instr *phi = nir_instr_as_phi(instr); 153 nir_block *phi_lca = NULL; 154 nir_foreach_phi_src(src, phi) { 155 if (&src->src == use) 156 phi_lca = nir_dominance_lca(phi_lca, src->pred); 157 } 158 use_block = phi_lca; 159 } 160 161 lca = nir_dominance_lca(lca, use_block); 162 } 163 164 nir_foreach_if_use(use, def) { 165 nir_block *use_block = 166 nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node)); 167 168 lca = nir_dominance_lca(lca, use_block); 169 } 170 171 /* return in case, we didn't find a reachable user */ 172 if (!lca) 173 return NULL; 174 175 /* We don't sink any instructions into loops to avoid repeated executions 176 * This might occasionally increase register pressure, but seems overall 177 * the better choice. 178 */ 179 lca = adjust_block_for_loops(lca, def->parent_instr->block, 180 sink_out_of_loops); 181 assert(nir_block_dominates(def->parent_instr->block, lca)); 182 183 return lca; 184} 185 186static bool 187can_sink_out_of_loop(nir_intrinsic_instr *intrin) 188{ 189 /* Don't sink buffer loads out of loops because that can make its 190 * resource divergent and break code like that which is generated 191 * by nir_lower_non_uniform_access. 
192 */ 193 return intrin->intrinsic != nir_intrinsic_load_ubo && 194 intrin->intrinsic != nir_intrinsic_load_ssbo; 195} 196 197bool 198nir_opt_sink(nir_shader *shader, nir_move_options options) 199{ 200 bool progress = false; 201 202 nir_foreach_function(function, shader) { 203 if (!function->impl) 204 continue; 205 206 nir_metadata_require(function->impl, 207 nir_metadata_block_index | nir_metadata_dominance); 208 209 nir_foreach_block_reverse(block, function->impl) { 210 nir_foreach_instr_reverse_safe(instr, block) { 211 if (!nir_can_move_instr(instr, options)) 212 continue; 213 214 nir_ssa_def *def = nir_instr_ssa_def(instr); 215 216 bool sink_out_of_loops = 217 instr->type != nir_instr_type_intrinsic || 218 can_sink_out_of_loop(nir_instr_as_intrinsic(instr)); 219 nir_block *use_block = 220 get_preferred_block(def, sink_out_of_loops); 221 222 if (!use_block || use_block == instr->block) 223 continue; 224 225 nir_instr_remove(instr); 226 nir_instr_insert(nir_after_phis(use_block), instr); 227 228 progress = true; 229 } 230 } 231 232 nir_metadata_preserve(function->impl, 233 nir_metadata_block_index | nir_metadata_dominance); 234 } 235 236 return progress; 237} 238