17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2018 Red Hat 37ec681f3Smrg * Copyright © 2019 Valve Corporation 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 107ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 217ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 227ec681f3Smrg * IN THE SOFTWARE. 237ec681f3Smrg * 247ec681f3Smrg * Authors: 257ec681f3Smrg * Rob Clark (robdclark@gmail.com> 267ec681f3Smrg * Daniel Schürmann (daniel.schuermann@campus.tu-berlin.de) 277ec681f3Smrg * Rhys Perry (pendingchaos02@gmail.com) 287ec681f3Smrg * 297ec681f3Smrg */ 307ec681f3Smrg 317ec681f3Smrg#include "nir.h" 327ec681f3Smrg 337ec681f3Smrg 347ec681f3Smrg/* 357ec681f3Smrg * A simple pass that moves some instructions into the least common 367ec681f3Smrg * anscestor of consuming instructions. 377ec681f3Smrg */ 387ec681f3Smrg 397ec681f3Smrgbool 407ec681f3Smrgnir_can_move_instr(nir_instr *instr, nir_move_options options) 417ec681f3Smrg{ 427ec681f3Smrg switch (instr->type) { 437ec681f3Smrg case nir_instr_type_load_const: 447ec681f3Smrg case nir_instr_type_ssa_undef: { 457ec681f3Smrg return options & nir_move_const_undef; 467ec681f3Smrg } 477ec681f3Smrg case nir_instr_type_alu: { 487ec681f3Smrg if (nir_op_is_vec(nir_instr_as_alu(instr)->op) || 497ec681f3Smrg nir_instr_as_alu(instr)->op == nir_op_b2i32) 507ec681f3Smrg return options & nir_move_copies; 517ec681f3Smrg if (nir_alu_instr_is_comparison(nir_instr_as_alu(instr))) 527ec681f3Smrg return options & nir_move_comparisons; 537ec681f3Smrg return false; 547ec681f3Smrg } 557ec681f3Smrg case nir_instr_type_intrinsic: { 567ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 577ec681f3Smrg switch (intrin->intrinsic) { 587ec681f3Smrg case nir_intrinsic_load_ubo: 597ec681f3Smrg return options & nir_move_load_ubo; 607ec681f3Smrg case nir_intrinsic_load_ssbo: 617ec681f3Smrg return (options & nir_move_load_ssbo) && nir_intrinsic_can_reorder(intrin); 627ec681f3Smrg case nir_intrinsic_load_input: 637ec681f3Smrg case nir_intrinsic_load_interpolated_input: 647ec681f3Smrg case nir_intrinsic_load_per_vertex_input: 657ec681f3Smrg return options & nir_move_load_input; 667ec681f3Smrg default: 677ec681f3Smrg return false; 687ec681f3Smrg } 697ec681f3Smrg } 707ec681f3Smrg default: 717ec681f3Smrg return false; 727ec681f3Smrg } 737ec681f3Smrg} 747ec681f3Smrg 757ec681f3Smrgstatic nir_loop * 767ec681f3Smrgget_innermost_loop(nir_cf_node *node) 777ec681f3Smrg{ 787ec681f3Smrg for (; node != NULL; node = node->parent) { 797ec681f3Smrg if (node->type == nir_cf_node_loop) 807ec681f3Smrg return (nir_loop*)node; 817ec681f3Smrg } 827ec681f3Smrg return NULL; 837ec681f3Smrg} 847ec681f3Smrg 857ec681f3Smrgstatic bool 867ec681f3Smrgloop_contains_block(nir_loop *loop, nir_block *block) 877ec681f3Smrg{ 887ec681f3Smrg nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); 897ec681f3Smrg nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); 907ec681f3Smrg 917ec681f3Smrg return block->index > before->index && block->index < after->index; 927ec681f3Smrg} 937ec681f3Smrg 947ec681f3Smrg/* Given the LCA of all uses and the definition, find a block on the path 957ec681f3Smrg * between them in the dominance tree that is outside of as many loops as 967ec681f3Smrg * possible. If "sink_out_of_loops" is false, then we disallow sinking the 977ec681f3Smrg * definition outside of the loop it's defined in (if any). 987ec681f3Smrg */ 997ec681f3Smrg 1007ec681f3Smrgstatic nir_block * 1017ec681f3Smrgadjust_block_for_loops(nir_block *use_block, nir_block *def_block, 1027ec681f3Smrg bool sink_out_of_loops) 1037ec681f3Smrg{ 1047ec681f3Smrg nir_loop *def_loop = NULL; 1057ec681f3Smrg if (!sink_out_of_loops) 1067ec681f3Smrg def_loop = get_innermost_loop(&def_block->cf_node); 1077ec681f3Smrg 1087ec681f3Smrg for (nir_block *cur_block = use_block; cur_block != def_block->imm_dom; 1097ec681f3Smrg cur_block = cur_block->imm_dom) { 1107ec681f3Smrg if (!sink_out_of_loops && def_loop && 1117ec681f3Smrg !loop_contains_block(def_loop, use_block)) { 1127ec681f3Smrg use_block = cur_block; 1137ec681f3Smrg continue; 1147ec681f3Smrg } 1157ec681f3Smrg 1167ec681f3Smrg nir_cf_node *next = nir_cf_node_next(&cur_block->cf_node); 1177ec681f3Smrg if (next && next->type == nir_cf_node_loop) { 1187ec681f3Smrg nir_loop *following_loop = nir_cf_node_as_loop(next); 1197ec681f3Smrg if (loop_contains_block(following_loop, use_block)) { 1207ec681f3Smrg use_block = cur_block; 1217ec681f3Smrg continue; 1227ec681f3Smrg } 1237ec681f3Smrg } 1247ec681f3Smrg } 1257ec681f3Smrg 1267ec681f3Smrg return use_block; 1277ec681f3Smrg} 1287ec681f3Smrg 1297ec681f3Smrg/* iterate a ssa def's use's and try to find a more optimal block to 1307ec681f3Smrg * move it to, using the dominance tree. In short, if all of the uses 1317ec681f3Smrg * are contained in a single block, the load will be moved there, 1327ec681f3Smrg * otherwise it will be move to the least common ancestor block of all 1337ec681f3Smrg * the uses 1347ec681f3Smrg */ 1357ec681f3Smrgstatic nir_block * 1367ec681f3Smrgget_preferred_block(nir_ssa_def *def, bool sink_out_of_loops) 1377ec681f3Smrg{ 1387ec681f3Smrg nir_block *lca = NULL; 1397ec681f3Smrg 1407ec681f3Smrg nir_foreach_use(use, def) { 1417ec681f3Smrg nir_instr *instr = use->parent_instr; 1427ec681f3Smrg nir_block *use_block = instr->block; 1437ec681f3Smrg 1447ec681f3Smrg /* 1457ec681f3Smrg * Kind of an ugly special-case, but phi instructions 1467ec681f3Smrg * need to appear first in the block, so by definition 1477ec681f3Smrg * we can't move an instruction into a block where it is 1487ec681f3Smrg * consumed by a phi instruction. We could conceivably 1497ec681f3Smrg * move it into a dominator block. 1507ec681f3Smrg */ 1517ec681f3Smrg if (instr->type == nir_instr_type_phi) { 1527ec681f3Smrg nir_phi_instr *phi = nir_instr_as_phi(instr); 1537ec681f3Smrg nir_block *phi_lca = NULL; 1547ec681f3Smrg nir_foreach_phi_src(src, phi) { 1557ec681f3Smrg if (&src->src == use) 1567ec681f3Smrg phi_lca = nir_dominance_lca(phi_lca, src->pred); 1577ec681f3Smrg } 1587ec681f3Smrg use_block = phi_lca; 1597ec681f3Smrg } 1607ec681f3Smrg 1617ec681f3Smrg lca = nir_dominance_lca(lca, use_block); 1627ec681f3Smrg } 1637ec681f3Smrg 1647ec681f3Smrg nir_foreach_if_use(use, def) { 1657ec681f3Smrg nir_block *use_block = 1667ec681f3Smrg nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node)); 1677ec681f3Smrg 1687ec681f3Smrg lca = nir_dominance_lca(lca, use_block); 1697ec681f3Smrg } 1707ec681f3Smrg 1717ec681f3Smrg /* return in case, we didn't find a reachable user */ 1727ec681f3Smrg if (!lca) 1737ec681f3Smrg return NULL; 1747ec681f3Smrg 1757ec681f3Smrg /* We don't sink any instructions into loops to avoid repeated executions 1767ec681f3Smrg * This might occasionally increase register pressure, but seems overall 1777ec681f3Smrg * the better choice. 1787ec681f3Smrg */ 1797ec681f3Smrg lca = adjust_block_for_loops(lca, def->parent_instr->block, 1807ec681f3Smrg sink_out_of_loops); 1817ec681f3Smrg assert(nir_block_dominates(def->parent_instr->block, lca)); 1827ec681f3Smrg 1837ec681f3Smrg return lca; 1847ec681f3Smrg} 1857ec681f3Smrg 1867ec681f3Smrgstatic bool 1877ec681f3Smrgcan_sink_out_of_loop(nir_intrinsic_instr *intrin) 1887ec681f3Smrg{ 1897ec681f3Smrg /* Don't sink buffer loads out of loops because that can make its 1907ec681f3Smrg * resource divergent and break code like that which is generated 1917ec681f3Smrg * by nir_lower_non_uniform_access. 1927ec681f3Smrg */ 1937ec681f3Smrg return intrin->intrinsic != nir_intrinsic_load_ubo && 1947ec681f3Smrg intrin->intrinsic != nir_intrinsic_load_ssbo; 1957ec681f3Smrg} 1967ec681f3Smrg 1977ec681f3Smrgbool 1987ec681f3Smrgnir_opt_sink(nir_shader *shader, nir_move_options options) 1997ec681f3Smrg{ 2007ec681f3Smrg bool progress = false; 2017ec681f3Smrg 2027ec681f3Smrg nir_foreach_function(function, shader) { 2037ec681f3Smrg if (!function->impl) 2047ec681f3Smrg continue; 2057ec681f3Smrg 2067ec681f3Smrg nir_metadata_require(function->impl, 2077ec681f3Smrg nir_metadata_block_index | nir_metadata_dominance); 2087ec681f3Smrg 2097ec681f3Smrg nir_foreach_block_reverse(block, function->impl) { 2107ec681f3Smrg nir_foreach_instr_reverse_safe(instr, block) { 2117ec681f3Smrg if (!nir_can_move_instr(instr, options)) 2127ec681f3Smrg continue; 2137ec681f3Smrg 2147ec681f3Smrg nir_ssa_def *def = nir_instr_ssa_def(instr); 2157ec681f3Smrg 2167ec681f3Smrg bool sink_out_of_loops = 2177ec681f3Smrg instr->type != nir_instr_type_intrinsic || 2187ec681f3Smrg can_sink_out_of_loop(nir_instr_as_intrinsic(instr)); 2197ec681f3Smrg nir_block *use_block = 2207ec681f3Smrg get_preferred_block(def, sink_out_of_loops); 2217ec681f3Smrg 2227ec681f3Smrg if (!use_block || use_block == instr->block) 2237ec681f3Smrg continue; 2247ec681f3Smrg 2257ec681f3Smrg nir_instr_remove(instr); 2267ec681f3Smrg nir_instr_insert(nir_after_phis(use_block), instr); 2277ec681f3Smrg 2287ec681f3Smrg progress = true; 2297ec681f3Smrg } 2307ec681f3Smrg } 2317ec681f3Smrg 2327ec681f3Smrg nir_metadata_preserve(function->impl, 2337ec681f3Smrg nir_metadata_block_index | nir_metadata_dominance); 2347ec681f3Smrg } 2357ec681f3Smrg 2367ec681f3Smrg return progress; 2377ec681f3Smrg} 238