17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2018 Red Hat
37ec681f3Smrg * Copyright © 2019 Valve Corporation
47ec681f3Smrg *
57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
67ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
77ec681f3Smrg * to deal in the Software without restriction, including without limitation
87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
107ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
117ec681f3Smrg *
127ec681f3Smrg * The above copyright notice and this permission notice (including the next
137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
147ec681f3Smrg * Software.
157ec681f3Smrg *
167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
217ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
227ec681f3Smrg * IN THE SOFTWARE.
237ec681f3Smrg *
247ec681f3Smrg * Authors:
257ec681f3Smrg *    Rob Clark (robdclark@gmail.com>
267ec681f3Smrg *    Daniel Schürmann (daniel.schuermann@campus.tu-berlin.de)
277ec681f3Smrg *    Rhys Perry (pendingchaos02@gmail.com)
287ec681f3Smrg *
297ec681f3Smrg */
307ec681f3Smrg
317ec681f3Smrg#include "nir.h"
327ec681f3Smrg
337ec681f3Smrg
347ec681f3Smrg/*
357ec681f3Smrg * A simple pass that moves some instructions into the least common
367ec681f3Smrg * anscestor of consuming instructions.
377ec681f3Smrg */
387ec681f3Smrg
397ec681f3Smrgbool
407ec681f3Smrgnir_can_move_instr(nir_instr *instr, nir_move_options options)
417ec681f3Smrg{
427ec681f3Smrg   switch (instr->type) {
437ec681f3Smrg   case nir_instr_type_load_const:
447ec681f3Smrg   case nir_instr_type_ssa_undef: {
457ec681f3Smrg      return options & nir_move_const_undef;
467ec681f3Smrg   }
477ec681f3Smrg   case nir_instr_type_alu: {
487ec681f3Smrg      if (nir_op_is_vec(nir_instr_as_alu(instr)->op) ||
497ec681f3Smrg          nir_instr_as_alu(instr)->op == nir_op_b2i32)
507ec681f3Smrg         return options & nir_move_copies;
517ec681f3Smrg      if (nir_alu_instr_is_comparison(nir_instr_as_alu(instr)))
527ec681f3Smrg         return options & nir_move_comparisons;
537ec681f3Smrg      return false;
547ec681f3Smrg   }
557ec681f3Smrg   case nir_instr_type_intrinsic: {
567ec681f3Smrg      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
577ec681f3Smrg      switch (intrin->intrinsic) {
587ec681f3Smrg      case nir_intrinsic_load_ubo:
597ec681f3Smrg         return options & nir_move_load_ubo;
607ec681f3Smrg      case nir_intrinsic_load_ssbo:
617ec681f3Smrg         return (options & nir_move_load_ssbo) && nir_intrinsic_can_reorder(intrin);
627ec681f3Smrg      case nir_intrinsic_load_input:
637ec681f3Smrg      case nir_intrinsic_load_interpolated_input:
647ec681f3Smrg      case nir_intrinsic_load_per_vertex_input:
657ec681f3Smrg         return options & nir_move_load_input;
667ec681f3Smrg      default:
677ec681f3Smrg         return false;
687ec681f3Smrg      }
697ec681f3Smrg   }
707ec681f3Smrg   default:
717ec681f3Smrg      return false;
727ec681f3Smrg   }
737ec681f3Smrg}
747ec681f3Smrg
757ec681f3Smrgstatic nir_loop *
767ec681f3Smrgget_innermost_loop(nir_cf_node *node)
777ec681f3Smrg{
787ec681f3Smrg   for (; node != NULL; node = node->parent) {
797ec681f3Smrg      if (node->type == nir_cf_node_loop)
807ec681f3Smrg         return (nir_loop*)node;
817ec681f3Smrg   }
827ec681f3Smrg   return NULL;
837ec681f3Smrg}
847ec681f3Smrg
857ec681f3Smrgstatic bool
867ec681f3Smrgloop_contains_block(nir_loop *loop, nir_block *block)
877ec681f3Smrg{
887ec681f3Smrg   nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
897ec681f3Smrg   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
907ec681f3Smrg
917ec681f3Smrg   return block->index > before->index && block->index < after->index;
927ec681f3Smrg}
937ec681f3Smrg
947ec681f3Smrg/* Given the LCA of all uses and the definition, find a block on the path
957ec681f3Smrg * between them in the dominance tree that is outside of as many loops as
967ec681f3Smrg * possible. If "sink_out_of_loops" is false, then we disallow sinking the
977ec681f3Smrg * definition outside of the loop it's defined in (if any).
987ec681f3Smrg */
997ec681f3Smrg
1007ec681f3Smrgstatic nir_block *
1017ec681f3Smrgadjust_block_for_loops(nir_block *use_block, nir_block *def_block,
1027ec681f3Smrg                       bool sink_out_of_loops)
1037ec681f3Smrg{
1047ec681f3Smrg   nir_loop *def_loop = NULL;
1057ec681f3Smrg   if (!sink_out_of_loops)
1067ec681f3Smrg      def_loop = get_innermost_loop(&def_block->cf_node);
1077ec681f3Smrg
1087ec681f3Smrg   for (nir_block *cur_block = use_block; cur_block != def_block->imm_dom;
1097ec681f3Smrg        cur_block = cur_block->imm_dom) {
1107ec681f3Smrg      if (!sink_out_of_loops && def_loop &&
1117ec681f3Smrg          !loop_contains_block(def_loop, use_block)) {
1127ec681f3Smrg         use_block = cur_block;
1137ec681f3Smrg         continue;
1147ec681f3Smrg      }
1157ec681f3Smrg
1167ec681f3Smrg      nir_cf_node *next = nir_cf_node_next(&cur_block->cf_node);
1177ec681f3Smrg      if (next && next->type == nir_cf_node_loop) {
1187ec681f3Smrg         nir_loop *following_loop = nir_cf_node_as_loop(next);
1197ec681f3Smrg         if (loop_contains_block(following_loop, use_block)) {
1207ec681f3Smrg             use_block = cur_block;
1217ec681f3Smrg             continue;
1227ec681f3Smrg         }
1237ec681f3Smrg      }
1247ec681f3Smrg   }
1257ec681f3Smrg
1267ec681f3Smrg   return use_block;
1277ec681f3Smrg}
1287ec681f3Smrg
1297ec681f3Smrg/* iterate a ssa def's use's and try to find a more optimal block to
1307ec681f3Smrg * move it to, using the dominance tree.  In short, if all of the uses
1317ec681f3Smrg * are contained in a single block, the load will be moved there,
1327ec681f3Smrg * otherwise it will be move to the least common ancestor block of all
1337ec681f3Smrg * the uses
1347ec681f3Smrg */
1357ec681f3Smrgstatic nir_block *
1367ec681f3Smrgget_preferred_block(nir_ssa_def *def, bool sink_out_of_loops)
1377ec681f3Smrg{
1387ec681f3Smrg   nir_block *lca = NULL;
1397ec681f3Smrg
1407ec681f3Smrg   nir_foreach_use(use, def) {
1417ec681f3Smrg      nir_instr *instr = use->parent_instr;
1427ec681f3Smrg      nir_block *use_block = instr->block;
1437ec681f3Smrg
1447ec681f3Smrg      /*
1457ec681f3Smrg       * Kind of an ugly special-case, but phi instructions
1467ec681f3Smrg       * need to appear first in the block, so by definition
1477ec681f3Smrg       * we can't move an instruction into a block where it is
1487ec681f3Smrg       * consumed by a phi instruction.  We could conceivably
1497ec681f3Smrg       * move it into a dominator block.
1507ec681f3Smrg       */
1517ec681f3Smrg      if (instr->type == nir_instr_type_phi) {
1527ec681f3Smrg         nir_phi_instr *phi = nir_instr_as_phi(instr);
1537ec681f3Smrg         nir_block *phi_lca = NULL;
1547ec681f3Smrg         nir_foreach_phi_src(src, phi) {
1557ec681f3Smrg            if (&src->src == use)
1567ec681f3Smrg               phi_lca = nir_dominance_lca(phi_lca, src->pred);
1577ec681f3Smrg         }
1587ec681f3Smrg         use_block = phi_lca;
1597ec681f3Smrg      }
1607ec681f3Smrg
1617ec681f3Smrg      lca = nir_dominance_lca(lca, use_block);
1627ec681f3Smrg   }
1637ec681f3Smrg
1647ec681f3Smrg   nir_foreach_if_use(use, def) {
1657ec681f3Smrg      nir_block *use_block =
1667ec681f3Smrg         nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node));
1677ec681f3Smrg
1687ec681f3Smrg      lca = nir_dominance_lca(lca, use_block);
1697ec681f3Smrg   }
1707ec681f3Smrg
1717ec681f3Smrg   /* return in case, we didn't find a reachable user */
1727ec681f3Smrg   if (!lca)
1737ec681f3Smrg      return NULL;
1747ec681f3Smrg
1757ec681f3Smrg   /* We don't sink any instructions into loops to avoid repeated executions
1767ec681f3Smrg    * This might occasionally increase register pressure, but seems overall
1777ec681f3Smrg    * the better choice.
1787ec681f3Smrg    */
1797ec681f3Smrg   lca = adjust_block_for_loops(lca, def->parent_instr->block,
1807ec681f3Smrg                                sink_out_of_loops);
1817ec681f3Smrg   assert(nir_block_dominates(def->parent_instr->block, lca));
1827ec681f3Smrg
1837ec681f3Smrg   return lca;
1847ec681f3Smrg}
1857ec681f3Smrg
1867ec681f3Smrgstatic bool
1877ec681f3Smrgcan_sink_out_of_loop(nir_intrinsic_instr *intrin)
1887ec681f3Smrg{
1897ec681f3Smrg   /* Don't sink buffer loads out of loops because that can make its
1907ec681f3Smrg    * resource divergent and break code like that which is generated
1917ec681f3Smrg    * by nir_lower_non_uniform_access.
1927ec681f3Smrg    */
1937ec681f3Smrg   return intrin->intrinsic != nir_intrinsic_load_ubo &&
1947ec681f3Smrg          intrin->intrinsic != nir_intrinsic_load_ssbo;
1957ec681f3Smrg}
1967ec681f3Smrg
1977ec681f3Smrgbool
1987ec681f3Smrgnir_opt_sink(nir_shader *shader, nir_move_options options)
1997ec681f3Smrg{
2007ec681f3Smrg   bool progress = false;
2017ec681f3Smrg
2027ec681f3Smrg   nir_foreach_function(function, shader) {
2037ec681f3Smrg      if (!function->impl)
2047ec681f3Smrg         continue;
2057ec681f3Smrg
2067ec681f3Smrg      nir_metadata_require(function->impl,
2077ec681f3Smrg                           nir_metadata_block_index | nir_metadata_dominance);
2087ec681f3Smrg
2097ec681f3Smrg      nir_foreach_block_reverse(block, function->impl) {
2107ec681f3Smrg         nir_foreach_instr_reverse_safe(instr, block) {
2117ec681f3Smrg            if (!nir_can_move_instr(instr, options))
2127ec681f3Smrg               continue;
2137ec681f3Smrg
2147ec681f3Smrg            nir_ssa_def *def = nir_instr_ssa_def(instr);
2157ec681f3Smrg
2167ec681f3Smrg            bool sink_out_of_loops =
2177ec681f3Smrg               instr->type != nir_instr_type_intrinsic ||
2187ec681f3Smrg               can_sink_out_of_loop(nir_instr_as_intrinsic(instr));
2197ec681f3Smrg            nir_block *use_block =
2207ec681f3Smrg                  get_preferred_block(def, sink_out_of_loops);
2217ec681f3Smrg
2227ec681f3Smrg            if (!use_block || use_block == instr->block)
2237ec681f3Smrg               continue;
2247ec681f3Smrg
2257ec681f3Smrg            nir_instr_remove(instr);
2267ec681f3Smrg            nir_instr_insert(nir_after_phis(use_block), instr);
2277ec681f3Smrg
2287ec681f3Smrg            progress = true;
2297ec681f3Smrg         }
2307ec681f3Smrg      }
2317ec681f3Smrg
2327ec681f3Smrg      nir_metadata_preserve(function->impl,
2337ec681f3Smrg                            nir_metadata_block_index | nir_metadata_dominance);
2347ec681f3Smrg   }
2357ec681f3Smrg
2367ec681f3Smrg   return progress;
2377ec681f3Smrg}
238