101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2014 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg * 2301e04c3fSmrg * Authors: 2401e04c3fSmrg * Connor Abbott (cwabbott0@gmail.com) 2501e04c3fSmrg * 2601e04c3fSmrg */ 2701e04c3fSmrg 2801e04c3fSmrg#include "nir.h" 2901e04c3fSmrg 307ec681f3Smrgstatic bool 317ec681f3Smrgis_dest_live(const nir_dest *dest, BITSET_WORD *defs_live) 3201e04c3fSmrg{ 337ec681f3Smrg return !dest->is_ssa || BITSET_TEST(defs_live, dest->ssa.index); 3401e04c3fSmrg} 3501e04c3fSmrg 3601e04c3fSmrgstatic bool 377ec681f3Smrgmark_src_live(const nir_src *src, BITSET_WORD *defs_live) 3801e04c3fSmrg{ 397ec681f3Smrg if (src->is_ssa && !BITSET_TEST(defs_live, src->ssa->index)) { 407ec681f3Smrg BITSET_SET(defs_live, src->ssa->index); 417ec681f3Smrg return true; 427ec681f3Smrg } else { 437ec681f3Smrg return false; 447ec681f3Smrg } 457ec681f3Smrg} 4601e04c3fSmrg 477ec681f3Smrgstatic bool 487ec681f3Smrgmark_live_cb(nir_src *src, void *defs_live) 497ec681f3Smrg{ 507ec681f3Smrg mark_src_live(src, defs_live); 5101e04c3fSmrg return true; 5201e04c3fSmrg} 5301e04c3fSmrg 547ec681f3Smrgstatic bool 557ec681f3Smrgis_live(BITSET_WORD *defs_live, nir_instr *instr) 5601e04c3fSmrg{ 5701e04c3fSmrg switch (instr->type) { 5801e04c3fSmrg case nir_instr_type_call: 5901e04c3fSmrg case nir_instr_type_jump: 607ec681f3Smrg return true; 617ec681f3Smrg case nir_instr_type_alu: { 627ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(instr); 637ec681f3Smrg return is_dest_live(&alu->dest.dest, defs_live); 647ec681f3Smrg } 657ec681f3Smrg case nir_instr_type_deref: { 667ec681f3Smrg nir_deref_instr *deref = nir_instr_as_deref(instr); 677ec681f3Smrg return is_dest_live(&deref->dest, defs_live); 687ec681f3Smrg } 697ec681f3Smrg case nir_instr_type_intrinsic: { 707ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 717ec681f3Smrg const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic]; 727ec681f3Smrg return !(info->flags & NIR_INTRINSIC_CAN_ELIMINATE) || 737ec681f3Smrg (info->has_dest && is_dest_live(&intrin->dest, defs_live)); 747ec681f3Smrg } 757ec681f3Smrg case nir_instr_type_tex: { 767ec681f3Smrg nir_tex_instr *tex = nir_instr_as_tex(instr); 777ec681f3Smrg return is_dest_live(&tex->dest, defs_live); 787ec681f3Smrg } 797ec681f3Smrg case nir_instr_type_phi: { 807ec681f3Smrg nir_phi_instr *phi = nir_instr_as_phi(instr); 817ec681f3Smrg return is_dest_live(&phi->dest, defs_live); 827ec681f3Smrg } 837ec681f3Smrg case nir_instr_type_load_const: { 847ec681f3Smrg nir_load_const_instr *lc = nir_instr_as_load_const(instr); 857ec681f3Smrg return BITSET_TEST(defs_live, lc->def.index); 867ec681f3Smrg } 877ec681f3Smrg case nir_instr_type_ssa_undef: { 887ec681f3Smrg nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(instr); 897ec681f3Smrg return BITSET_TEST(defs_live, undef->def.index); 907ec681f3Smrg } 917ec681f3Smrg case nir_instr_type_parallel_copy: { 927ec681f3Smrg nir_parallel_copy_instr *pc = nir_instr_as_parallel_copy(instr); 937ec681f3Smrg nir_foreach_parallel_copy_entry(entry, pc) { 947ec681f3Smrg if (is_dest_live(&entry->dest, defs_live)) 957ec681f3Smrg return true; 9601e04c3fSmrg } 977ec681f3Smrg return false; 987ec681f3Smrg } 9901e04c3fSmrg default: 1007ec681f3Smrg unreachable("unexpected instr type"); 10101e04c3fSmrg } 10201e04c3fSmrg} 10301e04c3fSmrg 1047ec681f3Smrgstruct loop_state { 1057ec681f3Smrg bool header_phis_changed; 1067ec681f3Smrg nir_block *preheader; 1077ec681f3Smrg}; 1087ec681f3Smrg 10901e04c3fSmrgstatic bool 1107ec681f3Smrgdce_block(nir_block *block, BITSET_WORD *defs_live, struct loop_state *loop) 11101e04c3fSmrg{ 1127ec681f3Smrg bool progress = false; 1137ec681f3Smrg bool phis_changed = false; 1147ec681f3Smrg nir_foreach_instr_reverse_safe(instr, block) { 1157ec681f3Smrg bool live = is_live(defs_live, instr); 1167ec681f3Smrg if (live) { 1177ec681f3Smrg if (instr->type == nir_instr_type_phi) { 1187ec681f3Smrg nir_foreach_phi_src(src, nir_instr_as_phi(instr)) { 1197ec681f3Smrg phis_changed |= mark_src_live(&src->src, defs_live) && 1207ec681f3Smrg src->pred != loop->preheader; 1217ec681f3Smrg } 1227ec681f3Smrg } else { 1237ec681f3Smrg nir_foreach_src(instr, mark_live_cb, defs_live); 1247ec681f3Smrg } 1257ec681f3Smrg } 1267ec681f3Smrg 1277ec681f3Smrg /* If we're not in a loop, remove it now if it's dead. If we are in a 1287ec681f3Smrg * loop, leave instructions to be removed later if they're still dead. 1297ec681f3Smrg */ 1307ec681f3Smrg if (loop->preheader) { 1317ec681f3Smrg instr->pass_flags = live; 1327ec681f3Smrg } else if (!live) { 1337ec681f3Smrg nir_instr_remove(instr); 1347ec681f3Smrg progress = true; 1357ec681f3Smrg } 13601e04c3fSmrg } 13701e04c3fSmrg 1387ec681f3Smrg /* Because blocks are visited in reverse and this stomps header_phis_changed, 1397ec681f3Smrg * we don't have to check whether the current block is a loop header before 1407ec681f3Smrg * setting header_phis_changed. 1417ec681f3Smrg */ 1427ec681f3Smrg loop->header_phis_changed = phis_changed; 1437ec681f3Smrg 1447ec681f3Smrg return progress; 14501e04c3fSmrg} 14601e04c3fSmrg 14701e04c3fSmrgstatic bool 1487ec681f3Smrgdce_cf_list(struct exec_list *cf_list, BITSET_WORD *defs_live, 1497ec681f3Smrg struct loop_state *parent_loop) 15001e04c3fSmrg{ 1517ec681f3Smrg bool progress = false; 1527ec681f3Smrg foreach_list_typed_reverse(nir_cf_node, cf_node, node, cf_list) { 1537ec681f3Smrg switch (cf_node->type) { 1547ec681f3Smrg case nir_cf_node_block: { 1557ec681f3Smrg nir_block *block = nir_cf_node_as_block(cf_node); 1567ec681f3Smrg progress |= dce_block(block, defs_live, parent_loop); 1577ec681f3Smrg break; 1587ec681f3Smrg } 1597ec681f3Smrg case nir_cf_node_if: { 1607ec681f3Smrg nir_if *nif = nir_cf_node_as_if(cf_node); 1617ec681f3Smrg progress |= dce_cf_list(&nif->else_list, defs_live, parent_loop); 1627ec681f3Smrg progress |= dce_cf_list(&nif->then_list, defs_live, parent_loop); 1637ec681f3Smrg mark_src_live(&nif->condition, defs_live); 1647ec681f3Smrg break; 1657ec681f3Smrg } 1667ec681f3Smrg case nir_cf_node_loop: { 1677ec681f3Smrg nir_loop *loop = nir_cf_node_as_loop(cf_node); 1687ec681f3Smrg 1697ec681f3Smrg struct loop_state inner_state; 1707ec681f3Smrg inner_state.preheader = nir_cf_node_as_block(nir_cf_node_prev(cf_node)); 1717ec681f3Smrg inner_state.header_phis_changed = false; 1727ec681f3Smrg 1737ec681f3Smrg /* Fast path if the loop has no continues: we can remove instructions 1747ec681f3Smrg * as we mark the others live. 1757ec681f3Smrg */ 1767ec681f3Smrg struct set *predecessors = nir_loop_first_block(loop)->predecessors; 1777ec681f3Smrg if (predecessors->entries == 1 && 1787ec681f3Smrg _mesa_set_next_entry(predecessors, NULL)->key == inner_state.preheader) { 1797ec681f3Smrg progress |= dce_cf_list(&loop->body, defs_live, parent_loop); 1807ec681f3Smrg break; 1817ec681f3Smrg } 18201e04c3fSmrg 1837ec681f3Smrg /* Mark instructions as live until there is no more progress. */ 1847ec681f3Smrg do { 1857ec681f3Smrg /* dce_cf_list() resets inner_state.header_phis_changed itself, so 1867ec681f3Smrg * it doesn't have to be done here. 1877ec681f3Smrg */ 1887ec681f3Smrg dce_cf_list(&loop->body, defs_live, &inner_state); 1897ec681f3Smrg } while (inner_state.header_phis_changed); 1907ec681f3Smrg 1917ec681f3Smrg /* We don't know how many times mark_cf_list() will repeat, so 1927ec681f3Smrg * remove instructions separately. 1937ec681f3Smrg * 1947ec681f3Smrg * By checking parent_loop->preheader, we ensure that we only do this 1957ec681f3Smrg * walk for the outer-most loops so it only happens once. 1967ec681f3Smrg */ 1977ec681f3Smrg if (!parent_loop->preheader) { 1987ec681f3Smrg nir_foreach_block_in_cf_node(block, cf_node) { 1997ec681f3Smrg nir_foreach_instr_safe(instr, block) { 2007ec681f3Smrg if (!instr->pass_flags) { 2017ec681f3Smrg nir_instr_remove(instr); 2027ec681f3Smrg progress = true; 2037ec681f3Smrg } 2047ec681f3Smrg } 2057ec681f3Smrg } 2067ec681f3Smrg } 2077ec681f3Smrg break; 2087ec681f3Smrg } 2097ec681f3Smrg case nir_cf_node_function: 2107ec681f3Smrg unreachable("Invalid cf type"); 2117ec681f3Smrg } 21201e04c3fSmrg } 21301e04c3fSmrg 2147ec681f3Smrg return progress; 2157ec681f3Smrg} 2167ec681f3Smrg 2177ec681f3Smrgstatic bool 2187ec681f3Smrgnir_opt_dce_impl(nir_function_impl *impl) 2197ec681f3Smrg{ 2207ec681f3Smrg assert(impl->structured); 22101e04c3fSmrg 2227ec681f3Smrg BITSET_WORD *defs_live = rzalloc_array(NULL, BITSET_WORD, 2237ec681f3Smrg BITSET_WORDS(impl->ssa_alloc)); 22401e04c3fSmrg 2257ec681f3Smrg struct loop_state loop; 2267ec681f3Smrg loop.preheader = NULL; 2277ec681f3Smrg bool progress = dce_cf_list(&impl->body, defs_live, &loop); 22801e04c3fSmrg 2297ec681f3Smrg ralloc_free(defs_live); 23001e04c3fSmrg 2317e102996Smaya if (progress) { 23201e04c3fSmrg nir_metadata_preserve(impl, nir_metadata_block_index | 23301e04c3fSmrg nir_metadata_dominance); 2347e102996Smaya } else { 2357ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_all); 2367e102996Smaya } 23701e04c3fSmrg 23801e04c3fSmrg return progress; 23901e04c3fSmrg} 24001e04c3fSmrg 24101e04c3fSmrgbool 24201e04c3fSmrgnir_opt_dce(nir_shader *shader) 24301e04c3fSmrg{ 24401e04c3fSmrg bool progress = false; 24501e04c3fSmrg nir_foreach_function(function, shader) { 24601e04c3fSmrg if (function->impl && nir_opt_dce_impl(function->impl)) 24701e04c3fSmrg progress = true; 24801e04c3fSmrg } 24901e04c3fSmrg 25001e04c3fSmrg return progress; 25101e04c3fSmrg} 252