101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2015 Broadcom 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include "nir.h" 2501e04c3fSmrg#include "nir_builder.h" 2601e04c3fSmrg 2701e04c3fSmrg/** @file nir_opt_undef.c 2801e04c3fSmrg * 2901e04c3fSmrg * Handles optimization of operations involving ssa_undef. 3001e04c3fSmrg */ 3101e04c3fSmrg 3201e04c3fSmrg/** 3301e04c3fSmrg * Turn conditional selects between an undef and some other value into a move 3401e04c3fSmrg * of that other value (on the assumption that the condition's going to be 3501e04c3fSmrg * choosing the defined value). This reduces work after if flattening when 3601e04c3fSmrg * each side of the if is defining a variable. 3701e04c3fSmrg */ 3801e04c3fSmrgstatic bool 3901e04c3fSmrgopt_undef_csel(nir_alu_instr *instr) 4001e04c3fSmrg{ 4101e04c3fSmrg if (instr->op != nir_op_bcsel && instr->op != nir_op_fcsel) 4201e04c3fSmrg return false; 4301e04c3fSmrg 4401e04c3fSmrg assert(instr->dest.dest.is_ssa); 4501e04c3fSmrg 4601e04c3fSmrg for (int i = 1; i <= 2; i++) { 4701e04c3fSmrg if (!instr->src[i].src.is_ssa) 4801e04c3fSmrg continue; 4901e04c3fSmrg 5001e04c3fSmrg nir_instr *parent = instr->src[i].src.ssa->parent_instr; 5101e04c3fSmrg if (parent->type != nir_instr_type_ssa_undef) 5201e04c3fSmrg continue; 5301e04c3fSmrg 5401e04c3fSmrg /* We can't just use nir_alu_src_copy, because we need the def/use 5501e04c3fSmrg * updated. 5601e04c3fSmrg */ 5701e04c3fSmrg nir_instr_rewrite_src(&instr->instr, &instr->src[0].src, 5801e04c3fSmrg instr->src[i == 1 ? 2 : 1].src); 597ec681f3Smrg nir_alu_src_copy(&instr->src[0], &instr->src[i == 1 ? 2 : 1]); 6001e04c3fSmrg 6101e04c3fSmrg nir_src empty_src; 6201e04c3fSmrg memset(&empty_src, 0, sizeof(empty_src)); 6301e04c3fSmrg nir_instr_rewrite_src(&instr->instr, &instr->src[1].src, empty_src); 6401e04c3fSmrg nir_instr_rewrite_src(&instr->instr, &instr->src[2].src, empty_src); 657ec681f3Smrg instr->op = nir_op_mov; 6601e04c3fSmrg 6701e04c3fSmrg return true; 6801e04c3fSmrg } 6901e04c3fSmrg 7001e04c3fSmrg return false; 7101e04c3fSmrg} 7201e04c3fSmrg 7301e04c3fSmrg/** 7401e04c3fSmrg * Replace vecN(undef, undef, ...) with a single undef. 7501e04c3fSmrg */ 7601e04c3fSmrgstatic bool 7701e04c3fSmrgopt_undef_vecN(nir_builder *b, nir_alu_instr *alu) 7801e04c3fSmrg{ 797ec681f3Smrg if (!nir_op_is_vec(alu->op)) 8001e04c3fSmrg return false; 8101e04c3fSmrg 8201e04c3fSmrg assert(alu->dest.dest.is_ssa); 8301e04c3fSmrg 8401e04c3fSmrg for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 8501e04c3fSmrg if (!alu->src[i].src.is_ssa || 8601e04c3fSmrg alu->src[i].src.ssa->parent_instr->type != nir_instr_type_ssa_undef) 8701e04c3fSmrg return false; 8801e04c3fSmrg } 8901e04c3fSmrg 9001e04c3fSmrg b->cursor = nir_before_instr(&alu->instr); 9101e04c3fSmrg nir_ssa_def *undef = nir_ssa_undef(b, alu->dest.dest.ssa.num_components, 9201e04c3fSmrg nir_dest_bit_size(alu->dest.dest)); 937ec681f3Smrg nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, undef); 9401e04c3fSmrg 9501e04c3fSmrg return true; 9601e04c3fSmrg} 9701e04c3fSmrg 987ec681f3Smrgstatic uint32_t 997ec681f3Smrgnir_get_undef_mask(nir_ssa_def *def) 1007ec681f3Smrg{ 1017ec681f3Smrg nir_instr *instr = def->parent_instr; 1027ec681f3Smrg 1037ec681f3Smrg if (instr->type == nir_instr_type_ssa_undef) 1047ec681f3Smrg return BITSET_MASK(def->num_components); 1057ec681f3Smrg 1067ec681f3Smrg if (instr->type != nir_instr_type_alu) 1077ec681f3Smrg return 0; 1087ec681f3Smrg 1097ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(instr); 1107ec681f3Smrg unsigned undef = 0; 1117ec681f3Smrg 1127ec681f3Smrg if (nir_op_is_vec(alu->op)) { 1137ec681f3Smrg for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 1147ec681f3Smrg if (alu->src[i].src.is_ssa && 1157ec681f3Smrg alu->src[i].src.ssa->parent_instr->type == 1167ec681f3Smrg nir_instr_type_ssa_undef) { 1177ec681f3Smrg undef |= BITSET_MASK(nir_ssa_alu_instr_src_components(alu, i)) << i; 1187ec681f3Smrg } 1197ec681f3Smrg } 1207ec681f3Smrg } 1217ec681f3Smrg 1227ec681f3Smrg return undef; 1237ec681f3Smrg} 1247ec681f3Smrg 12501e04c3fSmrg/** 1267ec681f3Smrg * Remove any store intrinsic writemask channels whose value is undefined (the 1277ec681f3Smrg * existing value is a fine representation of "undefined"). 12801e04c3fSmrg */ 12901e04c3fSmrgstatic bool 13001e04c3fSmrgopt_undef_store(nir_intrinsic_instr *intrin) 13101e04c3fSmrg{ 13201e04c3fSmrg int arg_index; 13301e04c3fSmrg switch (intrin->intrinsic) { 13401e04c3fSmrg case nir_intrinsic_store_deref: 13501e04c3fSmrg arg_index = 1; 13601e04c3fSmrg break; 13701e04c3fSmrg case nir_intrinsic_store_output: 13801e04c3fSmrg case nir_intrinsic_store_per_vertex_output: 1397ec681f3Smrg case nir_intrinsic_store_per_primitive_output: 14001e04c3fSmrg case nir_intrinsic_store_ssbo: 14101e04c3fSmrg case nir_intrinsic_store_shared: 1427ec681f3Smrg case nir_intrinsic_store_global: 1437ec681f3Smrg case nir_intrinsic_store_scratch: 14401e04c3fSmrg arg_index = 0; 14501e04c3fSmrg break; 14601e04c3fSmrg default: 14701e04c3fSmrg return false; 14801e04c3fSmrg } 14901e04c3fSmrg 1507ec681f3Smrg if (!intrin->src[arg_index].is_ssa) 1517ec681f3Smrg return false; 1527ec681f3Smrg 1537ec681f3Smrg nir_ssa_def *def = intrin->src[arg_index].ssa; 1547ec681f3Smrg 1557ec681f3Smrg unsigned write_mask = nir_intrinsic_write_mask(intrin); 1567ec681f3Smrg unsigned undef_mask = nir_get_undef_mask(def); 1577ec681f3Smrg 1587ec681f3Smrg if (!(write_mask & undef_mask)) 15901e04c3fSmrg return false; 16001e04c3fSmrg 1617ec681f3Smrg write_mask &= ~undef_mask; 1627ec681f3Smrg if (!write_mask) 1637ec681f3Smrg nir_instr_remove(&intrin->instr); 1647ec681f3Smrg else 1657ec681f3Smrg nir_intrinsic_set_write_mask(intrin, write_mask); 16601e04c3fSmrg 16701e04c3fSmrg return true; 16801e04c3fSmrg} 16901e04c3fSmrg 1707ec681f3Smrgstatic bool 1717ec681f3Smrgnir_opt_undef_instr(nir_builder *b, nir_instr *instr, void *data) 17201e04c3fSmrg{ 1737ec681f3Smrg if (instr->type == nir_instr_type_alu) { 1747ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(instr); 1757ec681f3Smrg return opt_undef_csel(alu) || opt_undef_vecN(b, alu); 1767ec681f3Smrg } else if (instr->type == nir_instr_type_intrinsic) { 1777ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 1787ec681f3Smrg return opt_undef_store(intrin); 17901e04c3fSmrg } 18001e04c3fSmrg 1817ec681f3Smrg return false; 1827ec681f3Smrg} 1837ec681f3Smrg 1847ec681f3Smrgbool 1857ec681f3Smrgnir_opt_undef(nir_shader *shader) 1867ec681f3Smrg{ 1877ec681f3Smrg return nir_shader_instructions_pass(shader, 1887ec681f3Smrg nir_opt_undef_instr, 1897ec681f3Smrg nir_metadata_block_index | 1907ec681f3Smrg nir_metadata_dominance, 1917ec681f3Smrg NULL); 19201e04c3fSmrg} 193