/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

/** @file nir_opt_undef.c
 *
 * Handles optimization of operations involving ssa_undef.
 */

/**
 * Turn conditional selects between an undef and some other value into a move
 * of that other value (on the assumption that the condition's going to be
 * choosing the defined value).
This reduces work after if flattening when 36b8e80941Smrg * each side of the if is defining a variable. 37b8e80941Smrg */ 38b8e80941Smrgstatic bool 39b8e80941Smrgopt_undef_csel(nir_alu_instr *instr) 40b8e80941Smrg{ 41b8e80941Smrg if (instr->op != nir_op_bcsel && instr->op != nir_op_fcsel) 42b8e80941Smrg return false; 43b8e80941Smrg 44b8e80941Smrg assert(instr->dest.dest.is_ssa); 45b8e80941Smrg 46b8e80941Smrg for (int i = 1; i <= 2; i++) { 47b8e80941Smrg if (!instr->src[i].src.is_ssa) 48b8e80941Smrg continue; 49b8e80941Smrg 50b8e80941Smrg nir_instr *parent = instr->src[i].src.ssa->parent_instr; 51b8e80941Smrg if (parent->type != nir_instr_type_ssa_undef) 52b8e80941Smrg continue; 53b8e80941Smrg 54b8e80941Smrg /* We can't just use nir_alu_src_copy, because we need the def/use 55b8e80941Smrg * updated. 56b8e80941Smrg */ 57b8e80941Smrg nir_instr_rewrite_src(&instr->instr, &instr->src[0].src, 58b8e80941Smrg instr->src[i == 1 ? 2 : 1].src); 59b8e80941Smrg nir_alu_src_copy(&instr->src[0], &instr->src[i == 1 ? 2 : 1], 60b8e80941Smrg ralloc_parent(instr)); 61b8e80941Smrg 62b8e80941Smrg nir_src empty_src; 63b8e80941Smrg memset(&empty_src, 0, sizeof(empty_src)); 64b8e80941Smrg nir_instr_rewrite_src(&instr->instr, &instr->src[1].src, empty_src); 65b8e80941Smrg nir_instr_rewrite_src(&instr->instr, &instr->src[2].src, empty_src); 66b8e80941Smrg instr->op = nir_op_imov; 67b8e80941Smrg 68b8e80941Smrg return true; 69b8e80941Smrg } 70b8e80941Smrg 71b8e80941Smrg return false; 72b8e80941Smrg} 73b8e80941Smrg 74b8e80941Smrg/** 75b8e80941Smrg * Replace vecN(undef, undef, ...) with a single undef. 
76b8e80941Smrg */ 77b8e80941Smrgstatic bool 78b8e80941Smrgopt_undef_vecN(nir_builder *b, nir_alu_instr *alu) 79b8e80941Smrg{ 80b8e80941Smrg if (alu->op != nir_op_vec2 && 81b8e80941Smrg alu->op != nir_op_vec3 && 82b8e80941Smrg alu->op != nir_op_vec4 && 83b8e80941Smrg alu->op != nir_op_fmov && 84b8e80941Smrg alu->op != nir_op_imov) 85b8e80941Smrg return false; 86b8e80941Smrg 87b8e80941Smrg assert(alu->dest.dest.is_ssa); 88b8e80941Smrg 89b8e80941Smrg for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 90b8e80941Smrg if (!alu->src[i].src.is_ssa || 91b8e80941Smrg alu->src[i].src.ssa->parent_instr->type != nir_instr_type_ssa_undef) 92b8e80941Smrg return false; 93b8e80941Smrg } 94b8e80941Smrg 95b8e80941Smrg b->cursor = nir_before_instr(&alu->instr); 96b8e80941Smrg nir_ssa_def *undef = nir_ssa_undef(b, alu->dest.dest.ssa.num_components, 97b8e80941Smrg nir_dest_bit_size(alu->dest.dest)); 98b8e80941Smrg nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(undef)); 99b8e80941Smrg 100b8e80941Smrg return true; 101b8e80941Smrg} 102b8e80941Smrg 103b8e80941Smrg/** 104b8e80941Smrg * Remove any store intrinsics whose value is undefined (the existing 105b8e80941Smrg * value is a fine representation of "undefined"). 
106b8e80941Smrg */ 107b8e80941Smrgstatic bool 108b8e80941Smrgopt_undef_store(nir_intrinsic_instr *intrin) 109b8e80941Smrg{ 110b8e80941Smrg int arg_index; 111b8e80941Smrg switch (intrin->intrinsic) { 112b8e80941Smrg case nir_intrinsic_store_deref: 113b8e80941Smrg arg_index = 1; 114b8e80941Smrg break; 115b8e80941Smrg case nir_intrinsic_store_output: 116b8e80941Smrg case nir_intrinsic_store_per_vertex_output: 117b8e80941Smrg case nir_intrinsic_store_ssbo: 118b8e80941Smrg case nir_intrinsic_store_shared: 119b8e80941Smrg arg_index = 0; 120b8e80941Smrg break; 121b8e80941Smrg default: 122b8e80941Smrg return false; 123b8e80941Smrg } 124b8e80941Smrg 125b8e80941Smrg if (!intrin->src[arg_index].is_ssa || 126b8e80941Smrg intrin->src[arg_index].ssa->parent_instr->type != nir_instr_type_ssa_undef) 127b8e80941Smrg return false; 128b8e80941Smrg 129b8e80941Smrg nir_instr_remove(&intrin->instr); 130b8e80941Smrg 131b8e80941Smrg return true; 132b8e80941Smrg} 133b8e80941Smrg 134b8e80941Smrgbool 135b8e80941Smrgnir_opt_undef(nir_shader *shader) 136b8e80941Smrg{ 137b8e80941Smrg nir_builder b; 138b8e80941Smrg bool progress = false; 139b8e80941Smrg 140b8e80941Smrg nir_foreach_function(function, shader) { 141b8e80941Smrg if (function->impl) { 142b8e80941Smrg nir_builder_init(&b, function->impl); 143b8e80941Smrg nir_foreach_block(block, function->impl) { 144b8e80941Smrg nir_foreach_instr_safe(instr, block) { 145b8e80941Smrg if (instr->type == nir_instr_type_alu) { 146b8e80941Smrg nir_alu_instr *alu = nir_instr_as_alu(instr); 147b8e80941Smrg 148b8e80941Smrg progress = opt_undef_csel(alu) || progress; 149b8e80941Smrg progress = opt_undef_vecN(&b, alu) || progress; 150b8e80941Smrg } else if (instr->type == nir_instr_type_intrinsic) { 151b8e80941Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 152b8e80941Smrg progress = opt_undef_store(intrin) || progress; 153b8e80941Smrg } 154b8e80941Smrg } 155b8e80941Smrg } 156b8e80941Smrg 157b8e80941Smrg if (progress) { 158b8e80941Smrg 
nir_metadata_preserve(function->impl, 159b8e80941Smrg nir_metadata_block_index | 160b8e80941Smrg nir_metadata_dominance); 161b8e80941Smrg } else { 162b8e80941Smrg#ifndef NDEBUG 163b8e80941Smrg function->impl->valid_metadata &= ~nir_metadata_not_properly_reset; 164b8e80941Smrg#endif 165b8e80941Smrg } 166b8e80941Smrg } 167b8e80941Smrg } 168b8e80941Smrg 169b8e80941Smrg return progress; 170b8e80941Smrg} 171