1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2016 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg * 23b8e80941Smrg * Authors: 24b8e80941Smrg * Jason Ekstrand (jason@jlekstrand.net) 25b8e80941Smrg * 26b8e80941Smrg */ 27b8e80941Smrg 28b8e80941Smrg/* 29b8e80941Smrg * This lowering pass converts references to variables with loads/stores to 30b8e80941Smrg * scratch space based on a few configurable parameters. 31b8e80941Smrg */ 32b8e80941Smrg 33b8e80941Smrg#include "nir.h" 34b8e80941Smrg#include "nir_builder.h" 35b8e80941Smrg#include "nir_deref.h" 36b8e80941Smrg 37b8e80941Smrgstatic bool 38b8e80941Smrgderef_has_indirect(nir_deref_instr *deref) 39b8e80941Smrg{ 40b8e80941Smrg while (deref->deref_type != nir_deref_type_var) { 41b8e80941Smrg if (deref->deref_type == nir_deref_type_array && 42b8e80941Smrg nir_src_as_const_value(deref->arr.index) == NULL) 43b8e80941Smrg return true; 44b8e80941Smrg 45b8e80941Smrg deref = nir_deref_instr_parent(deref); 46b8e80941Smrg } 47b8e80941Smrg 48b8e80941Smrg return false; 49b8e80941Smrg} 50b8e80941Smrg 51b8e80941Smrgstatic void 52b8e80941Smrglower_load_store(nir_builder *b, 53b8e80941Smrg nir_intrinsic_instr *intrin, 54b8e80941Smrg glsl_type_size_align_func size_align) 55b8e80941Smrg{ 56b8e80941Smrg b->cursor = nir_before_instr(&intrin->instr); 57b8e80941Smrg 58b8e80941Smrg nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 59b8e80941Smrg nir_variable *var = nir_deref_instr_get_variable(deref); 60b8e80941Smrg 61b8e80941Smrg nir_ssa_def *offset = 62b8e80941Smrg nir_iadd_imm(b, nir_build_deref_offset(b, deref, size_align), 63b8e80941Smrg var->data.location); 64b8e80941Smrg 65b8e80941Smrg unsigned align, UNUSED size; 66b8e80941Smrg size_align(deref->type, &size, &align); 67b8e80941Smrg 68b8e80941Smrg if (intrin->intrinsic == nir_intrinsic_load_deref) { 69b8e80941Smrg nir_intrinsic_instr *load = 70b8e80941Smrg nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_scratch); 71b8e80941Smrg load->num_components = intrin->num_components; 72b8e80941Smrg load->src[0] = nir_src_for_ssa(offset); 73b8e80941Smrg nir_intrinsic_set_align(load, align, 0); 74b8e80941Smrg nir_ssa_dest_init(&load->instr, &load->dest, 75b8e80941Smrg intrin->dest.ssa.num_components, 76b8e80941Smrg intrin->dest.ssa.bit_size, NULL); 77b8e80941Smrg nir_builder_instr_insert(b, &load->instr); 78b8e80941Smrg 79b8e80941Smrg nir_ssa_def *value = &load->dest.ssa; 80b8e80941Smrg if (glsl_type_is_boolean(deref->type)) 81b8e80941Smrg value = nir_b2i32(b, value); 82b8e80941Smrg 83b8e80941Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 84b8e80941Smrg nir_src_for_ssa(&load->dest.ssa)); 85b8e80941Smrg } else { 86b8e80941Smrg assert(intrin->intrinsic == nir_intrinsic_store_deref); 87b8e80941Smrg 88b8e80941Smrg assert(intrin->src[1].is_ssa); 89b8e80941Smrg nir_ssa_def *value = intrin->src[1].ssa; 90b8e80941Smrg if (glsl_type_is_boolean(deref->type)) 91b8e80941Smrg value = nir_i2b(b, value); 92b8e80941Smrg 93b8e80941Smrg nir_intrinsic_instr *store = 94b8e80941Smrg nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_scratch); 95b8e80941Smrg store->num_components = intrin->num_components; 96b8e80941Smrg store->src[0] = nir_src_for_ssa(value); 97b8e80941Smrg store->src[1] = nir_src_for_ssa(offset); 98b8e80941Smrg nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin)); 99b8e80941Smrg nir_intrinsic_set_align(store, align, 0); 100b8e80941Smrg nir_builder_instr_insert(b, &store->instr); 101b8e80941Smrg } 102b8e80941Smrg 103b8e80941Smrg nir_instr_remove(&intrin->instr); 104b8e80941Smrg nir_deref_instr_remove_if_unused(deref); 105b8e80941Smrg} 106b8e80941Smrg 107b8e80941Smrgbool 108b8e80941Smrgnir_lower_vars_to_scratch(nir_shader *shader, 109b8e80941Smrg nir_variable_mode modes, 110b8e80941Smrg int size_threshold, 111b8e80941Smrg glsl_type_size_align_func size_align) 112b8e80941Smrg{ 113b8e80941Smrg /* First, we walk the instructions and flag any variables we want to lower 114b8e80941Smrg * by removing them from their respective list and setting the mode to 0. 115b8e80941Smrg */ 116b8e80941Smrg nir_foreach_function(function, shader) { 117b8e80941Smrg nir_foreach_block(block, function->impl) { 118b8e80941Smrg nir_foreach_instr(instr, block) { 119b8e80941Smrg if (instr->type != nir_instr_type_intrinsic) 120b8e80941Smrg continue; 121b8e80941Smrg 122b8e80941Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 123b8e80941Smrg if (intrin->intrinsic != nir_intrinsic_load_deref && 124b8e80941Smrg intrin->intrinsic != nir_intrinsic_store_deref) 125b8e80941Smrg continue; 126b8e80941Smrg 127b8e80941Smrg nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 128b8e80941Smrg if (!(deref->mode & modes)) 129b8e80941Smrg continue; 130b8e80941Smrg 131b8e80941Smrg if (!deref_has_indirect(nir_src_as_deref(intrin->src[0]))) 132b8e80941Smrg continue; 133b8e80941Smrg 134b8e80941Smrg nir_variable *var = nir_deref_instr_get_variable(deref); 135b8e80941Smrg 136b8e80941Smrg /* We set var->mode to 0 to indicate that a variable will be moved 137b8e80941Smrg * to scratch. Don't assign a scratch location twice. 138b8e80941Smrg */ 139b8e80941Smrg if (var->data.mode == 0) 140b8e80941Smrg continue; 141b8e80941Smrg 142b8e80941Smrg unsigned var_size, var_align; 143b8e80941Smrg size_align(var->type, &var_size, &var_align); 144b8e80941Smrg if (var_size <= size_threshold) 145b8e80941Smrg continue; 146b8e80941Smrg 147b8e80941Smrg /* Remove it from its list */ 148b8e80941Smrg exec_node_remove(&var->node); 149b8e80941Smrg /* Invalid mode used to flag "moving to scratch" */ 150b8e80941Smrg var->data.mode = 0; 151b8e80941Smrg 152b8e80941Smrg var->data.location = ALIGN_POT(shader->scratch_size, var_align); 153b8e80941Smrg shader->scratch_size = var->data.location + var_size; 154b8e80941Smrg } 155b8e80941Smrg } 156b8e80941Smrg } 157b8e80941Smrg 158b8e80941Smrg bool progress = false; 159b8e80941Smrg nir_foreach_function(function, shader) { 160b8e80941Smrg if (!function->impl) 161b8e80941Smrg continue; 162b8e80941Smrg 163b8e80941Smrg nir_builder build; 164b8e80941Smrg nir_builder_init(&build, function->impl); 165b8e80941Smrg 166b8e80941Smrg bool impl_progress = false; 167b8e80941Smrg nir_foreach_block(block, function->impl) { 168b8e80941Smrg nir_foreach_instr_safe(instr, block) { 169b8e80941Smrg if (instr->type != nir_instr_type_intrinsic) 170b8e80941Smrg continue; 171b8e80941Smrg 172b8e80941Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 173b8e80941Smrg if (intrin->intrinsic != nir_intrinsic_load_deref && 174b8e80941Smrg intrin->intrinsic != nir_intrinsic_store_deref) 175b8e80941Smrg continue; 176b8e80941Smrg 177b8e80941Smrg nir_variable *var = nir_intrinsic_get_var(intrin, 0); 178b8e80941Smrg /* Variables flagged for lowering above have mode == 0 */ 179b8e80941Smrg if (!var || var->data.mode) 180b8e80941Smrg continue; 181b8e80941Smrg 182b8e80941Smrg lower_load_store(&build, intrin, size_align); 183b8e80941Smrg impl_progress = true; 184b8e80941Smrg } 185b8e80941Smrg } 186b8e80941Smrg 187b8e80941Smrg if (impl_progress) { 188b8e80941Smrg progress = true; 189b8e80941Smrg nir_metadata_preserve(function->impl, nir_metadata_block_index | 190b8e80941Smrg nir_metadata_dominance); 191b8e80941Smrg } 192b8e80941Smrg } 193b8e80941Smrg 194b8e80941Smrg return progress; 195b8e80941Smrg} 196