nir_inline_functions.c revision b8e80941
1/* 2 * Copyright © 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir.h" 25#include "nir_builder.h" 26#include "nir_control_flow.h" 27#include "nir_vla.h" 28 29void nir_inline_function_impl(struct nir_builder *b, 30 const nir_function_impl *impl, 31 nir_ssa_def **params) 32{ 33 nir_function_impl *copy = nir_function_impl_clone(b->shader, impl); 34 35 /* Insert a nop at the cursor so we can keep track of where things are as 36 * we add/remove stuff from the CFG. 37 */ 38 nir_intrinsic_instr *nop = 39 nir_intrinsic_instr_create(b->shader, nir_intrinsic_nop); 40 nir_builder_instr_insert(b, &nop->instr); 41 42 exec_list_append(&b->impl->locals, ©->locals); 43 exec_list_append(&b->impl->registers, ©->registers); 44 45 nir_foreach_block(block, copy) { 46 nir_foreach_instr_safe(instr, block) { 47 /* Returns have to be lowered for this to work */ 48 assert(instr->type != nir_instr_type_jump || 49 nir_instr_as_jump(instr)->type != nir_jump_return); 50 51 if (instr->type != nir_instr_type_intrinsic) 52 continue; 53 54 nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr); 55 if (load->intrinsic != nir_intrinsic_load_param) 56 continue; 57 58 unsigned param_idx = nir_intrinsic_param_idx(load); 59 assert(param_idx < impl->function->num_params); 60 assert(load->dest.is_ssa); 61 nir_ssa_def_rewrite_uses(&load->dest.ssa, 62 nir_src_for_ssa(params[param_idx])); 63 64 /* Remove any left-over load_param intrinsics because they're soon 65 * to be in another function and therefore no longer valid. 66 */ 67 nir_instr_remove(&load->instr); 68 } 69 } 70 71 /* Pluck the body out of the function and place it here */ 72 nir_cf_list body; 73 nir_cf_list_extract(&body, ©->body); 74 nir_cf_reinsert(&body, nir_before_instr(&nop->instr)); 75 76 b->cursor = nir_instr_remove(&nop->instr); 77} 78 79static bool inline_function_impl(nir_function_impl *impl, struct set *inlined); 80 81static bool 82inline_functions_block(nir_block *block, nir_builder *b, 83 struct set *inlined) 84{ 85 bool progress = false; 86 /* This is tricky. We're iterating over instructions in a block but, as 87 * we go, the block and its instruction list are being split into 88 * pieces. However, this *should* be safe since foreach_safe always 89 * stashes the next thing in the iteration. That next thing will 90 * properly get moved to the next block when it gets split, and we 91 * continue iterating there. 92 */ 93 nir_foreach_instr_safe(instr, block) { 94 if (instr->type != nir_instr_type_call) 95 continue; 96 97 progress = true; 98 99 nir_call_instr *call = nir_instr_as_call(instr); 100 assert(call->callee->impl); 101 102 /* Make sure that the function we're calling is already inlined */ 103 inline_function_impl(call->callee->impl, inlined); 104 105 b->cursor = nir_instr_remove(&call->instr); 106 107 /* Rewrite all of the uses of the callee's parameters to use the call 108 * instructions sources. In order to ensure that the "load" happens 109 * here and not later (for register sources), we make sure to convert it 110 * to an SSA value first. 111 */ 112 const unsigned num_params = call->num_params; 113 NIR_VLA(nir_ssa_def *, params, num_params); 114 for (unsigned i = 0; i < num_params; i++) { 115 params[i] = nir_ssa_for_src(b, call->params[i], 116 call->callee->params[i].num_components); 117 } 118 119 nir_inline_function_impl(b, call->callee->impl, params); 120 } 121 122 return progress; 123} 124 125static bool 126inline_function_impl(nir_function_impl *impl, struct set *inlined) 127{ 128 if (_mesa_set_search(inlined, impl)) 129 return false; /* Already inlined */ 130 131 nir_builder b; 132 nir_builder_init(&b, impl); 133 134 bool progress = false; 135 nir_foreach_block_safe(block, impl) { 136 progress |= inline_functions_block(block, &b, inlined); 137 } 138 139 if (progress) { 140 /* SSA and register indices are completely messed up now */ 141 nir_index_ssa_defs(impl); 142 nir_index_local_regs(impl); 143 144 nir_metadata_preserve(impl, nir_metadata_none); 145 } else { 146#ifndef NDEBUG 147 impl->valid_metadata &= ~nir_metadata_not_properly_reset; 148#endif 149 } 150 151 _mesa_set_add(inlined, impl); 152 153 return progress; 154} 155 156/** A pass to inline all functions in a shader into their callers 157 * 158 * For most use-cases, function inlining is a multi-step process. The general 159 * pattern employed by SPIR-V consumers and others is as follows: 160 * 161 * 1. nir_lower_constant_initializers(shader, nir_var_function_temp) 162 * 163 * This is needed because local variables from the callee are simply added 164 * to the locals list for the caller and the information about where the 165 * constant initializer logically happens is lost. If the callee is 166 * called in a loop, this can cause the variable to go from being 167 * initialized once per loop iteration to being initialized once at the 168 * top of the caller and values to persist from one invocation of the 169 * callee to the next. The simple solution to this problem is to get rid 170 * of constant initializers before function inlining. 171 * 172 * 2. nir_lower_returns(shader) 173 * 174 * nir_inline_functions assumes that all functions end "naturally" by 175 * execution reaching the end of the function without any return 176 * instructions causing instant jumps to the end. Thanks to NIR being 177 * structured, we can't represent arbitrary jumps to various points in the 178 * program which is what an early return in the callee would have to turn 179 * into when we inline it into the caller. Instead, we require returns to 180 * be lowered which lets us just copy+paste the callee directly into the 181 * caller. 182 * 183 * 3. nir_inline_functions(shader) 184 * 185 * This does the actual function inlining and the resulting shader will 186 * contain no call instructions. 187 * 188 * 4. nir_opt_deref(shader) 189 * 190 * Most functions contain pointer parameters where the result of a deref 191 * instruction is passed in as a parameter, loaded via a load_param 192 * intrinsic, and then turned back into a deref via a cast. Function 193 * inlining will get rid of the load_param but we are still left with a 194 * cast. Running nir_opt_deref gets rid of the intermediate cast and 195 * results in a whole deref chain again. This is currently required by a 196 * number of optimizations and lowering passes at least for certain 197 * variable modes. 198 * 199 * 5. Loop over the functions and delete all but the main entrypoint. 200 * 201 * In the Intel Vulkan driver this looks like this: 202 * 203 * foreach_list_typed_safe(nir_function, func, node, &nir->functions) { 204 * if (func != entry_point) 205 * exec_node_remove(&func->node); 206 * } 207 * assert(exec_list_length(&nir->functions) == 1); 208 * 209 * While nir_inline_functions does get rid of all call instructions, it 210 * doesn't get rid of any functions because it doesn't know what the "root 211 * function" is. Instead, it's up to the individual driver to know how to 212 * decide on a root function and delete the rest. With SPIR-V, 213 * spirv_to_nir returns the root function and so we can just use == whereas 214 * with GL, you may have to look for a function named "main". 215 * 216 * 6. nir_lower_constant_initializers(shader, ~nir_var_function_temp) 217 * 218 * Lowering constant initializers on inputs, outputs, global variables, 219 * etc. requires that we know the main entrypoint so that we know where to 220 * initialize them. Otherwise, we would have to assume that anything 221 * could be a main entrypoint and initialize them at the start of every 222 * function but that would clearly be wrong if any of those functions were 223 * ever called within another function. Simply requiring a single- 224 * entrypoint function shader is the best way to make it well-defined. 225 */ 226bool 227nir_inline_functions(nir_shader *shader) 228{ 229 struct set *inlined = _mesa_pointer_set_create(NULL); 230 bool progress = false; 231 232 nir_foreach_function(function, shader) { 233 if (function->impl) 234 progress = inline_function_impl(function->impl, inlined) || progress; 235 } 236 237 _mesa_set_destroy(inlined, NULL); 238 239 return progress; 240} 241