/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ir3_nir.h"
#include "ir3_compiler.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "mesa/main/macros.h"

/* Returns the range of UBO bytes touched by a constant-offset load_ubo,
 * widened to the 64-byte (4 vec4) granularity that ranges are tracked
 * and uploaded at.  Assumes 32-bit components.
 */
static inline struct ir3_ubo_range
get_ubo_load_range(nir_intrinsic_instr *instr)
{
   struct ir3_ubo_range r;

   const int offset = nir_src_as_uint(instr->src[1]);
   const int bytes = nir_intrinsic_dest_components(instr) * 4;

   r.start = ROUND_DOWN_TO(offset, 16 * 4);
   r.end = ALIGN(offset + bytes, 16 * 4);

   return r;
}

/* Grows the accessed range recorded for a UBO block to cover a load with
 * constant block index and constant offset.
 */
static void
gather_ubo_ranges(nir_intrinsic_instr *instr,
                  struct ir3_ubo_analysis_state *state)
{
   if (!nir_src_is_const(instr->src[0]))
      return;

   if (!nir_src_is_const(instr->src[1]))
      return;

   const struct ir3_ubo_range r = get_ubo_load_range(instr);
   const uint32_t block = nir_src_as_uint(instr->src[0]);

   /* If UBO lowering is disabled, we still want to lower block 0
    * (which is normal uniforms):
    */
   if ((block > 0) && (ir3_shader_debug & IR3_DBG_NOUBOOPT))
      return;

   if (r.start < state->range[block].start)
      state->range[block].start = r.start;
   if (state->range[block].end < r.end)
      state->range[block].end = r.end;
}
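/* Worked example of the range arithmetic above (illustrative numbers, not
 * part of the pass): a load_ubo of a single vec4 of 32-bit components at
 * constant byte offset 72 has bytes = 4 * 4 = 16, so at the 16 * 4 = 64
 * byte granularity:
 *
 *    r.start = ROUND_DOWN_TO(72, 64) = 64
 *    r.end   = ALIGN(72 + 16, 64)    = 128
 *
 * and the block's recorded range grows to cover at least bytes [64, 128)
 * of the UBO.
 */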
/* Rewrites a lowerable load_ubo into a load_uniform that reads from the
 * block's uploaded range, converting the offset from bytes to dwords.
 */
static void
lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
                          struct ir3_ubo_analysis_state *state)
{
   /* We don't lower dynamic block index UBO loads to load_uniform, but we
    * could probably with some effort determine a block stride in number of
    * registers.
    */
   if (!nir_src_is_const(instr->src[0]))
      return;

   const uint32_t block = nir_src_as_uint(instr->src[0]);

   if (block > 0) {
      /* We don't lower dynamic array indexing either, but we definitely
       * should.  We don't have a good way of determining the range of the
       * dynamic access, so for now just fall back to pulling.
       */
      if (!nir_src_is_const(instr->src[1]))
         return;

      /* After gathering the UBO access ranges, we limit the total
       * upload.  Reject if we're now outside the range.
       */
      const struct ir3_ubo_range r = get_ubo_load_range(instr);
      if (!(state->range[block].start <= r.start &&
            r.end <= state->range[block].end))
         return;
   }

   b->cursor = nir_before_instr(&instr->instr);

   nir_ssa_def *ubo_offset = nir_ssa_for_src(b, instr->src[1], 1);
   nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, -2);
   if (new_offset)
      ubo_offset = new_offset;
   else
      ubo_offset = nir_ushr(b, ubo_offset, nir_imm_int(b, 2));

   const int range_offset =
      (state->range[block].offset - state->range[block].start) / 4;
   nir_ssa_def *uniform_offset =
      nir_iadd(b, ubo_offset, nir_imm_int(b, range_offset));

   nir_intrinsic_instr *uniform =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
   uniform->num_components = instr->num_components;
   uniform->src[0] = nir_src_for_ssa(uniform_offset);
   nir_ssa_dest_init(&uniform->instr, &uniform->dest,
                     uniform->num_components, instr->dest.ssa.bit_size,
                     instr->dest.ssa.name);
   nir_builder_instr_insert(b, &uniform->instr);
   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                            nir_src_for_ssa(&uniform->dest.ssa));

   nir_instr_remove(&instr->instr);

   state->lower_count++;
}

bool
ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
{
   struct ir3_ubo_analysis_state *state = &shader->ubo_state;

   memset(state, 0, sizeof(*state));
   state->range[0].end = align(nir->num_uniforms * 16, 16 * 4); /* align to 4*vec4 */

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_foreach_block(block, function->impl) {
            nir_foreach_instr(instr, block) {
               if (instr->type == nir_instr_type_intrinsic &&
                   nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_ubo)
                  gather_ubo_ranges(nir_instr_as_intrinsic(instr), state);
            }
         }
      }
   }

   /* For now, everything we upload is accessed statically and thus will
    * be used by the shader.  Once we can upload dynamically indexed data,
    * we may upload sparsely accessed arrays, at which point we probably
    * want to give priority to smaller UBOs, on the assumption that big
    * UBOs will be accessed dynamically.  Alternatively, we can track
    * statically and dynamically accessed ranges separately and upload
    * static ranges first.
    */
   const uint32_t max_upload = 16 * 1024;
   uint32_t offset = 0;
   for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
      uint32_t range_size = state->range[i].end - state->range[i].start;

      debug_assert(offset <= max_upload);
      state->range[i].offset = offset;
      if (offset + range_size > max_upload) {
         range_size = max_upload - offset;
         state->range[i].end = state->range[i].start + range_size;
      }
      offset += range_size;
   }
   state->size = offset;

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder builder;
         nir_builder_init(&builder, function->impl);
         nir_foreach_block(block, function->impl) {
            nir_foreach_instr_safe(instr, block) {
               if (instr->type == nir_instr_type_intrinsic &&
                   nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_ubo)
                  lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr), &builder, state);
            }
         }

         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                               nir_metadata_dominance);
      }
   }

   return state->lower_count > 0;
}
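/* Worked example of the lowering arithmetic (illustrative numbers, not
 * part of the pass): suppose block 1 ends up with range = { start = 64,
 * end = 128, offset = 256 }, i.e. UBO bytes [64, 128) are uploaded
 * starting at byte 256 of the constant file.  In
 * lower_ubo_load_to_uniform(), a load_ubo at constant byte offset 72
 * then becomes:
 *
 *    ubo_offset          = 72 >> 2        = 18  (dwords)
 *    range_offset        = (256 - 64) / 4 = 48  (dwords)
 *    load_uniform offset = 18 + 48        = 66  (dwords)
 *
 * which is (72 - 64 + 256) / 4: the load's position within the uploaded
 * window.
 */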