1/* 2 * Copyright © 2016 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir.h" 25#include "nir_builder.h" 26#include "nir_deref.h" 27 28/** @file nir_lower_io_to_scalar.c 29 * 30 * Replaces nir_load_input/nir_store_output operations with num_components != 31 * 1 with individual per-channel operations. 32 */ 33 34static void 35lower_load_input_to_scalar(nir_builder *b, nir_intrinsic_instr *intr) 36{ 37 b->cursor = nir_before_instr(&intr->instr); 38 39 assert(intr->dest.is_ssa); 40 41 nir_ssa_def *loads[NIR_MAX_VEC_COMPONENTS]; 42 43 for (unsigned i = 0; i < intr->num_components; i++) { 44 nir_intrinsic_instr *chan_intr = 45 nir_intrinsic_instr_create(b->shader, intr->intrinsic); 46 nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest, 47 1, intr->dest.ssa.bit_size, NULL); 48 chan_intr->num_components = 1; 49 50 nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr)); 51 nir_intrinsic_set_component(chan_intr, nir_intrinsic_component(intr) + i); 52 /* offset */ 53 nir_src_copy(&chan_intr->src[0], &intr->src[0], chan_intr); 54 55 nir_builder_instr_insert(b, &chan_intr->instr); 56 57 loads[i] = &chan_intr->dest.ssa; 58 } 59 60 nir_ssa_def_rewrite_uses(&intr->dest.ssa, 61 nir_src_for_ssa(nir_vec(b, loads, 62 intr->num_components))); 63 nir_instr_remove(&intr->instr); 64} 65 66static void 67lower_store_output_to_scalar(nir_builder *b, nir_intrinsic_instr *intr) 68{ 69 b->cursor = nir_before_instr(&intr->instr); 70 71 nir_ssa_def *value = nir_ssa_for_src(b, intr->src[0], intr->num_components); 72 73 for (unsigned i = 0; i < intr->num_components; i++) { 74 if (!(nir_intrinsic_write_mask(intr) & (1 << i))) 75 continue; 76 77 nir_intrinsic_instr *chan_intr = 78 nir_intrinsic_instr_create(b->shader, intr->intrinsic); 79 chan_intr->num_components = 1; 80 81 nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr)); 82 nir_intrinsic_set_write_mask(chan_intr, 0x1); 83 nir_intrinsic_set_component(chan_intr, nir_intrinsic_component(intr) + i); 84 85 /* value */ 86 chan_intr->src[0] = nir_src_for_ssa(nir_channel(b, value, i)); 87 /* offset */ 88 nir_src_copy(&chan_intr->src[1], &intr->src[1], chan_intr); 89 90 nir_builder_instr_insert(b, &chan_intr->instr); 91 } 92 93 nir_instr_remove(&intr->instr); 94} 95 96void 97nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask) 98{ 99 nir_foreach_function(function, shader) { 100 if (function->impl) { 101 nir_builder b; 102 nir_builder_init(&b, function->impl); 103 104 nir_foreach_block(block, function->impl) { 105 nir_foreach_instr_safe(instr, block) { 106 if (instr->type != nir_instr_type_intrinsic) 107 continue; 108 109 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 110 111 if (intr->num_components == 1) 112 continue; 113 114 switch (intr->intrinsic) { 115 case nir_intrinsic_load_input: 116 if (mask & nir_var_shader_in) 117 lower_load_input_to_scalar(&b, intr); 118 break; 119 case nir_intrinsic_store_output: 120 if (mask & nir_var_shader_out) 121 lower_store_output_to_scalar(&b, intr); 122 break; 123 default: 124 break; 125 } 126 } 127 } 128 } 129 } 130} 131 132static nir_variable ** 133get_channel_variables(struct hash_table *ht, nir_variable *var) 134{ 135 nir_variable **chan_vars; 136 struct hash_entry *entry = _mesa_hash_table_search(ht, var); 137 if (!entry) { 138 chan_vars = (nir_variable **) calloc(4, sizeof(nir_variable *)); 139 _mesa_hash_table_insert(ht, var, chan_vars); 140 } else { 141 chan_vars = (nir_variable **) entry->data; 142 } 143 144 return chan_vars; 145} 146 147/* 148 * Note that the src deref that we are cloning is the head of the 149 * chain of deref instructions from the original intrinsic, but 150 * the dst we are cloning to is the tail (because chains of deref 151 * instructions are created back to front) 152 */ 153 154static nir_deref_instr * 155clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail, 156 const nir_deref_instr *src_head) 157{ 158 const nir_deref_instr *parent = nir_deref_instr_parent(src_head); 159 160 if (!parent) 161 return dst_tail; 162 163 assert(src_head->deref_type == nir_deref_type_array); 164 165 dst_tail = clone_deref_array(b, dst_tail, parent); 166 167 return nir_build_deref_array(b, dst_tail, 168 nir_ssa_for_src(b, src_head->arr.index, 1)); 169} 170 171static void 172lower_load_to_scalar_early(nir_builder *b, nir_intrinsic_instr *intr, 173 nir_variable *var, struct hash_table *split_inputs, 174 struct hash_table *split_outputs) 175{ 176 b->cursor = nir_before_instr(&intr->instr); 177 178 assert(intr->dest.is_ssa); 179 180 nir_ssa_def *loads[NIR_MAX_VEC_COMPONENTS]; 181 182 nir_variable **chan_vars; 183 if (var->data.mode == nir_var_shader_in) { 184 chan_vars = get_channel_variables(split_inputs, var); 185 } else { 186 chan_vars = get_channel_variables(split_outputs, var); 187 } 188 189 for (unsigned i = 0; i < intr->num_components; i++) { 190 nir_variable *chan_var = chan_vars[var->data.location_frac + i]; 191 if (!chan_vars[var->data.location_frac + i]) { 192 chan_var = nir_variable_clone(var, b->shader); 193 chan_var->data.location_frac = var->data.location_frac + i; 194 chan_var->type = glsl_channel_type(chan_var->type); 195 if (var->data.explicit_offset) { 196 unsigned comp_size = glsl_get_bit_size(chan_var->type) / 8; 197 chan_var->data.offset = var->data.offset + i * comp_size; 198 } 199 200 chan_vars[var->data.location_frac + i] = chan_var; 201 202 nir_shader_add_variable(b->shader, chan_var); 203 } 204 205 nir_intrinsic_instr *chan_intr = 206 nir_intrinsic_instr_create(b->shader, intr->intrinsic); 207 nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest, 208 1, intr->dest.ssa.bit_size, NULL); 209 chan_intr->num_components = 1; 210 211 nir_deref_instr *deref = nir_build_deref_var(b, chan_var); 212 213 deref = clone_deref_array(b, deref, nir_src_as_deref(intr->src[0])); 214 215 chan_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa); 216 217 if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset || 218 intr->intrinsic == nir_intrinsic_interp_deref_at_sample) 219 nir_src_copy(&chan_intr->src[1], &intr->src[1], &chan_intr->instr); 220 221 nir_builder_instr_insert(b, &chan_intr->instr); 222 223 loads[i] = &chan_intr->dest.ssa; 224 } 225 226 nir_ssa_def_rewrite_uses(&intr->dest.ssa, 227 nir_src_for_ssa(nir_vec(b, loads, 228 intr->num_components))); 229 230 /* Remove the old load intrinsic */ 231 nir_instr_remove(&intr->instr); 232} 233 234static void 235lower_store_output_to_scalar_early(nir_builder *b, nir_intrinsic_instr *intr, 236 nir_variable *var, 237 struct hash_table *split_outputs) 238{ 239 b->cursor = nir_before_instr(&intr->instr); 240 241 nir_ssa_def *value = nir_ssa_for_src(b, intr->src[1], intr->num_components); 242 243 nir_variable **chan_vars = get_channel_variables(split_outputs, var); 244 for (unsigned i = 0; i < intr->num_components; i++) { 245 if (!(nir_intrinsic_write_mask(intr) & (1 << i))) 246 continue; 247 248 nir_variable *chan_var = chan_vars[var->data.location_frac + i]; 249 if (!chan_vars[var->data.location_frac + i]) { 250 chan_var = nir_variable_clone(var, b->shader); 251 chan_var->data.location_frac = var->data.location_frac + i; 252 chan_var->type = glsl_channel_type(chan_var->type); 253 if (var->data.explicit_offset) { 254 unsigned comp_size = glsl_get_bit_size(chan_var->type) / 8; 255 chan_var->data.offset = var->data.offset + i * comp_size; 256 } 257 258 chan_vars[var->data.location_frac + i] = chan_var; 259 260 nir_shader_add_variable(b->shader, chan_var); 261 } 262 263 nir_intrinsic_instr *chan_intr = 264 nir_intrinsic_instr_create(b->shader, intr->intrinsic); 265 chan_intr->num_components = 1; 266 267 nir_intrinsic_set_write_mask(chan_intr, 0x1); 268 269 nir_deref_instr *deref = nir_build_deref_var(b, chan_var); 270 271 deref = clone_deref_array(b, deref, nir_src_as_deref(intr->src[0])); 272 273 chan_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa); 274 chan_intr->src[1] = nir_src_for_ssa(nir_channel(b, value, i)); 275 276 nir_builder_instr_insert(b, &chan_intr->instr); 277 } 278 279 /* Remove the old store intrinsic */ 280 nir_instr_remove(&intr->instr); 281} 282 283/* 284 * This function is intended to be called earlier than nir_lower_io_to_scalar() 285 * i.e. before nir_lower_io() is called. 286 */ 287void 288nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask) 289{ 290 struct hash_table *split_inputs = _mesa_pointer_hash_table_create(NULL); 291 struct hash_table *split_outputs = _mesa_pointer_hash_table_create(NULL); 292 293 nir_foreach_function(function, shader) { 294 if (function->impl) { 295 nir_builder b; 296 nir_builder_init(&b, function->impl); 297 298 nir_foreach_block(block, function->impl) { 299 nir_foreach_instr_safe(instr, block) { 300 if (instr->type != nir_instr_type_intrinsic) 301 continue; 302 303 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 304 305 if (intr->num_components == 1) 306 continue; 307 308 if (intr->intrinsic != nir_intrinsic_load_deref && 309 intr->intrinsic != nir_intrinsic_store_deref && 310 intr->intrinsic != nir_intrinsic_interp_deref_at_centroid && 311 intr->intrinsic != nir_intrinsic_interp_deref_at_sample && 312 intr->intrinsic != nir_intrinsic_interp_deref_at_offset) 313 continue; 314 315 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); 316 nir_variable_mode mode = deref->mode; 317 if (!(mode & mask)) 318 continue; 319 320 nir_variable *var = nir_deref_instr_get_variable(deref); 321 322 /* TODO: add patch support */ 323 if (var->data.patch) 324 continue; 325 326 /* TODO: add doubles support */ 327 if (glsl_type_is_64bit(glsl_without_array(var->type))) 328 continue; 329 330 if (!(shader->info.stage == MESA_SHADER_VERTEX && 331 mode == nir_var_shader_in) && 332 var->data.location < VARYING_SLOT_VAR0 && 333 var->data.location >= 0) 334 continue; 335 336 /* Don't bother splitting if we can't opt away any unused 337 * components. 338 */ 339 if (var->data.always_active_io) 340 continue; 341 342 /* Skip types we cannot split */ 343 if (glsl_type_is_matrix(glsl_without_array(var->type)) || 344 glsl_type_is_struct_or_ifc(glsl_without_array(var->type))) 345 continue; 346 347 switch (intr->intrinsic) { 348 case nir_intrinsic_interp_deref_at_centroid: 349 case nir_intrinsic_interp_deref_at_sample: 350 case nir_intrinsic_interp_deref_at_offset: 351 case nir_intrinsic_load_deref: 352 if ((mask & nir_var_shader_in && mode == nir_var_shader_in) || 353 (mask & nir_var_shader_out && mode == nir_var_shader_out)) 354 lower_load_to_scalar_early(&b, intr, var, split_inputs, 355 split_outputs); 356 break; 357 case nir_intrinsic_store_deref: 358 if (mask & nir_var_shader_out && 359 mode == nir_var_shader_out) 360 lower_store_output_to_scalar_early(&b, intr, var, 361 split_outputs); 362 break; 363 default: 364 break; 365 } 366 } 367 } 368 } 369 } 370 371 /* Remove old input from the shaders inputs list */ 372 hash_table_foreach(split_inputs, entry) { 373 nir_variable *var = (nir_variable *) entry->key; 374 exec_node_remove(&var->node); 375 376 free(entry->data); 377 } 378 379 /* Remove old output from the shaders outputs list */ 380 hash_table_foreach(split_outputs, entry) { 381 nir_variable *var = (nir_variable *) entry->key; 382 exec_node_remove(&var->node); 383 384 free(entry->data); 385 } 386 387 _mesa_hash_table_destroy(split_inputs, NULL); 388 _mesa_hash_table_destroy(split_outputs, NULL); 389 390 nir_remove_dead_derefs(shader); 391} 392