nir_lower_io.c revision 01e04c3f
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts loads and stores of input/output variables
 * into actual input/output intrinsics.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

struct lower_io_state {
   void *dead_ctx;
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                         int (*type_size)(const struct glsl_type *))
{
   unsigned location = 0;

   nir_foreach_variable(var, var_list) {
      /*
       * UBOs and SSBOs have their own address spaces, so don't count them
       * towards the number of global uniforms.
       */
      if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) &&
          var->interface_type != NULL)
         continue;

      var->data.driver_location = location;
      location += type_size(var->type);
   }

   *size = location;
}

/**
 * Return true if the given variable is a per-vertex input/output array
 * (such as geometry shader inputs).
 */
bool
nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (var->data.mode == nir_var_shader_in)
      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL;

   return false;
}
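
/*
 * Illustrative sketch of the flattening done by get_io_offset() below,
 * assuming a hypothetical type_size callback that returns one unit per vec4
 * slot (the real callback is driver-provided):
 *
 *    in vec4 foo[4][3];
 *
 *    foo[i][2]  ->  offset    = imul(i, 3) + 2   // 3 slots per foo[i]
 *                   component = var->data.location_frac
 *
 * For per-vertex arrays (e.g. geometry shader inputs) the outermost index is
 * split off into a separate vertex_index value instead of being folded into
 * the offset.
 */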

static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_ssa_def **vertex_index,
              int (*type_size)(const struct glsl_type *),
              unsigned *component)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
    * outermost array index separate.  Process the rest normally.
    */
   if (vertex_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
      p++;
   }

   if (path.path[0]->var->data.compact) {
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         unsigned size = type_size((*p)->type);

         nir_ssa_def *mul =
            nir_imul(b, nir_imm_int(b, size),
                     nir_ssa_for_src(b, (*p)->arr.index, 1));

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i));
         }
         offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}
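
/*
 * Rough shape of the replacement built by lower_load() below for a
 * non-fragment-shader input (hedged sketch, not literal NIR syntax):
 *
 *    load_deref(&var[i])  ->  load_input(offset)
 *                             base      = var->data.driver_location
 *                             component = var->data.location_frac
 *
 * In fragment shaders with use_interpolated_input_intrinsics set, non-flat
 * inputs instead become a load_barycentric_* intrinsic followed by
 * load_interpolated_input(barycentric, offset).
 */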

static nir_intrinsic_instr *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component)
{
   const nir_shader *nir = state->builder.shader;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT) {
         assert(vertex_index == NULL);

         nir_intrinsic_op bary_op;
         if (var->data.sample ||
             (state->options & nir_lower_io_force_sample_interpolation))
            bary_op = nir_intrinsic_load_barycentric_sample;
         else if (var->data.centroid)
            bary_op = nir_intrinsic_load_barycentric_centroid;
         else
            bary_op = nir_intrinsic_load_barycentric_pixel;

         barycentric = nir_load_barycentric(&state->builder, bary_op,
                                            var->data.interpolation);
         op = nir_intrinsic_load_interpolated_input;
      } else {
         op = vertex_index ? nir_intrinsic_load_per_vertex_input :
                             nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = vertex_index ? nir_intrinsic_load_per_vertex_output :
                          nir_intrinsic_load_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   case nir_var_shared:
      op = nir_intrinsic_load_shared;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load, state->type_size(var->type));

   if (vertex_index) {
      load->src[0] = nir_src_for_ssa(vertex_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   return load;
}

static nir_intrinsic_instr *
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
            unsigned component)
{
   nir_variable_mode mode = var->data.mode;

   nir_intrinsic_op op;
   if (mode == nir_var_shared) {
      op = nir_intrinsic_store_shared;
   } else {
      assert(mode == nir_var_shader_out);
      op = vertex_index ? nir_intrinsic_store_per_vertex_output :
                          nir_intrinsic_store_output;
   }

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = intrin->num_components;

   nir_src_copy(&store->src[0], &intrin->src[1], store);

   nir_intrinsic_set_base(store, var->data.driver_location);

   if (mode == nir_var_shader_out)
      nir_intrinsic_set_component(store, component);

   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));

   if (vertex_index)
      store->src[1] = nir_src_for_ssa(vertex_index);

   store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);

   return store;
}
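
/*
 * Sketch of the mapping done by lower_atomic() below: each deref atomic on a
 * shared variable becomes the corresponding shared atomic on a flat offset,
 * e.g. (not literal NIR syntax)
 *
 *    deref_atomic_add(&shared_var, x)  ->  shared_atomic_add(offset, x)
 *                                          base = var->data.driver_location
 *
 * The remaining sources (data, compare value, ...) are copied through
 * unchanged.
 */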

static nir_intrinsic_instr *
lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
             nir_variable *var, nir_ssa_def *offset)
{
   assert(var->data.mode == nir_var_shared);

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
#define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_shared_##O; break;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid atomic");
   }

   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(state->builder.shader, op);

   nir_intrinsic_set_base(atomic, var->data.driver_location);

   atomic->src[0] = nir_src_for_ssa(offset);
   assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs ==
          nir_intrinsic_infos[op].num_srcs);
   for (unsigned i = 1; i < nir_intrinsic_infos[op].num_srcs; i++) {
      nir_src_copy(&atomic->src[i], &intrin->src[i], atomic);
   }

   return atomic;
}

static nir_intrinsic_instr *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_variable *var, nir_ssa_def *offset, unsigned component)
{
   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat. */
   if (var->data.interpolation == INTERP_MODE_FLAT)
      return lower_load(intrin, state, NULL, var, offset, component);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset)
      nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);

   nir_builder_instr_insert(&state->builder, &bary_setup->instr);

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader,
                                 nir_intrinsic_load_interpolated_input);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   nir_intrinsic_set_component(load, component);

   load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
   load->src[1] = nir_src_for_ssa(offset);

   return load;
}
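
/*
 * Sketch of what lower_interpolate_at() above produces for a GLSL
 * interpolateAtSample(v, s) on a smooth input (not literal NIR syntax):
 *
 *    bary   = load_barycentric_at_sample(s)        // interp_mode from var
 *    result = load_interpolated_input(bary, offset)
 *             base      = var->data.driver_location
 *             component = var->data.location_frac
 *
 * Flat inputs skip this path entirely and are lowered as plain loads.
 */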

static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
      case nir_intrinsic_deref_atomic_add:
      case nir_intrinsic_deref_atomic_imin:
      case nir_intrinsic_deref_atomic_umin:
      case nir_intrinsic_deref_atomic_imax:
      case nir_intrinsic_deref_atomic_umax:
      case nir_intrinsic_deref_atomic_and:
      case nir_intrinsic_deref_atomic_or:
      case nir_intrinsic_deref_atomic_xor:
      case nir_intrinsic_deref_atomic_exchange:
      case nir_intrinsic_deref_atomic_comp_swap:
      case nir_intrinsic_deref_atomic_fadd:
      case nir_intrinsic_deref_atomic_fmin:
      case nir_intrinsic_deref_atomic_fmax:
      case nir_intrinsic_deref_atomic_fcomp_swap:
         /* We can lower the I/O for this NIR intrinsic */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics)
            break;
      default:
         /* We can't lower the I/O for this NIR intrinsic, so skip it */
         continue;
      }

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

      nir_variable *var = nir_deref_instr_get_variable(deref);
      nir_variable_mode mode = var->data.mode;

      if ((state->modes & mode) == 0)
         continue;

      if (mode != nir_var_shader_in &&
          mode != nir_var_shader_out &&
          mode != nir_var_shared &&
          mode != nir_var_uniform)
         continue;

      b->cursor = nir_before_instr(instr);

      const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);

      nir_ssa_def *offset;
      nir_ssa_def *vertex_index = NULL;
      unsigned component_offset = var->data.location_frac;

      offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
                             state->type_size, &component_offset);

      nir_intrinsic_instr *replacement;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
         replacement = lower_load(intrin, state, vertex_index, var, offset,
                                  component_offset);
         break;

      case nir_intrinsic_store_deref:
         replacement = lower_store(intrin, state, vertex_index, var, offset,
                                   component_offset);
         break;

      case nir_intrinsic_deref_atomic_add:
      case nir_intrinsic_deref_atomic_imin:
      case nir_intrinsic_deref_atomic_umin:
      case nir_intrinsic_deref_atomic_imax:
      case nir_intrinsic_deref_atomic_umax:
      case nir_intrinsic_deref_atomic_and:
      case nir_intrinsic_deref_atomic_or:
      case nir_intrinsic_deref_atomic_xor:
      case nir_intrinsic_deref_atomic_exchange:
      case nir_intrinsic_deref_atomic_comp_swap:
      case nir_intrinsic_deref_atomic_fadd:
      case nir_intrinsic_deref_atomic_fmin:
      case nir_intrinsic_deref_atomic_fmax:
      case nir_intrinsic_deref_atomic_fcomp_swap:
         assert(vertex_index == NULL);
         replacement = lower_atomic(intrin, state, var, offset);
         break;

      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
         assert(vertex_index == NULL);
         replacement = lower_interpolate_at(intrin, state, var, offset,
                                            component_offset);
         break;

      default:
         continue;
      }

      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         if (intrin->dest.is_ssa) {
            nir_ssa_dest_init(&replacement->instr, &replacement->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&replacement->dest.ssa));
         } else {
            nir_dest_copy(&replacement->dest, &intrin->dest, &intrin->instr);
         }
      }

      nir_instr_insert_before(&intrin->instr, &replacement->instr);
      nir_instr_remove(&intrin->instr);
      progress = true;
   }

   return progress;
}

static bool
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *),
                  nir_lower_io_options options)
{
   struct lower_io_state state;
   bool progress = false;

   nir_builder_init(&state.builder, impl);
   state.dead_ctx = ralloc_context(NULL);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   ralloc_free(state.dead_ctx);

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
   return progress;
}

bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *),
             nir_lower_io_options options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl) {
         progress |= nir_lower_io_impl(function->impl, modes,
                                       type_size, options);
      }
   }

   return progress;
}
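
/*
 * Typical driver usage is roughly the following (a hedged sketch; the
 * count_vec4_slots callback is hypothetical and stands in for whatever
 * slot-counting function a driver actually uses):
 *
 *    static int
 *    count_vec4_slots(const struct glsl_type *type)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    nir_assign_var_locations(&shader->inputs, &shader->num_inputs,
 *                             count_vec4_slots);
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 count_vec4_slots, (nir_lower_io_options)0);
 */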

/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_uniform:
      return &instr->src[0];
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_store_output:
      return &instr->src[1];
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[2];
   default:
      return NULL;
   }
}

/**
 * Return the vertex index source for a load/store per_vertex intrinsic.
 */
nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
      return &instr->src[0];
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[1];
   default:
      return NULL;
   }
}
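
/*
 * Example of how a driver might use the helpers above (hedged sketch):
 *
 *    nir_src *offset_src = nir_get_io_offset_src(intrin);
 *    if (offset_src && nir_src_is_const(*offset_src)) {
 *       unsigned const_offset = nir_src_as_uint(*offset_src);
 *       ...
 *    }
 */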