brw_nir.c revision 01e04c3f
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir.h"
#include "brw_shader.h"
#include "common/gen_debug.h"
#include "compiler/glsl_types.h"
#include "compiler/nir/nir_builder.h"
#include "util/u_math.h"

static bool
is_input(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_input ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
          intrin->intrinsic == nir_intrinsic_load_interpolated_input;
}

static bool
is_output(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_output ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
          intrin->intrinsic == nir_intrinsic_store_output ||
          intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
}

/**
 * In many cases, we just add the base and offset together, so there's no
 * reason to keep them separate.  Sometimes, combining them is essential:
 * if a shader only accesses part of a compound variable (such as a matrix
 * or array), the variable's base may not actually exist in the VUE map.
 *
 * This pass adds constant offsets to instr->const_index[0], and resets
 * the offset source to 0.  Non-constant offsets remain unchanged - since
 * we don't know what part of a compound variable is accessed, we allocate
 * storage for the entire thing.
 */

static bool
add_const_offset_to_base_block(nir_block *block, nir_builder *b,
                               nir_variable_mode mode)
{
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if ((mode == nir_var_shader_in && is_input(intrin)) ||
          (mode == nir_var_shader_out && is_output(intrin))) {
         nir_src *offset = nir_get_io_offset_src(intrin);
         nir_const_value *const_offset = nir_src_as_const_value(*offset);

         if (const_offset) {
            intrin->const_index[0] += const_offset->u32[0];
            b->cursor = nir_before_instr(&intrin->instr);
            nir_instr_rewrite_src(&intrin->instr, offset,
                                  nir_src_for_ssa(nir_imm_int(b, 0)));
         }
      }
   }
   return true;
}

static void
add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode)
{
   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_builder b;
         nir_builder_init(&b, f->impl);
         nir_foreach_block(block, f->impl) {
            add_const_offset_to_base_block(block, &b, mode);
         }
      }
   }
}

static bool
remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
                  GLenum primitive_mode)
{
   const int location = nir_intrinsic_base(intr);
   const unsigned component = nir_intrinsic_component(intr);
   bool out_of_bounds;

   if (location == VARYING_SLOT_TESS_LEVEL_INNER) {
      switch (primitive_mode) {
      case GL_QUADS:
         /* gl_TessLevelInner[0..1] lives at DWords 3-2 (reversed). */
         nir_intrinsic_set_base(intr, 0);
         nir_intrinsic_set_component(intr, 3 - component);
         out_of_bounds = false;
         break;
      case GL_TRIANGLES:
         /* gl_TessLevelInner[0] lives at DWord 4. */
         nir_intrinsic_set_base(intr, 1);
         out_of_bounds = component > 0;
         break;
      case GL_ISOLINES:
         out_of_bounds = true;
         break;
      default:
         unreachable("Bogus tessellation domain");
      }
   } else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) {
      if (primitive_mode == GL_ISOLINES) {
         /* gl_TessLevelOuter[0..1] lives at DWords 6-7 (in order). */
         nir_intrinsic_set_base(intr, 1);
         nir_intrinsic_set_component(intr, 2 + nir_intrinsic_component(intr));
         out_of_bounds = component > 1;
      } else {
         /* Triangles use DWords 7-5 (reversed); Quads use 7-4 (reversed) */
         nir_intrinsic_set_base(intr, 1);
         nir_intrinsic_set_component(intr, 3 - nir_intrinsic_component(intr));
         out_of_bounds = component == 3 && primitive_mode == GL_TRIANGLES;
      }
   } else {
      return false;
   }

   if (out_of_bounds) {
      if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
         b->cursor = nir_before_instr(&intr->instr);
         nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(undef));
      }
      nir_instr_remove(&intr->instr);
   }

   return true;
}

static bool
remap_patch_urb_offsets(nir_block *block, nir_builder *b,
                        const struct brw_vue_map *vue_map,
                        GLenum tes_primitive_mode)
{
   const bool is_passthrough_tcs = b->shader->info.name &&
      strcmp(b->shader->info.name, "passthrough") == 0;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      gl_shader_stage stage = b->shader->info.stage;

      if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
          (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {

         if (!is_passthrough_tcs &&
             remap_tess_levels(b, intrin, tes_primitive_mode))
            continue;

         int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
         assert(vue_slot != -1);
         intrin->const_index[0] = vue_slot;

         nir_src *vertex = nir_get_io_vertex_index_src(intrin);
         if (vertex) {
            nir_const_value *const_vertex = nir_src_as_const_value(*vertex);
            if (const_vertex) {
               intrin->const_index[0] += const_vertex->u32[0] *
                                         vue_map->num_per_vertex_slots;
            } else {
               b->cursor = nir_before_instr(&intrin->instr);

               /* Multiply by the number of per-vertex slots. */
               nir_ssa_def *vertex_offset =
                  nir_imul(b,
                           nir_ssa_for_src(b, *vertex, 1),
                           nir_imm_int(b,
                                       vue_map->num_per_vertex_slots));

               /* Add it to the existing offset */
               nir_src *offset = nir_get_io_offset_src(intrin);
               nir_ssa_def *total_offset =
                  nir_iadd(b, vertex_offset,
                           nir_ssa_for_src(b, *offset, 1));

               nir_instr_rewrite_src(&intrin->instr, offset,
                                     nir_src_for_ssa(total_offset));
            }
         }
      }
   }
   return true;
}

void
brw_nir_lower_vs_inputs(nir_shader *nir,
                        const uint8_t *vs_attrib_wa_flags)
{
   /* Start with the location of the variable's base. */
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Now use nir_lower_io to walk dereference chains.  Attribute arrays are
    * loaded as one vec4 or dvec4 per element (or matrix column), depending on
    * whether it is a double-precision type or not.
    */
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   brw_nir_apply_attribute_workarounds(nir, vs_attrib_wa_flags);

   /* The last step is to remap VERT_ATTRIB_* to actual registers */

   /* Whether or not we have any system generated values.  gl_DrawID is not
    * included here as it lives in its own vec4.
    */
   const bool has_sgvs =
      nir->info.system_values_read &
      (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
       BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
       BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
       BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));

   const unsigned num_inputs = util_bitcount64(nir->info.inputs_read);

   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            switch (intrin->intrinsic) {
            case nir_intrinsic_load_first_vertex:
            case nir_intrinsic_load_base_instance:
            case nir_intrinsic_load_vertex_id_zero_base:
            case nir_intrinsic_load_instance_id:
            case nir_intrinsic_load_is_indexed_draw:
            case nir_intrinsic_load_draw_id: {
               b.cursor = nir_after_instr(&intrin->instr);

               /* gl_VertexID and friends are stored by the VF as the last
                * vertex element.  We convert them to load_input intrinsics at
                * the right location.
                */
               nir_intrinsic_instr *load =
                  nir_intrinsic_instr_create(nir, nir_intrinsic_load_input);
               load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));

               nir_intrinsic_set_base(load, num_inputs);
               switch (intrin->intrinsic) {
               case nir_intrinsic_load_first_vertex:
                  nir_intrinsic_set_component(load, 0);
                  break;
               case nir_intrinsic_load_base_instance:
                  nir_intrinsic_set_component(load, 1);
                  break;
               case nir_intrinsic_load_vertex_id_zero_base:
                  nir_intrinsic_set_component(load, 2);
                  break;
               case nir_intrinsic_load_instance_id:
                  nir_intrinsic_set_component(load, 3);
                  break;
               case nir_intrinsic_load_draw_id:
               case nir_intrinsic_load_is_indexed_draw:
                  /* gl_DrawID and IsIndexedDraw are stored right after
                   * gl_VertexID and friends if any of them exist.
                   */
                  nir_intrinsic_set_base(load, num_inputs + has_sgvs);
                  if (intrin->intrinsic == nir_intrinsic_load_draw_id)
                     nir_intrinsic_set_component(load, 0);
                  else
                     nir_intrinsic_set_component(load, 1);
                  break;
               default:
                  unreachable("Invalid system value intrinsic");
               }

               load->num_components = 1;
               nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
               nir_builder_instr_insert(&b, &load->instr);

               nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                        nir_src_for_ssa(&load->dest.ssa));
               nir_instr_remove(&intrin->instr);
               break;
            }

            case nir_intrinsic_load_input: {
               /* Attributes come in a contiguous block, ordered by their
                * gl_vert_attrib value.  That means we can compute the slot
                * number for an attribute by masking out the enabled attributes
                * before it and counting the bits.
                */
               int attr = nir_intrinsic_base(intrin);
               int slot = util_bitcount64(nir->info.inputs_read &
                                          BITFIELD64_MASK(attr));
               nir_intrinsic_set_base(intrin, slot);
               break;
            }

            default:
               break; /* Nothing to do */
            }
         }
      }
   }
}

void
brw_nir_lower_vue_inputs(nir_shader *nir,
                         const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            if (intrin->intrinsic == nir_intrinsic_load_input ||
                intrin->intrinsic == nir_intrinsic_load_per_vertex_input) {
               /* Offset 0 is the VUE header, which contains
                * VARYING_SLOT_LAYER [.y], VARYING_SLOT_VIEWPORT [.z], and
                * VARYING_SLOT_PSIZ [.w].
                */
               int varying = nir_intrinsic_base(intrin);
               int vue_slot;
               switch (varying) {
               case VARYING_SLOT_PSIZ:
                  nir_intrinsic_set_base(intrin, 0);
                  nir_intrinsic_set_component(intrin, 3);
                  break;

               default:
                  vue_slot = vue_map->varying_to_slot[varying];
                  assert(vue_slot != -1);
                  nir_intrinsic_set_base(intrin, vue_slot);
                  break;
               }
            }
         }
      }
   }
}

void
brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map,
                                    nir->info.tess.primitive_mode);
         }
      }
   }
}

void
brw_nir_lower_fs_inputs(nir_shader *nir,
                        const struct gen_device_info *devinfo,
                        const struct brw_wm_prog_key *key)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;

      /* Apply default interpolation mode.
       *
       * Everything defaults to smooth except for the legacy GL color
       * built-in variables, which might be flat depending on API state.
       */
      if (var->data.interpolation == INTERP_MODE_NONE) {
         const bool flat = key->flat_shade &&
            (var->data.location == VARYING_SLOT_COL0 ||
             var->data.location == VARYING_SLOT_COL1);

         var->data.interpolation = flat ? INTERP_MODE_FLAT
                                        : INTERP_MODE_SMOOTH;
      }

      /* On Ironlake and below, there is only one interpolation mode.
       * Centroid interpolation doesn't mean anything on this hardware --
       * there is no multisampling.
       */
      if (devinfo->gen < 6) {
         var->data.centroid = false;
         var->data.sample = false;
      }
   }

   nir_lower_io_options lower_io_options = 0;
   if (key->persample_interp)
      lower_io_options |= nir_lower_io_force_sample_interpolation;

   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, lower_io_options);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);
}

void
brw_nir_lower_vue_outputs(nir_shader *nir)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0);
}

void
brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map,
                          GLenum tes_primitive_mode)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_out);

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map, tes_primitive_mode);
         }
      }
   }
}

void
brw_nir_lower_fs_outputs(nir_shader *nir)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location =
         SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) |
         SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION);
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_dvec4, 0);
}

#define OPT(pass, ...) ({                                  \
   bool this_progress = false;                             \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
   if (this_progress)                                      \
      progress = true;                                     \
   this_progress;                                          \
})

static nir_variable_mode
brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
                         gl_shader_stage stage)
{
   nir_variable_mode indirect_mask = 0;

   if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
      indirect_mask |= nir_var_shader_in;
   if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput)
      indirect_mask |= nir_var_shader_out;
   if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
      indirect_mask |= nir_var_local;

   return indirect_mask;
}

nir_shader *
brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
                 bool is_scalar, bool allow_copies)
{
   nir_variable_mode indirect_mask =
      brw_nir_no_indirect_mask(compiler, nir->info.stage);

   bool progress;
   do {
      progress = false;
      OPT(nir_split_array_vars, nir_var_local);
      OPT(nir_shrink_vec_array_vars, nir_var_local);
      OPT(nir_lower_vars_to_ssa);
      if (allow_copies) {
         /* Only run this pass in the first call to brw_nir_optimize.  Later
          * calls assume that we've lowered away any copy_deref instructions
          * and we don't want to introduce any more.
          */
         OPT(nir_opt_find_array_copies);
      }
      OPT(nir_opt_copy_prop_vars);
      OPT(nir_opt_dead_write_vars);

      if (is_scalar) {
         OPT(nir_lower_alu_to_scalar);
      }

      OPT(nir_copy_prop);

      if (is_scalar) {
         OPT(nir_lower_phis_to_scalar);
      }

      OPT(nir_copy_prop);
      OPT(nir_opt_dce);
      OPT(nir_opt_cse);
      OPT(nir_opt_peephole_select, 0);
      OPT(nir_opt_intrinsics);
      OPT(nir_opt_algebraic);
      OPT(nir_opt_constant_folding);
      OPT(nir_opt_dead_cf);
      if (OPT(nir_opt_trivial_continues)) {
         /* If nir_opt_trivial_continues makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         OPT(nir_copy_prop);
         OPT(nir_opt_dce);
      }
      OPT(nir_opt_if);
      if (nir->options->max_unroll_iterations != 0) {
         OPT(nir_opt_loop_unroll, indirect_mask);
      }
      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
      OPT(nir_lower_doubles, nir_lower_drcp |
                             nir_lower_dsqrt |
                             nir_lower_drsq |
                             nir_lower_dtrunc |
                             nir_lower_dfloor |
                             nir_lower_dceil |
                             nir_lower_dfract |
                             nir_lower_dround_even |
                             nir_lower_dmod);
      OPT(nir_lower_pack);
   } while (progress);

   /* Workaround Gfxbench unused local sampler variable which will trigger an
    * assert in the opt_large_constants pass.
    */
   OPT(nir_remove_dead_variables, nir_var_local);

   return nir;
}

static unsigned
lower_bit_size_callback(const nir_alu_instr *alu, UNUSED void *data)
{
   assert(alu->dest.dest.is_ssa);
   if (alu->dest.dest.ssa.bit_size != 16)
      return 0;

   switch (alu->op) {
   case nir_op_idiv:
   case nir_op_imod:
   case nir_op_irem:
   case nir_op_udiv:
   case nir_op_umod:
      return 32;
   default:
      return 0;
   }
}

/* Does some simple lowering and runs the standard suite of optimizations
 *
 * This is intended to be called more-or-less directly after you get the
 * shader out of GLSL or some other source.  While it is geared towards i965,
 * it is not at all generator-specific except for the is_scalar flag.  Even
 * there, it is safe to call with is_scalar = false for a shader that is
 * intended for the FS backend as long as nir_optimize is called again with
 * is_scalar = true to scalarize everything prior to code gen.
 */
nir_shader *
brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
{
   const struct gen_device_info *devinfo = compiler->devinfo;
   UNUSED bool progress; /* Written by OPT */

   const bool is_scalar = compiler->scalar_stage[nir->info.stage];

   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      OPT(nir_lower_gs_intrinsics);

   /* See also brw_nir_trig_workarounds.py */
   if (compiler->precise_trig &&
       !(devinfo->gen >= 10 || devinfo->is_kabylake))
      OPT(brw_nir_apply_trig_workarounds);

   static const nir_lower_tex_options tex_options = {
      .lower_txp = ~0,
      .lower_txf_offset = true,
      .lower_rect_offset = true,
      .lower_txd_cube_map = true,
   };

   OPT(nir_lower_tex, &tex_options);
   OPT(nir_normalize_cubemap_coords);

   OPT(nir_lower_global_vars_to_local);

   OPT(nir_split_var_copies);
   OPT(nir_split_struct_vars, nir_var_local);

   /* Run opt_algebraic before int64 lowering so we can hopefully get rid
    * of some int64 instructions.
    */
   OPT(nir_opt_algebraic);

   /* Lower int64 instructions before nir_optimize so that loop unrolling
    * sees their actual cost.
    */
   OPT(nir_lower_int64, nir_lower_imul64 |
                        nir_lower_isign64 |
                        nir_lower_divmod64);

   nir = brw_nir_optimize(nir, compiler, is_scalar, true);

   /* This needs to be run after the first optimization pass but before we
    * lower indirect derefs away
    */
   if (compiler->supports_shader_constants) {
      OPT(nir_opt_large_constants, NULL, 32);
   }

   OPT(nir_lower_bit_size, lower_bit_size_callback, NULL);

   if (is_scalar) {
      OPT(nir_lower_load_const_to_scalar);
   }

   /* Lower a bunch of stuff */
   OPT(nir_lower_var_copies);

   OPT(nir_lower_system_values);

   const nir_lower_subgroups_options subgroups_options = {
      .subgroup_size = BRW_SUBGROUP_SIZE,
      .ballot_bit_size = 32,
      .lower_to_scalar = true,
      .lower_subgroup_masks = true,
      .lower_vote_trivial = !is_scalar,
      .lower_shuffle = true,
   };
   OPT(nir_lower_subgroups, &subgroups_options);

   OPT(nir_lower_clip_cull_distance_arrays);

   nir_variable_mode indirect_mask =
      brw_nir_no_indirect_mask(compiler, nir->info.stage);
   OPT(nir_lower_indirect_derefs, indirect_mask);

   /* Get rid of split copies */
   nir = brw_nir_optimize(nir, compiler, is_scalar, false);

   return nir;
}

void
brw_nir_link_shaders(const struct brw_compiler *compiler,
                     nir_shader **producer, nir_shader **consumer)
{
   nir_lower_io_arrays_to_elements(*producer, *consumer);
   nir_validate_shader(*producer, "after nir_lower_io_arrays_to_elements");
   nir_validate_shader(*consumer, "after nir_lower_io_arrays_to_elements");

   const bool p_is_scalar =
      compiler->scalar_stage[(*producer)->info.stage];
   const bool c_is_scalar =
      compiler->scalar_stage[(*consumer)->info.stage];

   if (p_is_scalar && c_is_scalar) {
      NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
      NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
      *producer = brw_nir_optimize(*producer, compiler, p_is_scalar, false);
      *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false);
   }

   NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
   NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);

   if (nir_remove_unused_varyings(*producer, *consumer)) {
      NIR_PASS_V(*producer, nir_lower_global_vars_to_local);
      NIR_PASS_V(*consumer, nir_lower_global_vars_to_local);

      /* The backend might not be able to handle indirects on
       * temporaries so we need to lower indirects on any of the
       * varyings we have demoted here.
       */
      NIR_PASS_V(*producer, nir_lower_indirect_derefs,
                 brw_nir_no_indirect_mask(compiler, (*producer)->info.stage));
      NIR_PASS_V(*consumer, nir_lower_indirect_derefs,
                 brw_nir_no_indirect_mask(compiler, (*consumer)->info.stage));

      *producer = brw_nir_optimize(*producer, compiler, p_is_scalar, false);
      *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false);
   }
}

/* Prepare the given shader for codegen
 *
 * This function is intended to be called right before going into the actual
 * backend and is highly backend-specific.  Also, once this function has been
 * called on a shader, it will no longer be in SSA form so most optimizations
 * will not work.
 */
nir_shader *
brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
                    bool is_scalar)
{
   const struct gen_device_info *devinfo = compiler->devinfo;
   bool debug_enabled =
      (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->info.stage));

   UNUSED bool progress; /* Written by OPT */

   do {
      progress = false;
      OPT(nir_opt_algebraic_before_ffma);
   } while (progress);

   nir = brw_nir_optimize(nir, compiler, is_scalar, false);

   if (devinfo->gen >= 6) {
      /* Try and fuse multiply-adds */
      OPT(brw_nir_opt_peephole_ffma);
   }

   OPT(nir_opt_algebraic_late);

   OPT(nir_lower_to_source_mods);
   OPT(nir_copy_prop);
   OPT(nir_opt_dce);
   OPT(nir_opt_move_comparisons);

   OPT(nir_lower_locals_to_regs);

   if (unlikely(debug_enabled)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function(function, nir) {
         if (function->impl)
            nir_index_ssa_defs(function->impl);
      }

      fprintf(stderr, "NIR (SSA form) for %s shader:\n",
              _mesa_shader_stage_to_string(nir->info.stage));
      nir_print_shader(nir, stderr);
   }

   OPT(nir_convert_from_ssa, true);

   if (!is_scalar) {
      OPT(nir_move_vec_src_uses_to_dest);
      OPT(nir_lower_vec_to_movs);
   }

   OPT(nir_opt_dce);

   /* This is the last pass we run before we start emitting stuff.  It
    * determines when we need to insert boolean resolves on Gen <= 5.  We
    * run it last because it stashes data in instr->pass_flags and we don't
    * want that to be squashed by other NIR passes.
    */
   if (devinfo->gen <= 5)
      brw_nir_analyze_boolean_resolves(nir);

   nir_sweep(nir);

   if (unlikely(debug_enabled)) {
      fprintf(stderr, "NIR (final form) for %s shader:\n",
              _mesa_shader_stage_to_string(nir->info.stage));
      nir_print_shader(nir, stderr);
   }

   return nir;
}

nir_shader *
brw_nir_apply_sampler_key(nir_shader *nir,
                          const struct brw_compiler *compiler,
                          const struct brw_sampler_prog_key_data *key_tex,
                          bool is_scalar)
{
   const struct gen_device_info *devinfo = compiler->devinfo;
   nir_lower_tex_options tex_options = { 0 };

   /* Iron Lake and prior require lowering of all rectangle textures */
   if (devinfo->gen < 6)
      tex_options.lower_rect = true;

   /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */
   if (devinfo->gen < 8) {
      tex_options.saturate_s = key_tex->gl_clamp_mask[0];
      tex_options.saturate_t = key_tex->gl_clamp_mask[1];
      tex_options.saturate_r = key_tex->gl_clamp_mask[2];
   }

   /* Prior to Haswell, we have to fake texture swizzle */
   for (unsigned s = 0; s < MAX_SAMPLERS; s++) {
      if (key_tex->swizzles[s] == SWIZZLE_NOOP)
         continue;

      tex_options.swizzle_result |= (1 << s);
      for (unsigned c = 0; c < 4; c++)
         tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c);
   }

   /* Prior to Haswell, we have to lower gradients on shadow samplers */
   tex_options.lower_txd_shadow = devinfo->gen < 8 && !devinfo->is_haswell;

   tex_options.lower_y_uv_external = key_tex->y_uv_image_mask;
   tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask;
   tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask;
   tex_options.lower_xy_uxvx_external = key_tex->xy_uxvx_image_mask;

   if (nir_lower_tex(nir, &tex_options)) {
      nir_validate_shader(nir, "after nir_lower_tex");
      nir = brw_nir_optimize(nir, compiler, is_scalar, false);
   }

   return nir;
}

enum brw_reg_type
brw_type_for_nir_type(const struct gen_device_info *devinfo, nir_alu_type type)
{
   switch (type) {
   case nir_type_uint:
   case nir_type_uint32:
      return BRW_REGISTER_TYPE_UD;
   case nir_type_bool:
   case nir_type_int:
   case nir_type_bool32:
   case nir_type_int32:
      return BRW_REGISTER_TYPE_D;
   case nir_type_float:
   case nir_type_float32:
      return BRW_REGISTER_TYPE_F;
   case nir_type_float16:
      return BRW_REGISTER_TYPE_HF;
   case nir_type_float64:
      return BRW_REGISTER_TYPE_DF;
   case nir_type_int64:
      return devinfo->gen < 8 ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_Q;
   case nir_type_uint64:
      return devinfo->gen < 8 ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_UQ;
   case nir_type_int16:
      return BRW_REGISTER_TYPE_W;
   case nir_type_uint16:
      return BRW_REGISTER_TYPE_UW;
   case nir_type_int8:
      return BRW_REGISTER_TYPE_B;
   case nir_type_uint8:
      return BRW_REGISTER_TYPE_UB;
   default:
      unreachable("unknown type");
   }

   return BRW_REGISTER_TYPE_F;
}

/* Returns the glsl_base_type corresponding to a nir_alu_type.
 * This is used by both brw_vec4_nir and brw_fs_nir.
 */
enum glsl_base_type
brw_glsl_base_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_float:
   case nir_type_float32:
      return GLSL_TYPE_FLOAT;

   case nir_type_float16:
      return GLSL_TYPE_FLOAT16;

   case nir_type_float64:
      return GLSL_TYPE_DOUBLE;

   case nir_type_int:
   case nir_type_int32:
      return GLSL_TYPE_INT;

   case nir_type_uint:
   case nir_type_uint32:
      return GLSL_TYPE_UINT;

   case nir_type_int16:
      return GLSL_TYPE_INT16;

   case nir_type_uint16:
      return GLSL_TYPE_UINT16;

   default:
      unreachable("bad type");
   }
}

nir_shader *
brw_nir_create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler,
                               const nir_shader_compiler_options *options,
                               const struct brw_tcs_prog_key *key)
{
   nir_builder b;
   nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_TESS_CTRL,
                                  options);
   nir_shader *nir = b.shader;
   nir_variable *var;
   nir_intrinsic_instr *load;
   nir_intrinsic_instr *store;
   nir_ssa_def *zero = nir_imm_int(&b, 0);
   nir_ssa_def *invoc_id =
      nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0);

   nir->info.inputs_read = key->outputs_written &
      ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
   nir->info.outputs_written = key->outputs_written;
   nir->info.tess.tcs_vertices_out = key->input_vertices;
   nir->info.name = ralloc_strdup(nir, "passthrough");
   nir->num_uniforms = 8 * sizeof(uint32_t);

   var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_0");
   var->data.location = 0;
   var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_1");
   var->data.location = 1;

   /* Write the patch URB header. */
   for (int i = 0; i <= 1; i++) {
      load = nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
      load->num_components = 4;
      load->src[0] = nir_src_for_ssa(zero);
      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_intrinsic_set_base(load, i * 4 * sizeof(uint32_t));
      nir_builder_instr_insert(&b, &load->instr);

      store = nir_intrinsic_instr_create(nir, nir_intrinsic_store_output);
      store->num_components = 4;
      store->src[0] = nir_src_for_ssa(&load->dest.ssa);
      store->src[1] = nir_src_for_ssa(zero);
      nir_intrinsic_set_base(store, VARYING_SLOT_TESS_LEVEL_INNER - i);
      nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
      nir_builder_instr_insert(&b, &store->instr);
   }

   /* Copy inputs to outputs. */
   uint64_t varyings = nir->info.inputs_read;

   while (varyings != 0) {
      const int varying = ffsll(varyings) - 1;

      load = nir_intrinsic_instr_create(nir,
                                        nir_intrinsic_load_per_vertex_input);
      load->num_components = 4;
      load->src[0] = nir_src_for_ssa(invoc_id);
      load->src[1] = nir_src_for_ssa(zero);
      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_intrinsic_set_base(load, varying);
      nir_builder_instr_insert(&b, &load->instr);

      store = nir_intrinsic_instr_create(nir,
                                         nir_intrinsic_store_per_vertex_output);
      store->num_components = 4;
      store->src[0] = nir_src_for_ssa(&load->dest.ssa);
      store->src[1] = nir_src_for_ssa(invoc_id);
      store->src[2] = nir_src_for_ssa(zero);
      nir_intrinsic_set_base(store, varying);
      nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
      nir_builder_instr_insert(&b, &store->instr);

      varyings &= ~BITFIELD64_BIT(varying);
   }

   nir_validate_shader(nir, "in brw_nir_create_passthrough_tcs");

   nir = brw_preprocess_nir(compiler, nir);

   return nir;
}