zink_compiler.c revision 7ec681f3
/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_program.h"
#include "zink_screen.h"
#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir.h"
#include "compiler/nir/nir_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"

#include "util/u_memory.h"

static void
create_vs_pushconst(nir_shader *nir)
{
   nir_variable *vs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2);
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed");
   fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
   fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[1].name = ralloc_asprintf(nir, "draw_id");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id);
   vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 2, "struct", false), "vs_pushconst");
   vs_pushconst->data.location = INT_MAX; //doesn't really matter
}

static void
create_cs_pushconst(nir_shader *nir)
{
   nir_variable *cs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field));
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "work_dim");
   fields[0].offset = 0;
   cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 1, "struct", false), "cs_pushconst");
   cs_pushconst->data.location = INT_MAX; //doesn't really matter
}
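/* For reference, a minimal sketch of the push-constant layout that the
 * offsetof() calls above (and the tess-level fields used by
 * zink_shader_tcs_create below) assume; the real struct is defined elsewhere
 * in the driver, so the field order here is inferred from this file alone and
 * is illustrative only:
 *
 *    struct zink_gfx_push_constant {
 *       unsigned draw_mode_is_indexed;
 *       unsigned draw_id;
 *       float default_inner_level[2];
 *       float default_outer_level[4];
 *    };
 */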
static bool
reads_work_dim(nir_shader *shader)
{
   return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM);
}

static bool
lower_discard_if_instr(nir_builder *b, nir_instr *instr_, UNUSED void *cb_data)
{
   if (instr_->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(instr_);

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      b->cursor = nir_before_instr(&instr->instr);

      nir_if *if_stmt = nir_push_if(b, nir_ssa_for_src(b, instr->src[0], 1));
      nir_discard(b);
      nir_pop_if(b, if_stmt);
      nir_instr_remove(&instr->instr);
      return true;
   }
   /* a shader like this (shaders@glsl-fs-discard-04):

      uniform int j, k;

      void main()
      {
         for (int i = 0; i < j; i++) {
            if (i > k)
               continue;
            discard;
         }
         gl_FragColor = vec4(0.0, 1.0, 0.0, 0.0);
      }

      will generate nir like:

      loop {
         //snip
         if ssa_11 {
            block block_5:
            / preds: block_4 /
            vec1 32 ssa_17 = iadd ssa_50, ssa_31
            / succs: block_7 /
         } else {
            block block_6:
            / preds: block_4 /
            intrinsic discard () ()               <-- not last instruction
            vec1 32 ssa_23 = iadd ssa_50, ssa_31  <-- dead code loop itr increment
            / succs: block_7 /
         }
         //snip
      }

      which means that we can't assert like this:

      assert(instr->intrinsic != nir_intrinsic_discard ||
             nir_block_last_instr(instr->instr.block) == &instr->instr);

      and it's unnecessary anyway since post-vtn optimizing will dce the
      instructions following the discard
    */

   return false;
}

static bool
lower_discard_if(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader,
                                       lower_discard_if_instr,
                                       nir_metadata_dominance,
                                       NULL);
}

static bool
lower_work_dim_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_work_dim)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 3 * sizeof(uint32_t));
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_work_dim(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_KERNEL)
      return false;

   if (!reads_work_dim(shader))
      return false;

   return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL);
}
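/* Note: the load_push_constant emitted above reads offset 0 of the cs push
 * constants, i.e. the "work_dim" field declared by create_cs_pushconst();
 * lower_work_dim() only runs for MESA_SHADER_KERNEL shaders, the one stage
 * expected to read SYSTEM_VALUE_WORK_DIM here. */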
static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_deref)
      return false;
   nir_deref_instr *deref = nir_instr_as_deref(instr);
   if (deref->deref_type != nir_deref_type_var)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   deref->type = var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   /* create deref instr for new variable */
   b->cursor = nir_after_instr(instr);
   nir_deref_instr *deref2 = nir_build_deref_var(b, var2);

   nir_foreach_use_safe(use_src, &deref->dest.ssa) {
      nir_instr *use_instr = use_src->parent_instr;
      assert(use_instr->type == nir_instr_type_intrinsic &&
             nir_instr_as_intrinsic(use_instr)->intrinsic == nir_intrinsic_load_deref);

      /* this is a load instruction for the deref, and we need to split it into two instructions that we can
       * then zip back into a single ssa def */
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_instr);
      /* clamp the first load to 2 64bit components */
      intr->num_components = intr->dest.ssa.num_components = 2;
      b->cursor = nir_after_instr(use_instr);
      /* this is the second load instruction for the second half of the dvec3/4 components */
      nir_intrinsic_instr *intr2 = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
      intr2->src[0] = nir_src_for_ssa(&deref2->dest.ssa);
      intr2->num_components = total_num_components - 2;
      nir_ssa_dest_init(&intr2->instr, &intr2->dest, intr2->num_components, 64, NULL);
      nir_builder_instr_insert(b, &intr2->instr);

      nir_ssa_def *def[4];
      /* create a new dvec3/4 comprised of all the loaded components from both variables */
      def[0] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 0));
      def[1] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 1));
      def[2] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 0));
      if (total_num_components == 4)
         def[3] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 1));
      nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
      /* use the assembled dvec3/4 for all other uses of the load */
      nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                     new_vec->parent_instr);
   }

   return true;
}

/* "64-bit three- and four-component vectors consume two consecutive locations."
 *  - 14.1.4. Location Assignment
 *
 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
 * are assigned to consecutive locations, loaded separately, and then assembled back into a
 * composite value that's used in place of the original loaded ssa src
 */
static bool
lower_64bit_vertex_attribs(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   return nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
}
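/* Illustration of the transform above (not generated code): a vertex input
 *
 *    dvec4 attr;      // consumes two consecutive locations
 *
 * becomes
 *
 *    dvec2 attr;      // original variable, clamped to the first location
 *    dvec2 attr_hi;   // clone at driver_location + 1 holding the z/w half
 *
 * and each load_deref of the original is split into two loads whose results
 * are rejoined with nir_vec() for the existing users. */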
static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}

static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}
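/* GL requires gl_BaseVertex to read 0 for non-indexed draws, while the value
 * Vulkan provides is only meaningful for indexed draws, so the bcsel above
 * selects between the loaded base vertex and 0 based on the
 * draw_mode_is_indexed push constant (offset 0 of the vs push constants from
 * create_vs_pushconst).  lower_drawid below similarly sources gl_DrawID from
 * the draw_id push constant field. */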
static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}

static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}
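/* Dual-source blending note: SPIR-V addresses the second blend source via the
 * Index decoration on the same output location rather than a second location,
 * so a gallium FRAG_RESULT_DATA1 output is remapped to DATA0 with index 1. */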
void
zink_screen_init_compiler(struct zink_screen *screen)
{
   static const struct nir_shader_compiler_options
   default_options = {
      .lower_ffma16 = true,
      .lower_ffma32 = true,
      .lower_ffma64 = true,
      .lower_scmp = true,
      .lower_fdph = true,
      .lower_flrp32 = true,
      .lower_fpow = true,
      .lower_fsat = true,
      .lower_extract_byte = true,
      .lower_extract_word = true,
      .lower_insert_byte = true,
      .lower_insert_word = true,
      .lower_mul_high = true,
      .lower_rotate = true,
      .lower_uadd_carry = true,
      .lower_pack_64_2x32_split = true,
      .lower_unpack_64_2x32_split = true,
      .lower_pack_32_2x16_split = true,
      .lower_unpack_32_2x16_split = true,
      .lower_vector_cmp = true,
      .lower_int64_options = 0,
      .lower_doubles_options = ~nir_lower_fp64_full_software,
      .lower_uniforms_to_ubo = true,
      .has_fsub = true,
      .has_isub = true,
      .lower_mul_2x32_64 = true,
      .support_16bit_alu = true, /* not quite what it sounds like */
   };

   screen->nir_options = default_options;

   if (!screen->info.feats.features.shaderInt64)
      screen->nir_options.lower_int64_options = ~0;

   if (!screen->info.feats.features.shaderFloat64) {
      screen->nir_options.lower_doubles_options = ~0;
      screen->nir_options.lower_flrp64 = true;
      screen->nir_options.lower_ffma64 = true;
   }
}

const void *
zink_get_compiler_options(struct pipe_screen *pscreen,
                          enum pipe_shader_ir ir,
                          enum pipe_shader_type shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &zink_screen(pscreen)->nir_options;
}

struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
{
   if (zink_debug & ZINK_DEBUG_TGSI) {
      fprintf(stderr, "TGSI shader:\n---8<---\n");
      tgsi_dump_to_file(tokens, 0, stderr);
      fprintf(stderr, "---8<---\n\n");
   }

   return tgsi_to_nir(tokens, screen, false);
}

static void
optimize_nir(struct nir_shader *s)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, zink_nir_lower_b2b);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(s, nir_copy_prop);
         NIR_PASS_V(s, nir_opt_dce);
         NIR_PASS_V(s, nir_opt_cse);
      }
   } while (progress);
}

/* - copy the lowered fbfetch variable
 * - set the new one up as an input attachment for descriptor 0.6
 * - load it as an image
 * - overwrite the previous load
 */
static bool
lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
   if (var != data)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_variable *fbfetch = nir_variable_clone(data, b->shader);
   /* If Dim is SubpassData, ... Image Format must be Unknown
    *  - SPIRV OpTypeImage specification
    */
   fbfetch->data.image.format = 0;
   fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
   fbfetch->data.mode = nir_var_uniform;
   fbfetch->data.binding = ZINK_FBFETCH_BINDING;
   fbfetch->type = glsl_image_type(GLSL_SAMPLER_DIM_SUBPASS, false, GLSL_TYPE_FLOAT);
   nir_shader_add_variable(b->shader, fbfetch);
   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), nir_ssa_undef(b, 1, 32), nir_imm_int(b, 0));
   unsigned swiz[4] = {2, 1, 0, 3};
   nir_ssa_def *swizzle = nir_swizzle(b, load, swiz, 4);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, swizzle);
   return true;
}

static bool
lower_fbfetch(nir_shader *shader, nir_variable **fbfetch)
{
   nir_foreach_shader_out_variable(var, shader) {
      if (var->data.fb_fetch_output) {
         *fbfetch = var;
         break;
      }
   }
   assert(*fbfetch);
   if (!*fbfetch)
      return false;
   return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, *fbfetch);
}

/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
static bool
check_psiz(struct nir_shader *s)
{
   nir_foreach_shader_out_variable(var, s) {
      if (var->data.location == VARYING_SLOT_PSIZ) {
         /* genuine PSIZ outputs will have this set */
         return !!var->data.explicit_location;
      }
   }
   return false;
}

static void
update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info,
               uint64_t outputs_written, bool have_psiz)
{
   uint8_t reverse_map[64] = {0};
   unsigned slot = 0;
   /* semi-copied from iris */
   while (outputs_written) {
      int bit = u_bit_scan64(&outputs_written);
      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
         continue;
      reverse_map[slot++] = bit;
   }

   nir_foreach_shader_out_variable(var, zs->nir)
      var->data.explicit_xfb_buffer = 0;

   bool inlined[64] = {0};
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      unsigned slot = reverse_map[output->register_index];
      /* always set stride to be used during draw */
      zs->streamout.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
      if ((zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) &&
          !output->start_component) {
         nir_variable *var = NULL;
         while (!var)
            var = nir_find_variable_with_location(zs->nir, nir_var_shader_out, slot--);
         slot++;
         if (inlined[slot])
            continue;
         assert(var && var->data.location == slot);
         /* if this is the entire variable, try to blast it out during the initial declaration */
         if (glsl_get_components(var->type) == output->num_components) {
            var->data.explicit_xfb_buffer = 1;
            var->data.xfb.buffer = output->output_buffer;
            var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
            var->data.offset = output->dst_offset * 4;
            var->data.stream = output->stream;
            inlined[slot] = true;
            continue;
         }
      }
      zs->streamout.so_info.output[zs->streamout.so_info.num_outputs] = *output;
      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      zs->streamout.so_info_slots[zs->streamout.so_info.num_outputs++] = reverse_map[output->register_index];
   }
   zs->streamout.have_xfb = !!zs->streamout.so_info.num_outputs;
}
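/* Worked example for reverse_map above: if the shader writes POS, PSIZ (added
 * by nir_lower_point_size_mov, so have_psiz == false) and VAR0, then gallium's
 * condensed register_index values 0 and 1 map back to VARYING_SLOT_POS and
 * VARYING_SLOT_VAR0, with the synthetic PSIZ never occupying a slot. */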
struct decompose_state {
   nir_variable **split;
   bool needs_w;
};

static bool
lower_attrib(nir_builder *b, nir_instr *instr, void *data)
{
   struct decompose_state *state = data;
   nir_variable **split = state->split;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var != split[0])
      return false;
   unsigned num_components = glsl_get_vector_elements(split[0]->type);
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *loads[4];
   for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
      loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
   if (state->needs_w) {
      /* oob load w component to get correct value for int/float */
      loads[3] = nir_channel(b, loads[0], 3);
      loads[0] = nir_channel(b, loads[0], 0);
   }
   nir_ssa_def *new_load = nir_vec(b, loads, num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
   nir_instr_remove_v(instr);
   return true;
}

static bool
decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
{
   uint32_t bits = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
      bits |= BITFIELD_BIT(var->data.driver_location);
   bits = ~bits;
   u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
      nir_variable *split[5];
      struct decompose_state state;
      state.split = split;
      nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
      assert(var);
      split[0] = var;
      bits |= BITFIELD_BIT(var->data.driver_location);
      const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
      unsigned num_components = glsl_get_vector_elements(var->type);
      state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
      for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
         split[i+1] = nir_variable_clone(var, nir);
         split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
         if (decomposed_attrs_without_w & BITFIELD_BIT(location))
            split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
         else
            split[i+1]->type = new_type;
         split[i+1]->data.driver_location = ffs(bits) - 1;
         bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
         nir_shader_add_variable(nir, split[i+1]);
      }
      var->data.mode = nir_var_shader_temp;
      nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
   }
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir);
   return true;
}
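/* decompose_attribs handles vertex attributes that have to be split into
 * per-component inputs (flagged per-location in the zink_vs_key): each
 * component is re-declared as its own input at a free driver_location taken
 * from the "bits" free-slot mask, and lower_attrib reassembles the original
 * vecN at every load site.  needs_w keeps the x split as a full vec4 so that
 * reading its .w channel yields the format's implicit default for int/float. */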
static void
assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   unsigned slot = var->data.location;
   switch (var->data.location) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_LAYER:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CULL_DIST0:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_FACE:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_TESS_LEVEL_INNER:
      /* use a sentinel value to avoid counting later */
      var->data.driver_location = UINT_MAX;
      break;

   default:
      if (var->data.patch) {
         assert(var->data.location >= VARYING_SLOT_PATCH0);
         slot = var->data.location - VARYING_SLOT_PATCH0;
      } else if (var->data.location >= VARYING_SLOT_VAR0 &&
                 var->data.mode == nir_var_shader_in &&
                 stage == MESA_SHADER_TESS_EVAL) {
         slot = var->data.location - VARYING_SLOT_VAR0;
      } else {
         if (slot_map[var->data.location] == 0xff) {
            assert(*reserved < MAX_VARYING);
            slot_map[var->data.location] = *reserved;
            *reserved += glsl_count_vec4_slots(var->type, false, false);
         }
         slot = slot_map[var->data.location];
         assert(slot < MAX_VARYING);
      }
      var->data.driver_location = slot;
   }
}

ALWAYS_INLINE static bool
is_texcoord(gl_shader_stage stage, const nir_variable *var)
{
   if (stage != MESA_SHADER_FRAGMENT)
      return false;
   return var->data.location >= VARYING_SLOT_TEX0 &&
          var->data.location <= VARYING_SLOT_TEX7;
}

static bool
assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   switch (var->data.location) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_LAYER:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CULL_DIST0:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_FACE:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_TESS_LEVEL_INNER:
      /* use a sentinel value to avoid counting later */
      var->data.driver_location = UINT_MAX;
      break;
   default:
      if (var->data.patch) {
         assert(var->data.location >= VARYING_SLOT_PATCH0);
         var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
      } else if (var->data.location >= VARYING_SLOT_VAR0 &&
                 stage == MESA_SHADER_TESS_CTRL &&
                 var->data.mode == nir_var_shader_out)
         var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
      else {
         if (slot_map[var->data.location] == (unsigned char)-1) {
            if (!is_texcoord(stage, var))
               /* dead io */
               return false;
            /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE */
            slot_map[var->data.location] = (*reserved)++;
         }
         var->data.driver_location = slot_map[var->data.location];
      }
   }
   return true;
}
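/* Slot assignment works in two phases: the producer stage walks its outputs
 * first, reserving vec4 slots and recording location -> slot in slot_map, and
 * the consumer then looks its inputs up in the same table.  A consumer input
 * with no producer entry is dead io and can be dropped, except for
 * fragment-shader texcoords, which must survive because GL_COORD_REPLACE can
 * still feed them at draw time. */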
static bool
rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
{
   nir_variable *var = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
   if (deref_var != var)
      return false;
   nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
   return true;
}

void
zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
{
   unsigned reserved = 0;
   unsigned char slot_map[VARYING_SLOT_MAX];
   memset(slot_map, -1, sizeof(slot_map));
   bool do_fixup = false;
   nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      /* never assign from tcs -> tes, always invert */
      nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
         assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
         if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
            /* this is an output, nothing more needs to be done for it to be dropped */
            do_fixup = true;
      }
   } else {
      nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
         assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
         if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
            do_fixup = true;
            /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
            nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
         }
      }
   }
   if (!do_fixup)
      return;
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir);
}
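/* zink_shader_compile below operates on a clone of the stored nir: the
 * zink_shader_key drives destructive, variant-specific lowering, and every
 * key variant produces its own VkShaderModule, so the original shader has to
 * stay pristine for the next variant. */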
VkShaderModule
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key)
{
   VkShaderModule mod = VK_NULL_HANDLE;
   void *streamout = NULL;
   nir_shader *nir = nir_shader_clone(NULL, base_nir);

   if (key) {
      if (key->inline_uniforms) {
         NIR_PASS_V(nir, nir_inline_uniforms,
                    nir->info.num_inlinable_uniforms,
                    key->base.inlined_uniform_values,
                    nir->info.inlinable_uniform_dw_offsets);

         optimize_nir(nir);

         /* This must be done again. */
         NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
                                                          nir_var_shader_out);
      }

      /* TODO: use a separate mem ctx here for ralloc */
      switch (zs->nir->info.stage) {
      case MESA_SHADER_VERTEX: {
         uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
         const struct zink_vs_key *vs_key = zink_vs_key(key);
         switch (vs_key->size) {
         case 4:
            decomposed_attrs = vs_key->u32.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
            break;
         case 2:
            decomposed_attrs = vs_key->u16.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
            break;
         case 1:
            decomposed_attrs = vs_key->u8.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
            break;
         default: break;
         }
         if (decomposed_attrs || decomposed_attrs_without_w)
            NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
         FALLTHROUGH;
      }
      case MESA_SHADER_TESS_EVAL:
      case MESA_SHADER_GEOMETRY:
         if (zink_vs_key_base(key)->last_vertex_stage) {
            if (zs->streamout.have_xfb)
               streamout = &zs->streamout;

            if (!zink_vs_key_base(key)->clip_halfz) {
               NIR_PASS_V(nir, nir_lower_clip_halfz);
            }
            if (zink_vs_key_base(key)->push_drawid) {
               NIR_PASS_V(nir, lower_drawid);
            }
         }
         break;
      case MESA_SHADER_FRAGMENT:
         if (!zink_fs_key(key)->samples &&
             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
            /* VK will always use gl_SampleMask[] values even if sample count is 0,
             * so we need to skip this write here to mimic GL's behavior of ignoring it
             */
            nir_foreach_shader_out_variable(var, nir) {
               if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
                  var->data.mode = nir_var_shader_temp;
            }
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            optimize_nir(nir);
         }
         if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
            NIR_PASS_V(nir, lower_dual_blend);
         }
         if (zink_fs_key(key)->coord_replace_bits) {
            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits,
                       false, zink_fs_key(key)->coord_replace_yinvert);
         }
         if (nir->info.fs.uses_fbfetch_output) {
            nir_variable *fbfetch = NULL;
            NIR_PASS_V(nir, lower_fbfetch, &fbfetch);
            /* old variable must be deleted to avoid spirv errors */
            fbfetch->data.mode = nir_var_shader_temp;
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            optimize_nir(nir);
         }
         break;
      default: break;
      }
   }
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   struct spirv_shader *spirv = nir_to_spirv(nir, streamout, screen->spirv_version);
   if (!spirv)
      goto done;

   if (zink_debug & ZINK_DEBUG_SPIRV) {
      char buf[256];
      static int i;
      snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
      FILE *fp = fopen(buf, "wb");
      if (fp) {
         fwrite(spirv->words, sizeof(uint32_t), spirv->num_words, fp);
         fclose(fp);
         fprintf(stderr, "wrote '%s'...\n", buf);
      }
   }

   VkShaderModuleCreateInfo smci = {0};
   smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
   smci.codeSize = spirv->num_words * sizeof(uint32_t);
   smci.pCode = spirv->words;

   if (VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod) != VK_SUCCESS)
      mod = VK_NULL_HANDLE;

done:
   ralloc_free(nir);

   /* TODO: determine if there's any reason to cache spirv output? */
   ralloc_free(spirv);
   return mod;
}

static bool
lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_instance_id)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
   return true;
}

static bool
lower_baseinstance(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;
   return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
}
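/* GL's gl_InstanceID is zero-based regardless of the draw's base instance,
 * while Vulkan's InstanceIndex includes it, so the pass above rewrites
 * load_instance_id as InstanceIndex - BaseInstance. */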
bool nir_lower_dynamic_bo_access(nir_shader *shader);

/* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
 * so instead we delete all those broken variables and just make new ones
 */
static bool
unbreak_bos(nir_shader *shader)
{
   uint32_t ssbo_used = 0;
   uint32_t ubo_used = 0;
   uint64_t max_ssbo_size = 0;
   uint64_t max_ubo_size = 0;
   bool ssbo_sizes[PIPE_MAX_SHADER_BUFFERS] = {false};

   if (!shader->info.num_ssbos && !shader->info.num_ubos && !shader->num_uniforms)
      return false;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_ssbo:
            ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[1]));
            break;

         case nir_intrinsic_get_ssbo_size: {
            uint32_t slot = nir_src_as_uint(intrin->src[0]);
            ssbo_used |= BITFIELD_BIT(slot);
            ssbo_sizes[slot] = true;
            break;
         }
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
            break;
         case nir_intrinsic_load_ubo:
         case nir_intrinsic_load_ubo_vec4:
            ubo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
            break;
         default:
            break;
         }
      }
   }

   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      const struct glsl_type *type = glsl_without_array(var->type);
      if (type_is_counter(type))
         continue;
      unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
      if (var->data.mode == nir_var_mem_ubo)
         max_ubo_size = MAX2(max_ubo_size, size);
      else
         max_ssbo_size = MAX2(max_ssbo_size, size);
      var->data.mode = nir_var_shader_temp;
   }
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader);

   if (!ssbo_used && !ubo_used)
      return false;

   struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
   fields[0].name = ralloc_strdup(shader, "base");
   fields[1].name = ralloc_strdup(shader, "unsized");
   if (ubo_used) {
      const struct glsl_type *ubo_type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
      fields[0].type = ubo_type;
      u_foreach_bit(slot, ubo_used) {
         char buf[64];
         snprintf(buf, sizeof(buf), "ubo_slot_%u", slot);
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, glsl_struct_type(fields, 1, "struct", false), buf);
         var->interface_type = var->type;
         var->data.driver_location = slot;
      }
   }
   if (ssbo_used) {
      const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
      const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
      fields[0].type = ssbo_type;
      u_foreach_bit(slot, ssbo_used) {
         char buf[64];
         snprintf(buf, sizeof(buf), "ssbo_slot_%u", slot);
         if (ssbo_sizes[slot])
            fields[1].type = unsized;
         else
            fields[1].type = NULL;
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
                                                 glsl_struct_type(fields, 1 + !!ssbo_sizes[slot], "struct", false), buf);
         var->interface_type = var->type;
         var->data.driver_location = slot;
      }
   }
   return true;
}
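/* The replacement variables created above look roughly like this (GLSL-ish,
 * illustrative only), with all access lowered to uint-array indexing:
 *
 *    struct { uint base[4 * max_ubo_size]; } ubo_slot_N;
 *    struct { uint base[4 * max_ssbo_size]; } ssbo_slot_N;
 *    struct { uint base[4 * max_ssbo_size]; uint unsized[]; } ssbo_slot_N;
 *
 * where the unsized trailing array is only added when get_ssbo_size is used
 * on that slot. */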
/* this is a "default" bindless texture used if the shader has no texture variables */
static nir_variable *
create_bindless_texture(nir_shader *nir, nir_tex_instr *tex)
{
   unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
   nir_variable *var;

   const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
   var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
   var->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
   var->data.driver_location = var->data.binding = binding;
   return var;
}

/* this is a "default" bindless image used if the shader has no image variables */
static nir_variable *
create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim)
{
   unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
   nir_variable *var;

   const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
   var->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
   var->data.driver_location = var->data.binding = binding;
   var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
   return var;
}
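/* The ZINK_DESCRIPTOR_BINDLESS set uses a fixed binding scheme, mirrored by
 * handle_bindless_var() below:
 *    0 = combined image+sampler   1 = uniform texel buffer
 *    2 = storage image            3 = storage texel buffer
 * with each binding being an array of ZINK_MAX_BINDLESS_HANDLES descriptors
 * indexed by the bindless handle. */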
/* rewrite bindless instructions as array deref instructions */
static bool
lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
{
   nir_variable **bindless = data;

   if (in->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(in);
      int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
      if (idx == -1)
         return false;

      nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless[1] : bindless[0];
      if (!var)
         var = create_bindless_texture(b->shader, tex);
      b->cursor = nir_before_instr(in);
      nir_deref_instr *deref = nir_build_deref_var(b, var);
      if (glsl_type_is_array(var->type))
         deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
      nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);

      /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
       * match up with it in contrast to normal sampler ops where things are a bit more flexible;
       * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
       * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
       *
       * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
       * - Warhammer 40k: Dawn of War III
       */
      unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
      unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
      unsigned coord_components = nir_src_num_components(tex->src[c].src);
      if (coord_components < needed_components) {
         nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
         nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
         tex->coord_components = needed_components;
      }
      return true;
   }
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);

   nir_intrinsic_op op;
#define OP_SWAP(OP) \
   case nir_intrinsic_bindless_image_##OP: \
      op = nir_intrinsic_image_deref_##OP; \
      break;

   /* convert bindless intrinsics to deref intrinsics */
   switch (instr->intrinsic) {
   OP_SWAP(atomic_add)
   OP_SWAP(atomic_and)
   OP_SWAP(atomic_comp_swap)
   OP_SWAP(atomic_dec_wrap)
   OP_SWAP(atomic_exchange)
   OP_SWAP(atomic_fadd)
   OP_SWAP(atomic_fmax)
   OP_SWAP(atomic_fmin)
   OP_SWAP(atomic_imax)
   OP_SWAP(atomic_imin)
   OP_SWAP(atomic_inc_wrap)
   OP_SWAP(atomic_or)
   OP_SWAP(atomic_umax)
   OP_SWAP(atomic_umin)
   OP_SWAP(atomic_xor)
   OP_SWAP(format)
   OP_SWAP(load)
   OP_SWAP(order)
   OP_SWAP(samples)
   OP_SWAP(size)
   OP_SWAP(store)
   default:
      return false;
   }

   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
   nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless[3] : bindless[2];
   if (!var)
      var = create_bindless_image(b->shader, dim);
   instr->intrinsic = op;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (glsl_type_is_array(var->type))
      deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
   nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
   return true;
}

static bool
lower_bindless(nir_shader *shader, nir_variable **bindless)
{
   if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
      return false;
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader);
   return true;
}
/* convert shader image/texture io variables to int64 handles for bindless indexing */
static bool
lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_deref &&
       instr->intrinsic != nir_intrinsic_store_deref)
      return false;

   nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(src_deref);
   if (var->data.bindless)
      return false;
   if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
      return false;
   if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
      return false;

   var->type = glsl_int64_t_type();
   var->data.bindless = 1;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (instr->intrinsic == nir_intrinsic_load_deref) {
      nir_ssa_def *def = nir_load_deref(b, deref);
      nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
   } else {
      nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
   }
   nir_instr_remove(in);
   nir_instr_remove(&src_deref->instr);
   return true;
}

static bool
lower_bindless_io(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
}

static uint32_t
zink_binding(gl_shader_stage stage, VkDescriptorType type, int index)
{
   if (stage == MESA_SHADER_NONE) {
      unreachable("not supported");
   } else {
      switch (type) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
         assert(index < PIPE_MAX_CONSTANT_BUFFERS);
         return (stage * PIPE_MAX_CONSTANT_BUFFERS) + index;

      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
         assert(index < PIPE_MAX_SAMPLERS);
         return (stage * PIPE_MAX_SAMPLERS) + index;

      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
         assert(index < PIPE_MAX_SHADER_BUFFERS);
         return (stage * PIPE_MAX_SHADER_BUFFERS) + index;

      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
         assert(index < PIPE_MAX_SHADER_IMAGES);
         return (stage * PIPE_MAX_SHADER_IMAGES) + index;

      default:
         unreachable("unexpected type");
      }
   }
}
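/* zink_binding() gives every stage a disjoint binding range per descriptor
 * class: stage S's i-th UBO lands at S * PIPE_MAX_CONSTANT_BUFFERS + i, its
 * i-th sampler at S * PIPE_MAX_SAMPLERS + i, and so on, so bindings from
 * different stages never collide inside a shared descriptor set layout. */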
static void
handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, nir_variable **bindless)
{
   if (glsl_type_is_struct(type)) {
      for (unsigned i = 0; i < glsl_get_length(type); i++)
         handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
      return;
   }

   /* just a random scalar in a struct */
   if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
      return;

   VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
   unsigned binding;
   switch (vktype) {
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      binding = 0;
      break;
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
      binding = 1;
      break;
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      binding = 2;
      break;
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      binding = 3;
      break;
   default:
      unreachable("unknown");
   }
   if (!bindless[binding]) {
      bindless[binding] = nir_variable_clone(var, nir);
      bindless[binding]->data.bindless = 0;
      bindless[binding]->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
      bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
      bindless[binding]->data.driver_location = bindless[binding]->data.binding = binding;
      if (!bindless[binding]->data.image.format)
         bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
      nir_shader_add_variable(nir, bindless[binding]);
   } else {
      assert(glsl_get_sampler_dim(glsl_without_array(bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
   }
   var->data.mode = nir_var_shader_temp;
}

static enum pipe_prim_type
gl_prim_to_pipe(unsigned primitive_type)
{
   switch (primitive_type) {
   case GL_POINTS:
      return PIPE_PRIM_POINTS;
   case GL_LINES:
   case GL_LINE_LOOP:
   case GL_LINE_STRIP:
   case GL_LINES_ADJACENCY:
   case GL_LINE_STRIP_ADJACENCY:
   case GL_ISOLINES:
      return PIPE_PRIM_LINES;
   default:
      return PIPE_PRIM_TRIANGLES;
   }
}

static enum pipe_prim_type
get_shader_base_prim_type(struct nir_shader *nir)
{
   switch (nir->info.stage) {
   case MESA_SHADER_GEOMETRY:
      return gl_prim_to_pipe(nir->info.gs.output_primitive);
   case MESA_SHADER_TESS_EVAL:
      return nir->info.tess.point_mode ? PIPE_PRIM_POINTS : gl_prim_to_pipe(nir->info.tess.primitive_mode);
   default:
      break;
   }
   return PIPE_PRIM_MAX;
}
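/* get_shader_base_prim_type() collapses strips/fans/adjacency down to the base
 * point/line/triangle class; stages that don't redefine the output primitive
 * report PIPE_PRIM_MAX, i.e. "no override" (this reading of the sentinel is an
 * inference from how reduced_prim is assigned below, not spelled out here). */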
struct zink_shader *
zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
                   const struct pipe_stream_output_info *so_info)
{
   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
   bool have_psiz = false;

   ret->hash = _mesa_hash_pointer(ret);
   ret->reduced_prim = get_shader_base_prim_type(nir);

   ret->programs = _mesa_pointer_set_create(NULL);
   simple_mtx_init(&ret->lock, mtx_plain);

   nir_variable_mode indirect_derefs_modes = nir_var_function_temp;
   if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
       nir->info.stage == MESA_SHADER_TESS_EVAL)
      indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;

   NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
              UINT32_MAX);

   if (nir->info.stage == MESA_SHADER_VERTEX)
      create_vs_pushconst(nir);
   else if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
            nir->info.stage == MESA_SHADER_TESS_EVAL)
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   else if (nir->info.stage == MESA_SHADER_KERNEL)
      create_cs_pushconst(nir);

   if (nir->info.stage < MESA_SHADER_FRAGMENT)
      have_psiz = check_psiz(nir);
   NIR_PASS_V(nir, lower_basevertex);
   NIR_PASS_V(nir, lower_work_dim);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   NIR_PASS_V(nir, lower_baseinstance);

   {
      nir_lower_subgroups_options subgroup_options = {0};
      subgroup_options.lower_to_scalar = true;
      subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
      subgroup_options.ballot_bit_size = 32;
      subgroup_options.ballot_components = 4;
      subgroup_options.lower_subgroup_masks = true;
      NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
   }

   optimize_nir(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, lower_discard_if);
   NIR_PASS_V(nir, nir_lower_fragcolor,
              nir->info.fs.color_is_dual_source ? 1 : 8);
   NIR_PASS_V(nir, lower_64bit_vertex_attribs);
   NIR_PASS_V(nir, unbreak_bos);

   if (zink_debug & ZINK_DEBUG_NIR) {
      fprintf(stderr, "NIR shader:\n---8<---\n");
      nir_print_shader(nir, stderr);
      fprintf(stderr, "---8<---\n");
   }

   nir_variable *bindless[4] = {0};
   bool has_bindless_io = false;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
      if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
         has_bindless_io = true;
         break;
      }
   }
   if (has_bindless_io)
      NIR_PASS_V(nir, lower_bindless_io);

   foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
      if (_nir_shader_variable_has_mode(var, nir_var_uniform |
                                             nir_var_mem_ubo |
                                             nir_var_mem_ssbo)) {
         enum zink_descriptor_type ztype;
         const struct glsl_type *type = glsl_without_array(var->type);
         if (var->data.mode == nir_var_mem_ubo) {
            ztype = ZINK_DESCRIPTOR_TYPE_UBO;
            /* buffer 0 is a push descriptor */
            var->data.descriptor_set = !!var->data.driver_location;
            var->data.binding = !var->data.driver_location ? nir->info.stage :
                                zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
                                             var->data.driver_location);
            assert(var->data.driver_location || var->data.binding < 10);
            VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
            int binding = var->data.binding;

            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
            ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
            ret->ubos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
            ret->num_bindings[ztype]++;
         } else if (var->data.mode == nir_var_mem_ssbo) {
            ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
            var->data.descriptor_set = ztype + 1;
            var->data.binding = zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                             var->data.driver_location);
            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
            ret->ssbos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
            ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
            ret->num_bindings[ztype]++;
         } else {
            assert(var->data.mode == nir_var_uniform);
            if (var->data.bindless) {
               ret->bindless = true;
               handle_bindless_var(nir, var, type, bindless);
            } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
               VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
               ztype = zink_desc_type_from_vktype(vktype);
               if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
                  ret->num_texel_buffers++;
               var->data.driver_location = var->data.binding;
               var->data.descriptor_set = ztype + 1;
               var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location);
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
               if (glsl_type_is_array(var->type))
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
               else
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
               ret->num_bindings[ztype]++;
            }
         }
      }
   }
   bool bindless_lowered = false;
   NIR_PASS(bindless_lowered, nir, lower_bindless, bindless);
   ret->bindless |= bindless_lowered;

   ret->nir = nir;
   if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings)
      update_so_info(ret, so_info, nir->info.outputs_written, have_psiz);

   return ret;
}
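/* Descriptor set layout established above: set 0 is reserved for the "push
 * descriptor" UBO (driver_location 0, i.e. gallium constant buffer 0, bound as
 * a dynamic UBO at binding == stage); everything else lands in set ztype + 1
 * with a binding computed by zink_binding(). */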
char *
zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
{
   struct zink_screen *screen = zink_screen(pscreen);
   nir_shader *nir = nirptr;

   if (!screen->info.feats.features.shaderImageGatherExtended) {
      nir_lower_tex_options tex_opts = {0};
      tex_opts.lower_tg4_offsets = true;
      NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
   }
   NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false);
   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
   optimize_nir(nir);
   if (nir->info.num_ubos || nir->info.num_ssbos)
      NIR_PASS_V(nir, nir_lower_dynamic_bo_access);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (screen->driconf.inline_uniforms)
      nir_find_inlinable_uniforms(nir);

   return NULL;
}

void
zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   set_foreach(shader->programs, entry) {
      if (shader->nir->info.stage == MESA_SHADER_COMPUTE) {
         struct zink_compute_program *comp = (void*)entry->key;
         if (!comp->base.removed) {
            _mesa_hash_table_remove_key(&ctx->compute_program_cache, comp->shader);
            comp->base.removed = true;
         }
         comp->shader = NULL;
         zink_compute_program_reference(screen, &comp, NULL);
      } else {
         struct zink_gfx_program *prog = (void*)entry->key;
         enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage);
         assert(pstage < ZINK_SHADER_COUNT);
         if (!prog->base.removed && (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)) {
            _mesa_hash_table_remove_key(&ctx->program_cache[prog->stages_present >> 2], prog->shaders);
            prog->base.removed = true;
         }
         prog->shaders[pstage] = NULL;
         if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated)
            /* automatically destroy generated tcs shaders when tes is destroyed */
            zink_shader_free(ctx, shader->generated);
         zink_gfx_program_reference(screen, &prog, NULL);
      }
   }
   _mesa_set_destroy(shader->programs, NULL);
   ralloc_free(shader->nir);
   FREE(shader);
}
/* creating a passthrough tcs shader that's roughly:

#version 150
#extension GL_ARB_tessellation_shader : require

in vec4 some_var[gl_MaxPatchVertices];
out vec4 some_var_out;

layout(push_constant) uniform tcsPushConstants {
    layout(offset = 0) float TessLevelInner[2];
    layout(offset = 8) float TessLevelOuter[4];
} u_tcsPushConstants;
layout(vertices = $vertices_per_patch) out;
void main()
{
  gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
  gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
  some_var_out = some_var[gl_InvocationID];
}

*/
struct zink_shader *
zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch)
{
   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
   ret->hash = _mesa_hash_pointer(ret);
   ret->programs = _mesa_pointer_set_create(NULL);
   simple_mtx_init(&ret->lock, mtx_plain);

   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
   nir_function *fn = nir_function_create(nir, "main");
   fn->is_entrypoint = true;
   nir_function_impl *impl = nir_function_impl_create(fn);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_block(nir_start_block(impl));

   nir_ssa_def *invocation_id = nir_load_invocation_id(&b);

   nir_foreach_shader_out_variable(var, vs->nir) {
      const struct glsl_type *type = var->type;
      const struct glsl_type *in_type = var->type;
      const struct glsl_type *out_type = var->type;
      char buf[1024];
      snprintf(buf, sizeof(buf), "%s_out", var->name);
      in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
      out_type = glsl_array_type(type, vertices_per_patch, 0);

      nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
      nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
      out->data.location = in->data.location = var->data.location;
      out->data.location_frac = in->data.location_frac = var->data.location_frac;

      /* gl_in[] receives values from equivalent built-in output
       * variables written by the vertex shader (section 2.14.7).  Each array
       * element of gl_in[] is a structure holding values for a specific vertex of
       * the input patch.  The length of gl_in[] is equal to the
       * implementation-dependent maximum patch size (gl_MaxPatchVertices).
       *  - ARB_tessellation_shader
       */
      for (unsigned i = 0; i < vertices_per_patch; i++) {
         /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
         nir_if *start_block = nir_push_if(&b, nir_ieq(&b, invocation_id, nir_imm_int(&b, i)));
         nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
         nir_ssa_def *load = nir_load_deref(&b, in_array_var);
         nir_deref_instr *out_array_var = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, out), i);
         nir_store_deref(&b, out_array_var, load, 0xff);
         nir_pop_if(&b, start_block);
      }
   }
   nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
   gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
   gl_TessLevelInner->data.patch = 1;
   nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
   gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
   gl_TessLevelOuter->data.patch = 1;

   /* hacks so we can size these right for now */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3);
   /* just use a single blob for padding here because it's easier */
   fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0);
   fields[0].name = ralloc_asprintf(nir, "padding");
   fields[0].offset = 0;
   fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0);
   fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level);
   fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0);
   fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter");
   fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level);
   nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                                 glsl_struct_type(fields, 3, "struct", false), "pushconst");
   pushconst->data.location = VARYING_SLOT_VAR0;

   nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8);
   nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16);

   for (unsigned i = 0; i < 2; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
   }
   for (unsigned i = 0; i < 4; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
   }

   nir->info.tess.tcs_vertices_out = vertices_per_patch;
   nir_validate_shader(nir, "created");

   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   optimize_nir(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, lower_discard_if);
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   ret->nir = nir;
   ret->is_generated = true;
   return ret;
}