iris_program.c revision 7ec681f3
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_program.c
 *
 * This file contains the driver interface for compiling shaders.
 *
 * See iris_program_cache.c for the in-memory program cache where the
 * compiled shaders are stored.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
#include "util/debug.h"
#include "util/u_async_debug.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "iris_context.h"
#include "nir/tgsi_to_nir.h"

#define KEY_ID(prefix) .prefix.program_string_id = ish->program_id
#define BRW_KEY_INIT(gen, prog_id)                       \
   .base.program_string_id = prog_id,                    \
   .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
   .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688,   \
   .base.tex.compressed_multisample_layout_mask = ~0,    \
   .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
struct iris_threaded_compile_job {
   struct iris_screen *screen;
   struct u_upload_mgr *uploader;
   struct pipe_debug_callback *dbg;
   struct iris_uncompiled_shader *ish;
   struct iris_compiled_shader *shader;
};

static unsigned
get_new_program_id(struct iris_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

void
iris_finalize_program(struct iris_compiled_shader *shader,
                      struct brw_stage_prog_data *prog_data,
                      uint32_t *streamout,
                      enum brw_param_builtin *system_values,
                      unsigned num_system_values,
                      unsigned kernel_input_size,
                      unsigned num_cbufs,
                      const struct iris_binding_table *bt)
{
   shader->prog_data = prog_data;
   shader->streamout = streamout;
   shader->system_values = system_values;
   shader->num_system_values = num_system_values;
   shader->kernel_input_size = kernel_input_size;
   shader->num_cbufs = num_cbufs;
   shader->bt = *bt;

   ralloc_steal(shader, shader->prog_data);
   ralloc_steal(shader->prog_data, (void *)prog_data->relocs);
   ralloc_steal(shader->prog_data, prog_data->param);
   ralloc_steal(shader->prog_data, prog_data->pull_param);
   ralloc_steal(shader, shader->streamout);
   ralloc_steal(shader, shader->system_values);
}

static struct brw_vs_prog_key
iris_to_brw_vs_key(const struct intel_device_info *devinfo,
                   const struct iris_vs_prog_key *key)
{
   return (struct brw_vs_prog_key) {
      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),

      /* Don't tell the backend about our clip plane constants, we've
       * already lowered them in NIR and don't want it doing it again.
       */
      .nr_userclip_plane_consts = 0,
   };
}

static struct brw_tcs_prog_key
iris_to_brw_tcs_key(const struct intel_device_info *devinfo,
                    const struct iris_tcs_prog_key *key)
{
   return (struct brw_tcs_prog_key) {
      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
      .tes_primitive_mode = key->tes_primitive_mode,
      .input_vertices = key->input_vertices,
      .patch_outputs_written = key->patch_outputs_written,
      .outputs_written = key->outputs_written,
      .quads_workaround = key->quads_workaround,
   };
}

static struct brw_tes_prog_key
iris_to_brw_tes_key(const struct intel_device_info *devinfo,
                    const struct iris_tes_prog_key *key)
{
   return (struct brw_tes_prog_key) {
      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
      .patch_inputs_read = key->patch_inputs_read,
      .inputs_read = key->inputs_read,
   };
}

static struct brw_gs_prog_key
iris_to_brw_gs_key(const struct intel_device_info *devinfo,
                   const struct iris_gs_prog_key *key)
{
   return (struct brw_gs_prog_key) {
      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
   };
}

static struct brw_wm_prog_key
iris_to_brw_fs_key(const struct intel_device_info *devinfo,
                   const struct iris_fs_prog_key *key)
{
   return (struct brw_wm_prog_key) {
      BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
      .nr_color_regions = key->nr_color_regions,
      .flat_shade = key->flat_shade,
      .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
      .alpha_to_coverage = key->alpha_to_coverage,
      .clamp_fragment_color = key->clamp_fragment_color,
      .persample_interp = key->persample_interp,
      .multisample_fbo = key->multisample_fbo,
      .force_dual_color_blend = key->force_dual_color_blend,
      .coherent_fb_fetch = key->coherent_fb_fetch,
      .color_outputs_valid = key->color_outputs_valid,
      .input_slots_valid = key->input_slots_valid,
      .ignore_sample_mask_out = !key->multisample_fbo,
   };
}

static struct brw_cs_prog_key
iris_to_brw_cs_key(const struct intel_device_info *devinfo,
                   const struct iris_cs_prog_key *key)
{
   return (struct brw_cs_prog_key) {
      BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
   };
}

static void *
upload_state(struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *p = NULL;
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
   return p;
}

void
iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
                                struct pipe_shader_buffer *buf,
                                struct iris_state_ref *surf_state,
                                isl_surf_usage_flags_t usage)
{
   struct pipe_context *ctx = &ice->ctx;
   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
   bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;

   void *map =
      upload_state(ice->state.surface_uploader, surf_state,
                   screen->isl_dev.ss.size, 64);
   if (!unlikely(map)) {
      surf_state->res = NULL;
      return;
   }

   struct iris_resource *res = (void *) buf->buffer;
   struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
   surf_state->offset += iris_bo_offset_from_base_address(surf_bo);

   const bool dataport = ssbo || !screen->compiler->indirect_ubos_use_sampler;

   isl_buffer_fill_state(&screen->isl_dev, map,
                         .address = res->bo->address + res->offset +
                                    buf->buffer_offset,
                         .size_B = buf->buffer_size - res->offset,
                         .format = dataport ? ISL_FORMAT_RAW
                                            : ISL_FORMAT_R32G32B32A32_FLOAT,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .stride_B = 1,
                         .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
}

static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned array_size = elem_size;
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* This level's element size is the previous level's array size */
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      assert(deref->arr.index.ssa);
      offset = nir_iadd(b, offset,
                        nir_imul(b, index, nir_imm_int(b, array_size)));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang.  According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
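   /* Worked example (editor's illustration, not from the original source):
    * for a GLSL declaration like `image2D img[3][2]` accessed as img[i][j]
    * with elem_size == 1, the loop above produces offset = j + 2 * i, and
    * the clamp below limits it to array_size - elem_size = 5.
    */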
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}

static void
iris_lower_storage_image_derefs(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_imin:
         case nir_intrinsic_image_deref_atomic_umin:
         case nir_intrinsic_image_deref_atomic_imax:
         case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index =
               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
                        get_aoa_deref_offset(&b, deref, 1));
            nir_rewrite_image_intrinsic(intrin, index, false);
            break;
         }

         default:
            break;
         }
      }
   }
}

static bool
iris_uses_image_atomic(const nir_shader *shader)
{
   nir_foreach_function(function, shader) {
      if (function->impl == NULL)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_image_deref_atomic_add:
            case nir_intrinsic_image_deref_atomic_imin:
            case nir_intrinsic_image_deref_atomic_umin:
            case nir_intrinsic_image_deref_atomic_imax:
            case nir_intrinsic_image_deref_atomic_umax:
            case nir_intrinsic_image_deref_atomic_and:
            case nir_intrinsic_image_deref_atomic_or:
            case nir_intrinsic_image_deref_atomic_xor:
            case nir_intrinsic_image_deref_atomic_exchange:
            case nir_intrinsic_image_deref_atomic_comp_swap:
               unreachable("Should have been lowered in "
                           "iris_lower_storage_image_derefs");

            case nir_intrinsic_image_atomic_add:
            case nir_intrinsic_image_atomic_imin:
            case nir_intrinsic_image_atomic_umin:
            case nir_intrinsic_image_atomic_imax:
            case nir_intrinsic_image_atomic_umax:
            case nir_intrinsic_image_atomic_and:
            case nir_intrinsic_image_atomic_or:
            case nir_intrinsic_image_atomic_xor:
            case nir_intrinsic_image_atomic_exchange:
            case nir_intrinsic_image_atomic_comp_swap:
               return true;

            default:
               break;
            }
         }
      }
   }

   return false;
}
/**
 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
 */
static bool
iris_fix_edge_flags(nir_shader *nir)
{
   if (nir->info.stage != MESA_SHADER_VERTEX) {
      nir_shader_preserve_all_metadata(nir);
      return false;
   }

   nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
                                                       VARYING_SLOT_EDGE);
   if (!var) {
      nir_shader_preserve_all_metadata(nir);
      return false;
   }

   var->data.mode = nir_var_shader_temp;
   nir->info.outputs_written &= ~VARYING_BIT_EDGE;
   nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
   nir_fixup_deref_modes(nir);

   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_metadata_preserve(f->impl, nir_metadata_block_index |
                                        nir_metadata_dominance |
                                        nir_metadata_live_ssa_defs |
                                        nir_metadata_loop_analysis);
      } else {
         nir_metadata_preserve(f->impl, nir_metadata_all);
      }
   }

   return true;
}

/**
 * Fix an uncompiled shader's stream output info.
 *
 * Core Gallium stores output->register_index as a "slot" number, where
 * slots are assigned consecutively to all outputs in info->outputs_written.
 * This naive packing of outputs doesn't work for us - we too have slots,
 * but the layout is defined by the VUE map, which we won't have until we
 * compile a specific shader variant.  So, we remap these and simply store
 * VARYING_SLOT_* in our copy's output->register_index fields.
 *
 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
 * components of our VUE header.  See brw_vue_map.c for the layout.
 */
static void
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
{
   uint8_t reverse_map[64] = {};
   unsigned slot = 0;
   while (outputs_written) {
      reverse_map[slot++] = u_bit_scan64(&outputs_written);
   }

   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      output->register_index = reverse_map[output->register_index];

      /* The VUE header contains three scalar fields packed together:
       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      switch (output->register_index) {
      case VARYING_SLOT_LAYER:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 1;
         break;
      case VARYING_SLOT_VIEWPORT:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 2;
         break;
      case VARYING_SLOT_PSIZ:
         assert(output->num_components == 1);
         output->start_component = 3;
         break;
      }

      //info->outputs_written |= 1ull << output->register_index;
   }
}

static void
setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
                        unsigned offset, unsigned n)
{
   assert(offset % sizeof(uint32_t) == 0);

   for (unsigned i = 0; i < n; ++i)
      sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);

   for (unsigned i = n; i < 4; ++i)
      sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
}
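/* Illustrative note (editor's addition, not in the original source): a call
 * such as
 *    setup_vec4_image_sysval(sysvals, idx,
 *                            offsetof(struct brw_image_param, offset), 2);
 * fills sysvals[0..1] with BRW_PARAM_IMAGE(idx, dword_offset + i) and pads
 * sysvals[2..3] with BRW_PARAM_BUILTIN_ZERO, so each image parameter group
 * always occupies a full vec4 of push constants.
 */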
/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend.  Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 */
static void
iris_setup_uniforms(const struct brw_compiler *compiler,
                    void *mem_ctx,
                    nir_shader *nir,
                    struct brw_stage_prog_data *prog_data,
                    unsigned kernel_input_size,
                    enum brw_param_builtin **out_system_values,
                    unsigned *out_num_system_values,
                    unsigned *out_num_cbufs)
{
   UNUSED const struct intel_device_info *devinfo = compiler->devinfo;

   unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));

   const unsigned IRIS_MAX_SYSTEM_VALUES =
      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
   enum brw_param_builtin *system_values =
      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   unsigned patch_vert_idx = -1;
   unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
   unsigned variable_group_size_idx = -1;
   unsigned work_dim_idx = -1;
   memset(ucp_idx, -1, sizeof(ucp_idx));
   memset(img_idx, -1, sizeof(img_idx));

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   b.cursor = nir_before_block(nir_start_block(impl));
   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);

   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         nir_ssa_def *offset;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_constant: {
            unsigned load_size = intrin->dest.ssa.num_components *
                                 intrin->dest.ssa.bit_size / 8;
            unsigned load_align = intrin->dest.ssa.bit_size / 8;

            /* This one is special because it reads from the shader constant
             * data and not cbuf0 which gallium uploads for us.
             */
            b.cursor = nir_instr_remove(&intrin->instr);

            nir_ssa_def *offset =
               nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
                            nir_intrinsic_base(intrin));

            assert(load_size < b.shader->constant_data_size);
            unsigned max_offset = b.shader->constant_data_size - load_size;
            offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));

            nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));

            nir_ssa_def *data =
               nir_load_global(&b, nir_iadd(&b, const_data_base_addr,
                                                nir_u2u64(&b, offset)),
                               load_align,
                               intrin->dest.ssa.num_components,
                               intrin->dest.ssa.bit_size);

            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     data);
            continue;
         }
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);

            if (ucp_idx[ucp] == -1) {
               ucp_idx[ucp] = num_system_values;
               num_system_values += 4;
            }

            for (int i = 0; i < 4; i++) {
               system_values[ucp_idx[ucp] + i] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     ucp_idx[ucp] * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_patch_vertices_in:
            if (patch_vert_idx == -1)
               patch_vert_idx = num_system_values++;

            system_values[patch_vert_idx] =
               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     patch_vert_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_image_deref_load_param_intel: {
            assert(devinfo->ver < 9);
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            if (img_idx[var->data.binding] == -1) {
               /* GL only allows arrays of arrays of images. */
               assert(glsl_type_is_image(glsl_without_array(var->type)));
               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));

               for (int i = 0; i < num_images; i++) {
                  const unsigned img = var->data.binding + i;

                  img_idx[img] = num_system_values;
                  num_system_values += BRW_IMAGE_PARAM_SIZE;

                  uint32_t *img_sv = &system_values[img_idx[img]];

                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
                     offsetof(struct brw_image_param, offset), 2);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
                     offsetof(struct brw_image_param, size), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
                     offsetof(struct brw_image_param, stride), 4);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
                     offsetof(struct brw_image_param, tiling), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
                     offsetof(struct brw_image_param, swizzling), 2);
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_iadd(&b,
               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
               nir_imm_int(&b, system_values_start +
                               img_idx[var->data.binding] * 4 +
                               nir_intrinsic_base(intrin) * 16));
            break;
         }
         case nir_intrinsic_load_workgroup_size: {
            assert(nir->info.workgroup_size_variable);
            if (variable_group_size_idx == -1) {
               variable_group_size_idx = num_system_values;
               num_system_values += 3;
               for (int i = 0; i < 3; i++) {
                  system_values[variable_group_size_idx + i] =
                     BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     variable_group_size_idx * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_work_dim: {
            if (work_dim_idx == -1) {
               work_dim_idx = num_system_values++;
               system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
            }
            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     work_dim_idx * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_kernel_input: {
            assert(nir_intrinsic_base(intrin) +
                   nir_intrinsic_range(intrin) <= kernel_input_size);
            b.cursor = nir_before_instr(instr);
            offset = nir_iadd_imm(&b, intrin->src[0].ssa,
                                      nir_intrinsic_base(intrin));
            break;
         }
         default:
            continue;
         }

         nir_ssa_def *load =
            nir_load_ubo(&b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size,
                         temp_ubo_name, offset,
                         .align_mul = 4,
                         .align_offset = 0,
                         .range_base = 0,
                         .range = ~0);

         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  load);
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");

   /* Uniforms are stored in constant buffer 0; the user-facing UBOs are
    * therefore shifted up by one.  So if any constant buffer is needed,
    * constant buffer 0 will be needed, so account for it.
    */
   unsigned num_cbufs = nir->info.num_ubos;
   if (num_cbufs || nir->num_uniforms)
      num_cbufs++;
   /* Place the new params in a new cbuf. */
   if (num_system_values > 0 || kernel_input_size > 0) {
      unsigned sysval_cbuf_index = num_cbufs;
      num_cbufs++;

      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
                               num_system_values);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            assert(load->src[0].is_ssa);

            if (load->src[0].ssa == temp_ubo_name) {
               nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(imm));
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
   nir_validate_shader(nir, "after remap");

   /* We don't use params[] but gallium leaves num_uniforms set.  We use this
    * to detect when cbuf0 exists but we don't need it anymore when we get
    * here.  Instead, zero it out so that the back-end doesn't get confused
    * when nr_params * 4 != num_uniforms != nr_params * 4.
    */
   nir->num_uniforms = 0;

   *out_system_values = system_values;
   *out_num_system_values = num_system_values;
   *out_num_cbufs = num_cbufs;
}

static const char *surface_group_names[] = {
   [IRIS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
   [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
   [IRIS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
   [IRIS_SURFACE_GROUP_TEXTURE]            = "texture",
   [IRIS_SURFACE_GROUP_UBO]                = "ubo",
   [IRIS_SURFACE_GROUP_SSBO]               = "ssbo",
   [IRIS_SURFACE_GROUP_IMAGE]              = "image",
};

static void
iris_print_binding_table(FILE *fp, const char *name,
                         const struct iris_binding_table *bt)
{
   STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);

   uint32_t total = 0;
   uint32_t compacted = 0;

   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      uint32_t size = bt->sizes[i];
      total += size;
      if (size)
         compacted += util_bitcount64(bt->used_mask[i]);
   }

   if (total == 0) {
      fprintf(fp, "Binding table for %s is empty\n\n", name);
      return;
   }

   if (total != compacted) {
      fprintf(fp, "Binding table for %s "
              "(compacted to %u entries from %u entries)\n",
              name, compacted, total);
   } else {
      fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
   }

   uint32_t entry = 0;
   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      uint64_t mask = bt->used_mask[i];
      while (mask) {
         int index = u_bit_scan64(&mask);
         fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
      }
   }
   fprintf(fp, "\n");
}

enum {
   /* Max elements in a surface group. */
   SURFACE_GROUP_MAX_ELEMENTS = 64,
};
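/* Illustrative note (editor's addition, not in the original source): the
 * compaction scheme below assigns a binding table index as the group's base
 * offset plus the number of lower-numbered used entries in the group.  For
 * instance, with used_mask[UBO] = 0b101101 and offsets[UBO] = 7, group
 * index 5 maps to 7 + popcount(0b001101) = 10, while an unused index such
 * as 4 reports IRIS_SURFACE_NOT_USED.
 */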
/**
 * Map a <group, index> pair to a binding table index.
 *
 * For example: <UBO, 5> => binding table index 12
 */
uint32_t
iris_group_index_to_bti(const struct iris_binding_table *bt,
                        enum iris_surface_group group, uint32_t index)
{
   assert(index < bt->sizes[group]);
   uint64_t mask = bt->used_mask[group];
   uint64_t bit = 1ull << index;
   if (bit & mask) {
      return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
   } else {
      return IRIS_SURFACE_NOT_USED;
   }
}

/**
 * Map a binding table index back to a <group, index> pair.
 *
 * For example: binding table index 12 => <UBO, 5>
 */
uint32_t
iris_bti_to_group_index(const struct iris_binding_table *bt,
                        enum iris_surface_group group, uint32_t bti)
{
   uint64_t used_mask = bt->used_mask[group];
   assert(bti >= bt->offsets[group]);

   uint32_t c = bti - bt->offsets[group];
   while (used_mask) {
      int i = u_bit_scan64(&used_mask);
      if (c == 0)
         return i;
      c--;
   }

   return IRIS_SURFACE_NOT_USED;
}

static void
rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
                     nir_instr *instr, nir_src *src,
                     enum iris_surface_group group)
{
   assert(bt->sizes[group] > 0);

   b->cursor = nir_before_instr(instr);
   nir_ssa_def *bti;
   if (nir_src_is_const(*src)) {
      uint32_t index = nir_src_as_uint(*src);
      bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
                           src->ssa->bit_size);
   } else {
      /* Indirect usage makes all the surfaces of the group available,
       * so we can just add the base.
       */
      assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
      bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
   }
   nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
}

static void
mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
                   enum iris_surface_group group)
{
   assert(bt->sizes[group] > 0);

   if (nir_src_is_const(*src)) {
      uint64_t index = nir_src_as_uint(*src);
      assert(index < bt->sizes[group]);
      bt->used_mask[group] |= 1ull << index;
   } else {
      /* There's an indirect usage, we need all the surfaces. */
      bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
   }
}

static bool
skip_compacting_binding_tables(void)
{
   static int skip = -1;
   if (skip < 0)
      skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
   return skip;
}

/**
 * Set up the binding table indices and apply to the shader.
 */
static void
iris_setup_binding_table(const struct intel_device_info *devinfo,
                         struct nir_shader *nir,
                         struct iris_binding_table *bt,
                         unsigned num_render_targets,
                         unsigned num_system_values,
                         unsigned num_cbufs)
{
   const struct shader_info *info = &nir->info;

   memset(bt, 0, sizeof(*bt));

   /* Set the sizes for each surface group.  For some groups, we already know
    * upfront how many will be used, so mark them.
    */
   if (info->stage == MESA_SHADER_FRAGMENT) {
      bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
      /* All render targets used. */
      bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
         BITFIELD64_MASK(num_render_targets);

      /* Setup render target read surface group in order to support non-coherent
       * framebuffer fetch on Gfx8
       */
      if (devinfo->ver == 8 && info->outputs_read) {
         bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
         bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
            BITFIELD64_MASK(num_render_targets);
      }
   } else if (info->stage == MESA_SHADER_COMPUTE) {
      bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
   }

   bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = BITSET_LAST_BIT(info->textures_used);
   bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used[0];

   bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;

   /* Allocate an extra slot in the UBO section for NIR constants.
    * Binding table compaction will remove it if unnecessary.
    *
    * We don't include them in iris_compiled_shader::num_cbufs because
    * they are uploaded separately from shs->constbuf[], but from a shader
    * point of view, they're another UBO (at the end of the section).
    */
   bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;

   bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;

   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
      assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);

   /* Mark surfaces used for the cases we don't have the information available
    * upfront.
    */
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_num_workgroups:
            bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
            break;

         case nir_intrinsic_load_output:
            if (devinfo->ver == 8) {
               mark_used_with_src(bt, &intrin->src[0],
                                  IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
            }
            break;

         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_imin:
         case nir_intrinsic_image_atomic_umin:
         case nir_intrinsic_image_atomic_imax:
         case nir_intrinsic_image_atomic_umax:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
            break;

         case nir_intrinsic_get_ssbo_size:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }

   /* When disabled, we just mark everything as used. */
   if (unlikely(skip_compacting_binding_tables())) {
      for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
         bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
   }

   /* Calculate the offsets and the binding table size based on the used
    * surfaces.  After this point, the functions to go between "group indices"
    * and binding table indices can be used.
    */
   uint32_t next = 0;
   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      if (bt->used_mask[i] != 0) {
         bt->offsets[i] = next;
         next += util_bitcount64(bt->used_mask[i]);
      }
   }
   bt->size_bytes = next * 4;

   if (INTEL_DEBUG(DEBUG_BT)) {
      iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
   }

   /* Apply the binding table indices.  The backend compiler is not expected
    * to change those, as we haven't set any of the *_start entries in brw
    * binding_table.
    */
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type == nir_instr_type_tex) {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            tex->texture_index =
               iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
                                       tex->texture_index);
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_imin:
         case nir_intrinsic_image_atomic_umin:
         case nir_intrinsic_image_atomic_imax:
         case nir_intrinsic_image_atomic_umax:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
                                 IRIS_SURFACE_GROUP_SSBO);
            break;

         case nir_intrinsic_load_output:
            if (devinfo->ver == 8) {
               rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                    IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
            }
            break;

         case nir_intrinsic_get_ssbo_size:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }
}

static void
iris_debug_recompile(struct iris_screen *screen,
                     struct pipe_debug_callback *dbg,
                     struct iris_uncompiled_shader *ish,
                     const struct brw_base_prog_key *key)
{
   if (!ish || list_is_empty(&ish->variants)
            || list_is_singular(&ish->variants))
      return;

   const struct intel_device_info *devinfo = &screen->devinfo;
   const struct brw_compiler *c = screen->compiler;
   const struct shader_info *info = &ish->nir->info;

   brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
                       _mesa_shader_stage_to_string(info->stage),
                       info->name ? info->name : "(no identifier)",
                       info->label ? info->label : "");

   struct iris_compiled_shader *shader =
      list_first_entry(&ish->variants, struct iris_compiled_shader, link);
   const void *old_iris_key = &shader->key;

   union brw_any_prog_key old_key;

   switch (info->stage) {
   case MESA_SHADER_VERTEX:
      old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_TESS_CTRL:
      old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_TESS_EVAL:
      old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_GEOMETRY:
      old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_FRAGMENT:
      old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_COMPUTE:
      old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key);
      break;
   default:
      unreachable("invalid shader stage");
   }

   brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
}

static void
check_urb_size(struct iris_context *ice,
               unsigned needed_size,
               gl_shader_stage stage)
{
   unsigned last_allocated_size = ice->shaders.urb.size[stage];

   /* If the last URB allocation wasn't large enough for our needs,
    * flag it as needing to be reconfigured.  Otherwise, we can use
    * the existing config.  However, if the URB is constrained, and
    * we can shrink our size for this stage, we may be able to gain
    * extra concurrency by reconfiguring it to be smaller.  Do so.
    */
   if (last_allocated_size < needed_size ||
       (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
      ice->state.dirty |= IRIS_DIRTY_URB;
   }
}

/**
 * Get the shader for the last enabled geometry stage.
 *
 * This stage is the one which will feed stream output and the rasterizer.
 */
static gl_shader_stage
last_vue_stage(struct iris_context *ice)
{
   if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      return MESA_SHADER_GEOMETRY;

   if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      return MESA_SHADER_TESS_EVAL;

   return MESA_SHADER_VERTEX;
}
/**
 * \param added  Set to \c true if the variant was added to the list (i.e., a
 *               variant matching \c key was not found).  Set to \c false
 *               otherwise.
 */
static inline struct iris_compiled_shader *
find_or_add_variant(const struct iris_screen *screen,
                    struct iris_uncompiled_shader *ish,
                    enum iris_program_cache_id cache_id,
                    const void *key, unsigned key_size,
                    bool *added)
{
   struct list_head *start = ish->variants.next;

   *added = false;

   if (screen->precompile) {
      /* Check the first list entry.  There will always be at least one
       * variant in the list (most likely the precompile variant), and
       * other contexts only append new variants, so we can safely check
       * it without locking, saving that cost in the common case.
       */
      struct iris_compiled_shader *first =
         list_first_entry(&ish->variants, struct iris_compiled_shader, link);

      if (memcmp(&first->key, key, key_size) == 0) {
         util_queue_fence_wait(&first->ready);
         return first;
      }

      /* Skip this one in the loop below */
      start = first->link.next;
   }

   struct iris_compiled_shader *variant = NULL;

   /* If it doesn't match, we have to walk the list; other contexts may be
    * concurrently appending shaders to it, so we need to lock here.
    */
   simple_mtx_lock(&ish->lock);

   list_for_each_entry_from(struct iris_compiled_shader, v, start,
                            &ish->variants, link) {
      if (memcmp(&v->key, key, key_size) == 0) {
         variant = v;
         break;
      }
   }

   if (variant == NULL) {
      variant = iris_create_shader_variant(screen, NULL, cache_id,
                                           key_size, key);

      /* Append our new variant to the shader's variant list. */
      list_addtail(&variant->link, &ish->variants);
      *added = true;

      simple_mtx_unlock(&ish->lock);
   } else {
      simple_mtx_unlock(&ish->lock);

      util_queue_fence_wait(&variant->ready);
   }

   return variant;
}

static void
iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
                                 UNUSED int thread_index)
{
   free(_job);
}

static void
iris_schedule_compile(struct iris_screen *screen,
                      struct util_queue_fence *ready_fence,
                      struct pipe_debug_callback *dbg,
                      struct iris_threaded_compile_job *job,
                      util_queue_execute_func execute)
{
   util_queue_fence_init(ready_fence);

   struct util_async_debug_callback async_debug;

   if (dbg) {
      u_async_debug_init(&async_debug);
      job->dbg = &async_debug.base;
   }

   util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
                      iris_threaded_compile_job_delete, 0);

   if (screen->driconf.sync_compile || dbg)
      util_queue_fence_wait(ready_fence);

   if (dbg) {
      u_async_debug_drain(&async_debug, dbg);
      u_async_debug_cleanup(&async_debug);
   }
}
/**
 * Compile a vertex shader, and upload the assembly.
 */
static void
iris_compile_vs(struct iris_screen *screen,
                struct u_upload_mgr *uploader,
                struct pipe_debug_callback *dbg,
                struct iris_uncompiled_shader *ish,
                struct iris_compiled_shader *shader)
{
   const struct brw_compiler *compiler = screen->compiler;
   const struct intel_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_vs_prog_data *vs_prog_data =
      rzalloc(mem_ctx, struct brw_vs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
   const struct iris_vs_prog_key *const key = &shader->key.vs;

   if (key->vue.nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
                        true, false, NULL);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   prog_data->use_alt_mode = nir->info.is_arb_asm;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader, /* pos_slots */ 1);

   struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);

   struct brw_compile_vs_params params = {
      .nir = nir,
      .key = &brw_key,
      .prog_data = vs_prog_data,
      .log_data = dbg,
   };

   const unsigned *program = brw_compile_vs(compiler, mem_ctx, &params);
   if (program == NULL) {
      dbg_printf("Failed to compile vertex shader: %s\n", params.error_str);
      ralloc_free(mem_ctx);

      shader->compilation_failed = true;
      util_queue_fence_signal(&shader->ready);

      return;
   }

   shader->compilation_failed = false;

   iris_debug_recompile(screen, dbg, ish, &brw_key.base);

   uint32_t *so_decls =
      screen->vtbl.create_so_decl_list(&ish->stream_output,
                                       &vue_prog_data->vue_map);

   iris_finalize_program(shader, prog_data, so_decls, system_values,
                         num_system_values, 0, num_cbufs, &bt);

   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
                      sizeof(*key), key, program);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
}
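/* Orientation note (editor's addition, not in the original source): the
 * iris_update_compiled_*() functions below all follow the same pattern --
 * fill out an iris_*_prog_key, look it up with find_or_add_variant(), fall
 * back to the disk cache and then to a fresh compile if the variant was
 * newly added, and finally flag the relevant dirty bits when the bound
 * variant changes.
 */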
/**
 * Update the current vertex shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_vs(struct iris_context *ice)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_VERTEX];

   struct iris_vs_prog_key key = { KEY_ID(vue.base) };
   screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
   bool added;
   struct iris_compiled_shader *shader =
      find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);

   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
                                          &key, sizeof(key))) {
      iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
   }

   if (shader->compilation_failed)
      shader = NULL;

   if (old != shader) {
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
                                    shader);
      ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
                                IRIS_STAGE_DIRTY_BINDINGS_VS |
                                IRIS_STAGE_DIRTY_CONSTANTS_VS;
      shs->sysvals_need_upload = true;

      unsigned urb_entry_size = shader ?
         ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
      check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
   }
}

/**
 * Get the shader_info for a given stage, or NULL if the stage is disabled.
 */
const struct shader_info *
iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];

   if (!ish)
      return NULL;

   const nir_shader *nir = ish->nir;
   return &nir->info;
}

/**
 * Get the union of TCS output and TES input slots.
 *
 * TCS and TES need to agree on a common URB entry layout.  In particular,
 * the data for all patch vertices is stored in a single URB entry (unlike
 * GS which has one entry per input vertex).  This means that per-vertex
 * array indexing needs a stride.
 *
 * SSO requires locations to match, but doesn't require the number of
 * outputs/inputs to match (in fact, the TCS often has extra outputs).
 * So, we need to take the extra step of unifying these on the fly.
 */
static void
get_unified_tess_slots(const struct iris_context *ice,
                       uint64_t *per_vertex_slots,
                       uint32_t *per_patch_slots)
{
   const struct shader_info *tcs =
      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
   const struct shader_info *tes =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);

   *per_vertex_slots = tes->inputs_read;
   *per_patch_slots = tes->patch_inputs_read;

   if (tcs) {
      *per_vertex_slots |= tcs->outputs_written;
      *per_patch_slots |= tcs->patch_outputs_written;
   }
}
/**
 * Compile a tessellation control shader, and upload the assembly.
 */
static void
iris_compile_tcs(struct iris_screen *screen,
                 struct hash_table *passthrough_ht,
                 struct u_upload_mgr *uploader,
                 struct pipe_debug_callback *dbg,
                 struct iris_uncompiled_shader *ish,
                 struct iris_compiled_shader *shader)
{
   const struct brw_compiler *compiler = screen->compiler;
   const struct nir_shader_compiler_options *options =
      compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tcs_prog_data *tcs_prog_data =
      rzalloc(mem_ctx, struct brw_tcs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   const struct intel_device_info *devinfo = &screen->devinfo;
   enum brw_param_builtin *system_values = NULL;
   unsigned num_system_values = 0;
   unsigned num_cbufs = 0;

   nir_shader *nir;

   struct iris_binding_table bt;

   const struct iris_tcs_prog_key *const key = &shader->key.tcs;
   struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(devinfo, key);

   if (ish) {
      nir = nir_shader_clone(mem_ctx, ish->nir);

      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                          &num_system_values, &num_cbufs);
      iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                               num_system_values, num_cbufs);
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
   } else {
      nir =
         brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, &brw_key);

      /* Reserve space for passing the default tess levels as constants. */
      num_cbufs = 1;
      num_system_values = 8;
      system_values =
         rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
      prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
      prog_data->nr_params = num_system_values;

      if (key->tes_primitive_mode == GL_QUADS) {
         for (int i = 0; i < 4; i++)
            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
         system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
      } else if (key->tes_primitive_mode == GL_TRIANGLES) {
         for (int i = 0; i < 3; i++)
            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
      } else {
         assert(key->tes_primitive_mode == GL_ISOLINES);
         system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
         system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
      }

      /* Manually setup the TCS binding table. */
      memset(&bt, 0, sizeof(bt));
      bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
      bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
      bt.size_bytes = 4;

      prog_data->ubo_ranges[0].length = 1;
   }

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tcs(compiler, dbg, mem_ctx, &brw_key, tcs_prog_data,
                      nir, -1, NULL, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile control shader: %s\n", error_str);
      ralloc_free(mem_ctx);

      shader->compilation_failed = true;
      util_queue_fence_signal(&shader->ready);

      return;
   }

   shader->compilation_failed = false;

   iris_debug_recompile(screen, dbg, ish, &brw_key.base);

   iris_finalize_program(shader, prog_data, NULL, system_values,
                         num_system_values, 0, num_cbufs, &bt);

   iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
                      IRIS_CACHE_TCS, sizeof(*key), key, program);

   if (ish)
      iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
}

/**
 * Update the current tessellation control shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tcs(struct iris_context *ice)
{
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
   struct iris_uncompiled_shader *tcs =
      ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   const struct brw_compiler *compiler = screen->compiler;
   const struct intel_device_info *devinfo = &screen->devinfo;

   const struct shader_info *tes_info =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
   struct iris_tcs_prog_key key = {
      .vue.base.program_string_id = tcs ? tcs->program_id : 0,
      .tes_primitive_mode = tes_info->tess.primitive_mode,
      .input_vertices =
         !tcs || compiler->use_tcs_8_patch ? ice->state.vertices_per_patch : 0,
      .quads_workaround = devinfo->ver < 9 &&
                          tes_info->tess.primitive_mode == GL_QUADS &&
                          tes_info->tess.spacing == TESS_SPACING_EQUAL,
   };
   get_unified_tess_slots(ice, &key.outputs_written,
                          &key.patch_outputs_written);
   screen->vtbl.populate_tcs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
   struct iris_compiled_shader *shader;
   bool added = false;

   if (tcs != NULL) {
      shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
                                   sizeof(key), &added);
   } else {
      /* Look for and possibly create a passthrough TCS */
      shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);

      if (shader == NULL) {
         shader = iris_create_shader_variant(screen, ice->shaders.cache,
                                             IRIS_CACHE_TCS, sizeof(key), &key);
         added = true;
      }
   }

   /* If the variant was just added (i.e. not found in the in-memory cache),
    * compile it now, unless this is a real (non-passthrough) TCS whose
    * binary can be retrieved from the disk cache.
    */
   if (added &&
       (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
                                                 &key, sizeof(key)))) {
      iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
                       shader);
   }

   if (shader->compilation_failed)
      shader = NULL;

   if (old != shader) {
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
                                    shader);
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
                                IRIS_STAGE_DIRTY_BINDINGS_TCS |
                                IRIS_STAGE_DIRTY_CONSTANTS_TCS;
      shs->sysvals_need_upload = true;

      unsigned urb_entry_size = shader ?
         ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
      check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
   }
}

/**
 * Compile a tessellation evaluation shader, and upload the assembly.
 */
static void
iris_compile_tes(struct iris_screen *screen,
                 struct u_upload_mgr *uploader,
                 struct pipe_debug_callback *dbg,
                 struct iris_uncompiled_shader *ish,
                 struct iris_compiled_shader *shader)
{
   const struct brw_compiler *compiler = screen->compiler;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tes_prog_data *tes_prog_data =
      rzalloc(mem_ctx, struct brw_tes_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   const struct intel_device_info *devinfo = &screen->devinfo;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
   const struct iris_tes_prog_key *const key = &shader->key.tes;

   if (key->vue.nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
                        true, false, NULL);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   struct brw_vue_map input_vue_map;
   brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                            key->patch_inputs_read);

   struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(devinfo, key);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tes(compiler, dbg, mem_ctx, &brw_key, &input_vue_map,
                      tes_prog_data, nir, -1, NULL, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
      ralloc_free(mem_ctx);

      shader->compilation_failed = true;
      util_queue_fence_signal(&shader->ready);

      return;
   }

   shader->compilation_failed = false;

   iris_debug_recompile(screen, dbg, ish, &brw_key.base);

   uint32_t *so_decls =
      screen->vtbl.create_so_decl_list(&ish->stream_output,
                                       &vue_prog_data->vue_map);

   iris_finalize_program(shader, prog_data, so_decls, system_values,
                         num_system_values, 0, num_cbufs, &bt);
IRIS_CACHE_TES, 1737 sizeof(*key), key, program); 1738 1739 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key)); 1740 1741 ralloc_free(mem_ctx); 1742} 1743 1744/** 1745 * Update the current tessellation evaluation shader variant. 1746 * 1747 * Fill out the key, look in the cache, compile and bind if needed. 1748 */ 1749static void 1750iris_update_compiled_tes(struct iris_context *ice) 1751{ 1752 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 1753 struct u_upload_mgr *uploader = ice->shaders.uploader_driver; 1754 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL]; 1755 struct iris_uncompiled_shader *ish = 1756 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; 1757 1758 struct iris_tes_prog_key key = { KEY_ID(vue.base) }; 1759 get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read); 1760 screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key); 1761 1762 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES]; 1763 bool added; 1764 struct iris_compiled_shader *shader = 1765 find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added); 1766 1767 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader, 1768 &key, sizeof(key))) { 1769 iris_compile_tes(screen, uploader, &ice->dbg, ish, shader); 1770 } 1771 1772 if (shader->compilation_failed) 1773 shader = NULL; 1774 1775 if (old != shader) { 1776 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], 1777 shader); 1778 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES | 1779 IRIS_STAGE_DIRTY_BINDINGS_TES | 1780 IRIS_STAGE_DIRTY_CONSTANTS_TES; 1781 shs->sysvals_need_upload = true; 1782 1783 unsigned urb_entry_size = shader ? 1784 ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0; 1785 check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL); 1786 } 1787 1788 /* TODO: Could compare and avoid flagging this. */ 1789 const struct shader_info *tes_info = &ish->nir->info; 1790 if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) { 1791 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES; 1792 ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true; 1793 } 1794} 1795 1796/** 1797 * Compile a geometry shader, and upload the assembly. 
1798 */ 1799static void 1800iris_compile_gs(struct iris_screen *screen, 1801 struct u_upload_mgr *uploader, 1802 struct pipe_debug_callback *dbg, 1803 struct iris_uncompiled_shader *ish, 1804 struct iris_compiled_shader *shader) 1805{ 1806 const struct brw_compiler *compiler = screen->compiler; 1807 const struct intel_device_info *devinfo = &screen->devinfo; 1808 void *mem_ctx = ralloc_context(NULL); 1809 struct brw_gs_prog_data *gs_prog_data = 1810 rzalloc(mem_ctx, struct brw_gs_prog_data); 1811 struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base; 1812 struct brw_stage_prog_data *prog_data = &vue_prog_data->base; 1813 enum brw_param_builtin *system_values; 1814 unsigned num_system_values; 1815 unsigned num_cbufs; 1816 1817 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 1818 const struct iris_gs_prog_key *const key = &shader->key.gs; 1819 1820 if (key->vue.nr_userclip_plane_consts) { 1821 nir_function_impl *impl = nir_shader_get_entrypoint(nir); 1822 nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1, 1823 false, NULL); 1824 nir_lower_io_to_temporaries(nir, impl, true, false); 1825 nir_lower_global_vars_to_local(nir); 1826 nir_lower_vars_to_ssa(nir); 1827 nir_shader_gather_info(nir, impl); 1828 } 1829 1830 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values, 1831 &num_system_values, &num_cbufs); 1832 1833 struct iris_binding_table bt; 1834 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0, 1835 num_system_values, num_cbufs); 1836 1837 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); 1838 1839 brw_compute_vue_map(devinfo, 1840 &vue_prog_data->vue_map, nir->info.outputs_written, 1841 nir->info.separate_shader, /* pos_slots */ 1); 1842 1843 struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key); 1844 1845 char *error_str = NULL; 1846 const unsigned *program = 1847 brw_compile_gs(compiler, dbg, mem_ctx, &brw_key, gs_prog_data, 1848 nir, -1, NULL, &error_str); 1849 if (program == NULL) { 1850 dbg_printf("Failed to compile geometry shader: %s\n", error_str); 1851 ralloc_free(mem_ctx); 1852 1853 shader->compilation_failed = true; 1854 util_queue_fence_signal(&shader->ready); 1855 1856 return; 1857 } 1858 1859 shader->compilation_failed = false; 1860 1861 iris_debug_recompile(screen, dbg, ish, &brw_key.base); 1862 1863 uint32_t *so_decls = 1864 screen->vtbl.create_so_decl_list(&ish->stream_output, 1865 &vue_prog_data->vue_map); 1866 1867 iris_finalize_program(shader, prog_data, so_decls, system_values, 1868 num_system_values, 0, num_cbufs, &bt); 1869 1870 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS, 1871 sizeof(*key), key, program); 1872 1873 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key)); 1874 1875 ralloc_free(mem_ctx); 1876} 1877 1878/** 1879 * Update the current geometry shader variant. 1880 * 1881 * Fill out the key, look in the cache, compile and bind if needed. 
1882 */ 1883static void 1884iris_update_compiled_gs(struct iris_context *ice) 1885{ 1886 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY]; 1887 struct u_upload_mgr *uploader = ice->shaders.uploader_driver; 1888 struct iris_uncompiled_shader *ish = 1889 ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]; 1890 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS]; 1891 struct iris_compiled_shader *shader = NULL; 1892 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 1893 1894 if (ish) { 1895 struct iris_gs_prog_key key = { KEY_ID(vue.base) }; 1896 screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key); 1897 1898 bool added; 1899 1900 shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key, 1901 sizeof(key), &added); 1902 1903 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader, 1904 &key, sizeof(key))) { 1905 iris_compile_gs(screen, uploader, &ice->dbg, ish, shader); 1906 } 1907 1908 if (shader->compilation_failed) 1909 shader = NULL; 1910 } 1911 1912 if (old != shader) { 1913 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY], 1914 shader); 1915 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS | 1916 IRIS_STAGE_DIRTY_BINDINGS_GS | 1917 IRIS_STAGE_DIRTY_CONSTANTS_GS; 1918 shs->sysvals_need_upload = true; 1919 1920 unsigned urb_entry_size = shader ? 1921 ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0; 1922 check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY); 1923 } 1924} 1925 1926/** 1927 * Compile a fragment (pixel) shader, and upload the assembly. 1928 */ 1929static void 1930iris_compile_fs(struct iris_screen *screen, 1931 struct u_upload_mgr *uploader, 1932 struct pipe_debug_callback *dbg, 1933 struct iris_uncompiled_shader *ish, 1934 struct iris_compiled_shader *shader, 1935 struct brw_vue_map *vue_map) 1936{ 1937 const struct brw_compiler *compiler = screen->compiler; 1938 void *mem_ctx = ralloc_context(NULL); 1939 struct brw_wm_prog_data *fs_prog_data = 1940 rzalloc(mem_ctx, struct brw_wm_prog_data); 1941 struct brw_stage_prog_data *prog_data = &fs_prog_data->base; 1942 enum brw_param_builtin *system_values; 1943 const struct intel_device_info *devinfo = &screen->devinfo; 1944 unsigned num_system_values; 1945 unsigned num_cbufs; 1946 1947 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 1948 const struct iris_fs_prog_key *const key = &shader->key.fs; 1949 1950 prog_data->use_alt_mode = nir->info.is_arb_asm; 1951 1952 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values, 1953 &num_system_values, &num_cbufs); 1954 1955 /* Lower output variables to load_output intrinsics before setting up 1956 * binding tables, so iris_setup_binding_table can map any load_output 1957 * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for 1958 * non-coherent framebuffer fetches. 1959 */ 1960 brw_nir_lower_fs_outputs(nir); 1961 1962 /* On Gfx11+, shader RT write messages have a "Null Render Target" bit 1963 * and do not need a binding table entry with a null surface. Earlier 1964 * generations need an entry for a null surface. 1965 */ 1966 int null_rts = devinfo->ver < 11 ? 
1 : 0; 1967 1968   struct iris_binding_table bt; 1969   iris_setup_binding_table(devinfo, nir, &bt, 1970                            MAX2(key->nr_color_regions, null_rts), 1971                            num_system_values, num_cbufs); 1972 1973   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); 1974 1975   struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(devinfo, key); 1976 1977   struct brw_compile_fs_params params = { 1978      .nir = nir, 1979      .key = &brw_key, 1980      .prog_data = fs_prog_data, 1981 1982      .allow_spilling = true, 1983      .vue_map = vue_map, 1984 1985      .log_data = dbg, 1986   }; 1987 1988   const unsigned *program = brw_compile_fs(compiler, mem_ctx, &params); 1989   if (program == NULL) { 1990      dbg_printf("Failed to compile fragment shader: %s\n", params.error_str); 1991      ralloc_free(mem_ctx); 1992 1993      shader->compilation_failed = true; 1994      util_queue_fence_signal(&shader->ready); 1995 1996      return; 1997   } 1998 1999   shader->compilation_failed = false; 2000 2001   iris_debug_recompile(screen, dbg, ish, &brw_key.base); 2002 2003   iris_finalize_program(shader, prog_data, NULL, system_values, 2004                         num_system_values, 0, num_cbufs, &bt); 2005 2006   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS, 2007                      sizeof(*key), key, program); 2008 2009   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key)); 2010 2011   ralloc_free(mem_ctx); 2012} 2013 2014/** 2015 * Update the current fragment shader variant. 2016 * 2017 * Fill out the key, look in the cache, compile and bind if needed. 2018 */ 2019static void 2020iris_update_compiled_fs(struct iris_context *ice) 2021{ 2022   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT]; 2023   struct u_upload_mgr *uploader = ice->shaders.uploader_driver; 2024   struct iris_uncompiled_shader *ish = 2025      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; 2026   struct iris_fs_prog_key key = { KEY_ID(base) }; 2027   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 2028   screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key); 2029 2030   struct brw_vue_map *last_vue_map = 2031      &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map; 2032 2033   if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP)) 2034      key.input_slots_valid = last_vue_map->slots_valid; 2035 2036   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS]; 2037   bool added; 2038   struct iris_compiled_shader *shader = 2039      find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key, 2040                          sizeof(key), &added); 2041 2042   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader, 2043                                          &key, sizeof(key))) { 2044      iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map); 2045   } 2046 2047   if (shader->compilation_failed) 2048      shader = NULL; 2049 2050   if (old != shader) { 2051      // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE 2052      // toggles.  might be able to avoid flagging SBE too. 2053      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT], 2054                                    shader); 2055      ice->state.dirty |= IRIS_DIRTY_WM | 2056                          IRIS_DIRTY_CLIP | 2057                          IRIS_DIRTY_SBE; 2058      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS | 2059                                IRIS_STAGE_DIRTY_BINDINGS_FS | 2060                                IRIS_STAGE_DIRTY_CONSTANTS_FS; 2061      shs->sysvals_need_upload = true; 2062   } 2063} 2064 2065/** 2066 * Update the last enabled stage's VUE map. 2067 * 2068 * When the shader feeding the rasterizer's output interface changes, we 2069 * need to re-emit various packets.
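 *
 * In particular, toggling the VIEWPORT slot changes the number of active
 * viewports (and with it the clip, viewport, and scissor state), and any
 * change in the set of valid slots requires flagging IRIS_DIRTY_SBE.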
2070 */ 2071static void 2072update_last_vue_map(struct iris_context *ice, 2073 struct iris_compiled_shader *shader) 2074{ 2075 struct brw_vue_prog_data *vue_prog_data = (void *) shader->prog_data; 2076 struct brw_vue_map *vue_map = &vue_prog_data->vue_map; 2077 struct brw_vue_map *old_map = !ice->shaders.last_vue_shader ? NULL : 2078 &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map; 2079 const uint64_t changed_slots = 2080 (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid; 2081 2082 if (changed_slots & VARYING_BIT_VIEWPORT) { 2083 ice->state.num_viewports = 2084 (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1; 2085 ice->state.dirty |= IRIS_DIRTY_CLIP | 2086 IRIS_DIRTY_SF_CL_VIEWPORT | 2087 IRIS_DIRTY_CC_VIEWPORT | 2088 IRIS_DIRTY_SCISSOR_RECT; 2089 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS | 2090 ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP]; 2091 } 2092 2093 if (changed_slots || (old_map && old_map->separate != vue_map->separate)) { 2094 ice->state.dirty |= IRIS_DIRTY_SBE; 2095 } 2096 2097 iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader); 2098} 2099 2100static void 2101iris_update_pull_constant_descriptors(struct iris_context *ice, 2102 gl_shader_stage stage) 2103{ 2104 struct iris_compiled_shader *shader = ice->shaders.prog[stage]; 2105 2106 if (!shader || !shader->prog_data->has_ubo_pull) 2107 return; 2108 2109 struct iris_shader_state *shs = &ice->state.shaders[stage]; 2110 bool any_new_descriptors = 2111 shader->num_system_values > 0 && shs->sysvals_need_upload; 2112 2113 unsigned bound_cbufs = shs->bound_cbufs; 2114 2115 while (bound_cbufs) { 2116 const int i = u_bit_scan(&bound_cbufs); 2117 struct pipe_shader_buffer *cbuf = &shs->constbuf[i]; 2118 struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i]; 2119 if (!surf_state->res && cbuf->buffer) { 2120 iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state, 2121 ISL_SURF_USAGE_CONSTANT_BUFFER_BIT); 2122 any_new_descriptors = true; 2123 } 2124 } 2125 2126 if (any_new_descriptors) 2127 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage; 2128} 2129 2130/** 2131 * Update the current shader variants for the given state. 2132 * 2133 * This should be called on every draw call to ensure that the correct 2134 * shaders are bound. It will also flag any dirty state triggered by 2135 * swapping out those shaders. 
2136 */ 2137void 2138iris_update_compiled_shaders(struct iris_context *ice) 2139{ 2140 const uint64_t stage_dirty = ice->state.stage_dirty; 2141 2142 if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS | 2143 IRIS_STAGE_DIRTY_UNCOMPILED_TES)) { 2144 struct iris_uncompiled_shader *tes = 2145 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; 2146 if (tes) { 2147 iris_update_compiled_tcs(ice); 2148 iris_update_compiled_tes(ice); 2149 } else { 2150 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL); 2151 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL); 2152 ice->state.stage_dirty |= 2153 IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES | 2154 IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES | 2155 IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES; 2156 2157 if (ice->shaders.urb.constrained) 2158 ice->state.dirty |= IRIS_DIRTY_URB; 2159 } 2160 } 2161 2162 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS) 2163 iris_update_compiled_vs(ice); 2164 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS) 2165 iris_update_compiled_gs(ice); 2166 2167 if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS | 2168 IRIS_STAGE_DIRTY_UNCOMPILED_TES)) { 2169 const struct iris_compiled_shader *gs = 2170 ice->shaders.prog[MESA_SHADER_GEOMETRY]; 2171 const struct iris_compiled_shader *tes = 2172 ice->shaders.prog[MESA_SHADER_TESS_EVAL]; 2173 2174 bool points_or_lines = false; 2175 2176 if (gs) { 2177 const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data; 2178 points_or_lines = 2179 gs_prog_data->output_topology == _3DPRIM_POINTLIST || 2180 gs_prog_data->output_topology == _3DPRIM_LINESTRIP; 2181 } else if (tes) { 2182 const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data; 2183 points_or_lines = 2184 tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE || 2185 tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT; 2186 } 2187 2188 if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) { 2189 /* Outbound to XY Clip enables */ 2190 ice->shaders.output_topology_is_points_or_lines = points_or_lines; 2191 ice->state.dirty |= IRIS_DIRTY_CLIP; 2192 } 2193 } 2194 2195 gl_shader_stage last_stage = last_vue_stage(ice); 2196 struct iris_compiled_shader *shader = ice->shaders.prog[last_stage]; 2197 struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage]; 2198 update_last_vue_map(ice, shader); 2199 if (ice->state.streamout != shader->streamout) { 2200 ice->state.streamout = shader->streamout; 2201 ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT; 2202 } 2203 2204 if (ice->state.streamout_active) { 2205 for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { 2206 struct iris_stream_output_target *so = 2207 (void *) ice->state.so_target[i]; 2208 if (so) 2209 so->stride = ish->stream_output.stride[i] * sizeof(uint32_t); 2210 } 2211 } 2212 2213 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS) 2214 iris_update_compiled_fs(ice); 2215 2216 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) { 2217 if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i)) 2218 iris_update_pull_constant_descriptors(ice, i); 2219 } 2220} 2221 2222static void 2223iris_compile_cs(struct iris_screen *screen, 2224 struct u_upload_mgr *uploader, 2225 struct pipe_debug_callback *dbg, 2226 struct iris_uncompiled_shader *ish, 2227 struct iris_compiled_shader *shader) 2228{ 2229 const struct brw_compiler *compiler = screen->compiler; 2230 void *mem_ctx = ralloc_context(NULL); 2231 struct 
brw_cs_prog_data *cs_prog_data = 2232      rzalloc(mem_ctx, struct brw_cs_prog_data); 2233   struct brw_stage_prog_data *prog_data = &cs_prog_data->base; 2234   enum brw_param_builtin *system_values; 2235   const struct intel_device_info *devinfo = &screen->devinfo; 2236   unsigned num_system_values; 2237   unsigned num_cbufs; 2238 2239   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 2240   const struct iris_cs_prog_key *const key = &shader->key.cs; 2241 2242   NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics); 2243 2244   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 2245                       ish->kernel_input_size, 2246                       &system_values, &num_system_values, &num_cbufs); 2247 2248   struct iris_binding_table bt; 2249   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0, 2250                            num_system_values, num_cbufs); 2251 2252   struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(devinfo, key); 2253 2254   struct brw_compile_cs_params params = { 2255      .nir = nir, 2256      .key = &brw_key, 2257      .prog_data = cs_prog_data, 2258      .log_data = dbg, 2259   }; 2260 2261   const unsigned *program = brw_compile_cs(compiler, mem_ctx, &params); 2262   if (program == NULL) { 2263      dbg_printf("Failed to compile compute shader: %s\n", params.error_str); 2264      ralloc_free(mem_ctx); 2265      shader->compilation_failed = true; 2266      util_queue_fence_signal(&shader->ready); 2267 2268      return; 2269   } 2270 2271   shader->compilation_failed = false; 2272 2273   iris_debug_recompile(screen, dbg, ish, &brw_key.base); 2274 2275   iris_finalize_program(shader, prog_data, NULL, system_values, 2276                         num_system_values, ish->kernel_input_size, num_cbufs, 2277                         &bt); 2278 2279   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS, 2280                      sizeof(*key), key, program); 2281 2282   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key)); 2283 2284   ralloc_free(mem_ctx); 2285} 2286 2287static void 2288iris_update_compiled_cs(struct iris_context *ice) 2289{ 2290   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE]; 2291   struct u_upload_mgr *uploader = ice->shaders.uploader_driver; 2292   struct iris_uncompiled_shader *ish = 2293      ice->shaders.uncompiled[MESA_SHADER_COMPUTE]; 2294 2295   struct iris_cs_prog_key key = { KEY_ID(base) }; 2296   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 2297   screen->vtbl.populate_cs_key(ice, &key); 2298 2299   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS]; 2300   bool added; 2301   struct iris_compiled_shader *shader = 2302      find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key, 2303                          sizeof(key), &added); 2304 2305   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader, 2306                                          &key, sizeof(key))) { 2307      iris_compile_cs(screen, uploader, &ice->dbg, ish, shader); 2308   } 2309 2310   if (shader->compilation_failed) 2311      shader = NULL; 2312 2313   if (old != shader) { 2314      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE], 2315                                    shader); 2316      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS | 2317                                IRIS_STAGE_DIRTY_BINDINGS_CS | 2318                                IRIS_STAGE_DIRTY_CONSTANTS_CS; 2319      shs->sysvals_need_upload = true; 2320   } 2321} 2322 2323void 2324iris_update_compiled_compute_shader(struct iris_context *ice) 2325{ 2326   if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS) 2327      iris_update_compiled_cs(ice); 2328 2329   if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS) 2330      iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE); 2331} 2332 2333void 2334iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data, 2335                               unsigned threads, 2336                               uint32_t *dst) 2337{ 2338
assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0); 2339 assert(cs_prog_data->push.cross_thread.size == 0); 2340 assert(cs_prog_data->push.per_thread.dwords == 1); 2341 assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID); 2342 for (unsigned t = 0; t < threads; t++) 2343 dst[8 * t] = t; 2344} 2345 2346/** 2347 * Allocate scratch BOs as needed for the given per-thread size and stage. 2348 */ 2349struct iris_bo * 2350iris_get_scratch_space(struct iris_context *ice, 2351 unsigned per_thread_scratch, 2352 gl_shader_stage stage) 2353{ 2354 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 2355 struct iris_bufmgr *bufmgr = screen->bufmgr; 2356 const struct intel_device_info *devinfo = &screen->devinfo; 2357 2358 unsigned encoded_size = ffs(per_thread_scratch) - 11; 2359 assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos)); 2360 assert(per_thread_scratch == 1 << (encoded_size + 10)); 2361 2362 /* On GFX version 12.5, scratch access changed to a surface-based model. 2363 * Instead of each shader type having its own layout based on IDs passed 2364 * from the relevant fixed-function unit, all scratch access is based on 2365 * thread IDs like it always has been for compute. 2366 */ 2367 if (devinfo->verx10 >= 125) 2368 stage = MESA_SHADER_COMPUTE; 2369 2370 struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage]; 2371 2372 if (!*bop) { 2373 assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids)); 2374 uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage]; 2375 *bop = iris_bo_alloc(bufmgr, "scratch", size, 1, IRIS_MEMZONE_SHADER, 0); 2376 } 2377 2378 return *bop; 2379} 2380 2381const struct iris_state_ref * 2382iris_get_scratch_surf(struct iris_context *ice, 2383 unsigned per_thread_scratch) 2384{ 2385 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 2386 ASSERTED const struct intel_device_info *devinfo = &screen->devinfo; 2387 2388 assert(devinfo->verx10 >= 125); 2389 2390 unsigned encoded_size = ffs(per_thread_scratch) - 11; 2391 assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs)); 2392 assert(per_thread_scratch == 1 << (encoded_size + 10)); 2393 2394 struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size]; 2395 2396 if (ref->res) 2397 return ref; 2398 2399 struct iris_bo *scratch_bo = 2400 iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE); 2401 2402 void *map = upload_state(ice->state.bindless_uploader, ref, 2403 screen->isl_dev.ss.size, 64); 2404 2405 isl_buffer_fill_state(&screen->isl_dev, map, 2406 .address = scratch_bo->address, 2407 .size_B = scratch_bo->size, 2408 .format = ISL_FORMAT_RAW, 2409 .swizzle = ISL_SWIZZLE_IDENTITY, 2410 .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0), 2411 .stride_B = per_thread_scratch, 2412 .is_scratch = true); 2413 2414 return ref; 2415} 2416 2417/* ------------------------------------------------------------------- */ 2418 2419/** 2420 * The pipe->create_[stage]_state() driver hooks. 2421 * 2422 * Performs basic NIR preprocessing, records any state dependencies, and 2423 * returns an iris_uncompiled_shader as the Gallium CSO. 2424 * 2425 * Actual shader compilation to assembly happens later, at first use. 
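 *
 * When the screen's precompile option is set, a variant with a default key
 * is also built right away: asynchronously on the shader compiler queue
 * for the render stages, and synchronously for compute state.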
2426 */ 2427static void * 2428iris_create_uncompiled_shader(struct iris_screen *screen, 2429 nir_shader *nir, 2430 const struct pipe_stream_output_info *so_info) 2431{ 2432 struct iris_uncompiled_shader *ish = 2433 calloc(1, sizeof(struct iris_uncompiled_shader)); 2434 if (!ish) 2435 return NULL; 2436 2437 pipe_reference_init(&ish->ref, 1); 2438 list_inithead(&ish->variants); 2439 simple_mtx_init(&ish->lock, mtx_plain); 2440 2441 ish->uses_atomic_load_store = iris_uses_image_atomic(nir); 2442 2443 ish->program_id = get_new_program_id(screen); 2444 ish->nir = nir; 2445 if (so_info) { 2446 memcpy(&ish->stream_output, so_info, sizeof(*so_info)); 2447 update_so_info(&ish->stream_output, nir->info.outputs_written); 2448 } 2449 2450 if (screen->disk_cache) { 2451 /* Serialize the NIR to a binary blob that we can hash for the disk 2452 * cache. Drop unnecessary information (like variable names) 2453 * so the serialized NIR is smaller, and also to let us detect more 2454 * isomorphic shaders when hashing, increasing cache hits. 2455 */ 2456 struct blob blob; 2457 blob_init(&blob); 2458 nir_serialize(&blob, nir, true); 2459 _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1); 2460 blob_finish(&blob); 2461 } 2462 2463 return ish; 2464} 2465 2466static void * 2467iris_create_compute_state(struct pipe_context *ctx, 2468 const struct pipe_compute_state *state) 2469{ 2470 struct iris_context *ice = (void *) ctx; 2471 struct iris_screen *screen = (void *) ctx->screen; 2472 struct u_upload_mgr *uploader = ice->shaders.uploader_unsync; 2473 const nir_shader_compiler_options *options = 2474 screen->compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions; 2475 2476 nir_shader *nir; 2477 switch (state->ir_type) { 2478 case PIPE_SHADER_IR_NIR: 2479 nir = (void *)state->prog; 2480 break; 2481 2482 case PIPE_SHADER_IR_NIR_SERIALIZED: { 2483 struct blob_reader reader; 2484 const struct pipe_binary_program_header *hdr = state->prog; 2485 blob_reader_init(&reader, hdr->blob, hdr->num_bytes); 2486 nir = nir_deserialize(NULL, options, &reader); 2487 break; 2488 } 2489 2490 default: 2491 unreachable("Unsupported IR"); 2492 } 2493 2494 /* Most of iris doesn't really care about the difference between compute 2495 * shaders and kernels. We also tend to hard-code COMPUTE everywhere so 2496 * it's way easier if we just normalize to COMPUTE here. 2497 */ 2498 assert(nir->info.stage == MESA_SHADER_COMPUTE || 2499 nir->info.stage == MESA_SHADER_KERNEL); 2500 nir->info.stage = MESA_SHADER_COMPUTE; 2501 2502 struct iris_uncompiled_shader *ish = 2503 iris_create_uncompiled_shader(screen, nir, NULL); 2504 ish->kernel_input_size = state->req_input_mem; 2505 ish->kernel_shared_size = state->req_local_mem; 2506 2507 // XXX: disallow more than 64KB of shared variables 2508 2509 if (screen->precompile) { 2510 struct iris_cs_prog_key key = { KEY_ID(base) }; 2511 2512 struct iris_compiled_shader *shader = 2513 iris_create_shader_variant(screen, NULL, IRIS_CACHE_CS, 2514 sizeof(key), &key); 2515 2516 /* Append our new variant to the shader's variant list. 
*/ 2517 list_addtail(&shader->link, &ish->variants); 2518 2519 if (!iris_disk_cache_retrieve(screen, uploader, ish, shader, 2520 &key, sizeof(key))) { 2521 iris_compile_cs(screen, uploader, &ice->dbg, ish, shader); 2522 } 2523 } 2524 2525 return ish; 2526} 2527 2528static void 2529iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index) 2530{ 2531 const struct iris_threaded_compile_job *job = 2532 (struct iris_threaded_compile_job *) _job; 2533 2534 struct iris_screen *screen = job->screen; 2535 struct u_upload_mgr *uploader = job->uploader; 2536 struct pipe_debug_callback *dbg = job->dbg; 2537 struct iris_uncompiled_shader *ish = job->ish; 2538 struct iris_compiled_shader *shader = job->shader; 2539 2540 switch (ish->nir->info.stage) { 2541 case MESA_SHADER_VERTEX: 2542 iris_compile_vs(screen, uploader, dbg, ish, shader); 2543 break; 2544 case MESA_SHADER_TESS_CTRL: 2545 iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader); 2546 break; 2547 case MESA_SHADER_TESS_EVAL: 2548 iris_compile_tes(screen, uploader, dbg, ish, shader); 2549 break; 2550 case MESA_SHADER_GEOMETRY: 2551 iris_compile_gs(screen, uploader, dbg, ish, shader); 2552 break; 2553 case MESA_SHADER_FRAGMENT: 2554 iris_compile_fs(screen, uploader, dbg, ish, shader, NULL); 2555 break; 2556 2557 default: 2558 unreachable("Invalid shader stage."); 2559 } 2560} 2561 2562static void * 2563iris_create_shader_state(struct pipe_context *ctx, 2564 const struct pipe_shader_state *state) 2565{ 2566 struct iris_context *ice = (void *) ctx; 2567 struct iris_screen *screen = (void *) ctx->screen; 2568 struct nir_shader *nir; 2569 2570 if (state->type == PIPE_SHADER_IR_TGSI) 2571 nir = tgsi_to_nir(state->tokens, ctx->screen, false); 2572 else 2573 nir = state->ir.nir; 2574 2575 const struct shader_info *const info = &nir->info; 2576 struct iris_uncompiled_shader *ish = 2577 iris_create_uncompiled_shader(screen, nir, &state->stream_output); 2578 2579 union iris_any_prog_key key; 2580 unsigned key_size = 0; 2581 2582 memset(&key, 0, sizeof(key)); 2583 2584 switch (info->stage) { 2585 case MESA_SHADER_VERTEX: 2586 /* User clip planes */ 2587 if (info->clip_distance_array_size == 0) 2588 ish->nos |= (1ull << IRIS_NOS_RASTERIZER); 2589 2590 key.vs = (struct iris_vs_prog_key) { KEY_ID(vue.base) }; 2591 key_size = sizeof(key.vs); 2592 break; 2593 2594 case MESA_SHADER_TESS_CTRL: { 2595 const unsigned _GL_TRIANGLES = 0x0004; 2596 2597 key.tcs = (struct iris_tcs_prog_key) { 2598 KEY_ID(vue.base), 2599 // XXX: make sure the linker fills this out from the TES... 2600 .tes_primitive_mode = 2601 info->tess.primitive_mode ? info->tess.primitive_mode 2602 : _GL_TRIANGLES, 2603 .outputs_written = info->outputs_written, 2604 .patch_outputs_written = info->patch_outputs_written, 2605 }; 2606 2607 /* 8_PATCH mode needs the key to contain the input patch dimensionality. 2608 * We don't have that information, so we randomly guess that the input 2609 * and output patches are the same size. This is a bad guess, but we 2610 * can't do much better. 
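 * (At draw time, iris_update_compiled_tcs rebuilds the key using the
 * actual ice->state.vertices_per_patch, so only this precompiled variant
 * relies on the guess.)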
2611 */ 2612 if (screen->compiler->use_tcs_8_patch) 2613 key.tcs.input_vertices = info->tess.tcs_vertices_out; 2614 2615 key_size = sizeof(key.tcs); 2616 break; 2617 } 2618 2619 case MESA_SHADER_TESS_EVAL: 2620 /* User clip planes */ 2621 if (info->clip_distance_array_size == 0) 2622 ish->nos |= (1ull << IRIS_NOS_RASTERIZER); 2623 2624 key.tes = (struct iris_tes_prog_key) { 2625 KEY_ID(vue.base), 2626 // XXX: not ideal, need TCS output/TES input unification 2627 .inputs_read = info->inputs_read, 2628 .patch_inputs_read = info->patch_inputs_read, 2629 }; 2630 2631 key_size = sizeof(key.tes); 2632 break; 2633 2634 case MESA_SHADER_GEOMETRY: 2635 /* User clip planes */ 2636 if (info->clip_distance_array_size == 0) 2637 ish->nos |= (1ull << IRIS_NOS_RASTERIZER); 2638 2639 key.gs = (struct iris_gs_prog_key) { KEY_ID(vue.base) }; 2640 key_size = sizeof(key.gs); 2641 break; 2642 2643 case MESA_SHADER_FRAGMENT: 2644 ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) | 2645 (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) | 2646 (1ull << IRIS_NOS_RASTERIZER) | 2647 (1ull << IRIS_NOS_BLEND); 2648 2649 /* The program key needs the VUE map if there are > 16 inputs */ 2650 if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) { 2651 ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP); 2652 } 2653 2654 const uint64_t color_outputs = info->outputs_written & 2655 ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | 2656 BITFIELD64_BIT(FRAG_RESULT_STENCIL) | 2657 BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)); 2658 2659 bool can_rearrange_varyings = 2660 util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16; 2661 2662 const struct intel_device_info *devinfo = &screen->devinfo; 2663 2664 key.fs = (struct iris_fs_prog_key) { 2665 KEY_ID(base), 2666 .nr_color_regions = util_bitcount(color_outputs), 2667 .coherent_fb_fetch = devinfo->ver >= 9, 2668 .input_slots_valid = 2669 can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS, 2670 }; 2671 2672 key_size = sizeof(key.fs); 2673 break; 2674 2675 default: 2676 unreachable("Invalid shader stage."); 2677 } 2678 2679 if (screen->precompile) { 2680 struct u_upload_mgr *uploader = ice->shaders.uploader_unsync; 2681 2682 struct iris_compiled_shader *shader = 2683 iris_create_shader_variant(screen, NULL, 2684 (enum iris_program_cache_id) info->stage, 2685 key_size, &key); 2686 2687 /* Append our new variant to the shader's variant list. */ 2688 list_addtail(&shader->link, &ish->variants); 2689 2690 if (!iris_disk_cache_retrieve(screen, uploader, ish, shader, 2691 &key, key_size)) { 2692 assert(!util_queue_fence_is_signalled(&shader->ready)); 2693 2694 struct iris_threaded_compile_job *job = calloc(1, sizeof(*job)); 2695 2696 job->screen = screen; 2697 job->uploader = uploader; 2698 job->ish = ish; 2699 job->shader = shader; 2700 2701 iris_schedule_compile(screen, &ish->ready, &ice->dbg, job, 2702 iris_compile_shader); 2703 } 2704 } 2705 2706 return ish; 2707} 2708 2709/** 2710 * Called when the refcount on the iris_uncompiled_shader reaches 0. 2711 * 2712 * Frees the iris_uncompiled_shader. 
2713 * 2714 * \sa iris_delete_shader_state 2715 */ 2716void 2717iris_destroy_shader_state(struct pipe_context *ctx, void *state) 2718{ 2719 struct iris_uncompiled_shader *ish = state; 2720 2721 /* No need to take ish->lock; we hold the last reference to ish */ 2722 list_for_each_entry_safe(struct iris_compiled_shader, shader, 2723 &ish->variants, link) { 2724 list_del(&shader->link); 2725 2726 iris_shader_variant_reference(&shader, NULL); 2727 } 2728 2729 simple_mtx_destroy(&ish->lock); 2730 util_queue_fence_destroy(&ish->ready); 2731 2732 ralloc_free(ish->nir); 2733 free(ish); 2734} 2735 2736/** 2737 * The pipe->delete_[stage]_state() driver hooks. 2738 * 2739 * \sa iris_destroy_shader_state 2740 */ 2741static void 2742iris_delete_shader_state(struct pipe_context *ctx, void *state) 2743{ 2744 struct iris_uncompiled_shader *ish = state; 2745 struct iris_context *ice = (void *) ctx; 2746 2747 const gl_shader_stage stage = ish->nir->info.stage; 2748 2749 if (ice->shaders.uncompiled[stage] == ish) { 2750 ice->shaders.uncompiled[stage] = NULL; 2751 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage; 2752 } 2753 2754 if (pipe_reference(&ish->ref, NULL)) 2755 iris_destroy_shader_state(ctx, state); 2756} 2757 2758/** 2759 * The pipe->bind_[stage]_state() driver hook. 2760 * 2761 * Binds an uncompiled shader as the current one for a particular stage. 2762 * Updates dirty tracking to account for the shader's NOS. 2763 */ 2764static void 2765bind_shader_state(struct iris_context *ice, 2766 struct iris_uncompiled_shader *ish, 2767 gl_shader_stage stage) 2768{ 2769 uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage; 2770 const uint64_t nos = ish ? ish->nos : 0; 2771 2772 const struct shader_info *old_info = iris_get_shader_info(ice, stage); 2773 const struct shader_info *new_info = ish ? &ish->nir->info : NULL; 2774 2775 if ((old_info ? BITSET_LAST_BIT(old_info->textures_used) : 0) != 2776 (new_info ? BITSET_LAST_BIT(new_info->textures_used) : 0)) { 2777 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage; 2778 } 2779 2780 ice->shaders.uncompiled[stage] = ish; 2781 ice->state.stage_dirty |= stage_dirty_bit; 2782 2783 /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change 2784 * (or that they no longer need to do so). 
2785 */ 2786 for (int i = 0; i < IRIS_NOS_COUNT; i++) { 2787 if (nos & (1 << i)) 2788 ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit; 2789 else 2790 ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit; 2791 } 2792} 2793 2794static void 2795iris_bind_vs_state(struct pipe_context *ctx, void *state) 2796{ 2797 struct iris_context *ice = (struct iris_context *)ctx; 2798 struct iris_uncompiled_shader *ish = state; 2799 2800 if (ish) { 2801 const struct shader_info *info = &ish->nir->info; 2802 if (ice->state.window_space_position != info->vs.window_space_position) { 2803 ice->state.window_space_position = info->vs.window_space_position; 2804 2805 ice->state.dirty |= IRIS_DIRTY_CLIP | 2806 IRIS_DIRTY_RASTER | 2807 IRIS_DIRTY_CC_VIEWPORT; 2808 } 2809 2810 const bool uses_draw_params = 2811 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) || 2812 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE); 2813 const bool uses_derived_draw_params = 2814 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) || 2815 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW); 2816 const bool needs_sgvs_element = uses_draw_params || 2817 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) || 2818 BITSET_TEST(info->system_values_read, 2819 SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); 2820 2821 if (ice->state.vs_uses_draw_params != uses_draw_params || 2822 ice->state.vs_uses_derived_draw_params != uses_derived_draw_params || 2823 ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag) { 2824 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS | 2825 IRIS_DIRTY_VERTEX_ELEMENTS; 2826 } 2827 2828 ice->state.vs_uses_draw_params = uses_draw_params; 2829 ice->state.vs_uses_derived_draw_params = uses_derived_draw_params; 2830 ice->state.vs_needs_sgvs_element = needs_sgvs_element; 2831 ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag; 2832 } 2833 2834 bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX); 2835} 2836 2837static void 2838iris_bind_tcs_state(struct pipe_context *ctx, void *state) 2839{ 2840 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL); 2841} 2842 2843static void 2844iris_bind_tes_state(struct pipe_context *ctx, void *state) 2845{ 2846 struct iris_context *ice = (struct iris_context *)ctx; 2847 2848 /* Enabling/disabling optional stages requires a URB reconfiguration. */ 2849 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]) 2850 ice->state.dirty |= IRIS_DIRTY_URB; 2851 2852 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL); 2853} 2854 2855static void 2856iris_bind_gs_state(struct pipe_context *ctx, void *state) 2857{ 2858 struct iris_context *ice = (struct iris_context *)ctx; 2859 2860 /* Enabling/disabling optional stages requires a URB reconfiguration. 
*/ 2861 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) 2862 ice->state.dirty |= IRIS_DIRTY_URB; 2863 2864 bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY); 2865} 2866 2867static void 2868iris_bind_fs_state(struct pipe_context *ctx, void *state) 2869{ 2870 struct iris_context *ice = (struct iris_context *) ctx; 2871 struct iris_screen *screen = (struct iris_screen *) ctx->screen; 2872 const struct intel_device_info *devinfo = &screen->devinfo; 2873 struct iris_uncompiled_shader *old_ish = 2874 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; 2875 struct iris_uncompiled_shader *new_ish = state; 2876 2877 const unsigned color_bits = 2878 BITFIELD64_BIT(FRAG_RESULT_COLOR) | 2879 BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS); 2880 2881 /* Fragment shader outputs influence HasWriteableRT */ 2882 if (!old_ish || !new_ish || 2883 (old_ish->nir->info.outputs_written & color_bits) != 2884 (new_ish->nir->info.outputs_written & color_bits)) 2885 ice->state.dirty |= IRIS_DIRTY_PS_BLEND; 2886 2887 if (devinfo->ver == 8) 2888 ice->state.dirty |= IRIS_DIRTY_PMA_FIX; 2889 2890 bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT); 2891} 2892 2893static void 2894iris_bind_cs_state(struct pipe_context *ctx, void *state) 2895{ 2896 bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE); 2897} 2898 2899static char * 2900iris_finalize_nir(struct pipe_screen *_screen, void *nirptr) 2901{ 2902 struct iris_screen *screen = (struct iris_screen *)_screen; 2903 struct nir_shader *nir = (struct nir_shader *) nirptr; 2904 const struct intel_device_info *devinfo = &screen->devinfo; 2905 2906 NIR_PASS_V(nir, iris_fix_edge_flags); 2907 2908 brw_preprocess_nir(screen->compiler, nir, NULL); 2909 2910 NIR_PASS_V(nir, brw_nir_lower_storage_image, devinfo); 2911 NIR_PASS_V(nir, iris_lower_storage_image_derefs); 2912 2913 nir_sweep(nir); 2914 2915 return NULL; 2916} 2917 2918static void 2919iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen, 2920 unsigned max_threads) 2921{ 2922 struct iris_screen *screen = (struct iris_screen *) pscreen; 2923 util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads); 2924} 2925 2926static bool 2927iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen, 2928 void *v_shader, 2929 enum pipe_shader_type p_stage) 2930{ 2931 struct iris_screen *screen = (struct iris_screen *) pscreen; 2932 2933 /* Threaded compilation is only used for the precompile. If precompile is 2934 * disabled, threaded compilation is "done." 2935 */ 2936 if (!screen->precompile) 2937 return true; 2938 2939 struct iris_uncompiled_shader *ish = v_shader; 2940 2941 /* When precompile is enabled, the first entry is the precompile variant. 2942 * Check the ready fence of the precompile variant. 
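 * (Variants requested at draw time are compiled synchronously, so the
 * precompile job is the only compile that can still be in flight here.)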
2943 */ 2944 struct iris_compiled_shader *first = 2945 list_first_entry(&ish->variants, struct iris_compiled_shader, link); 2946 2947 return util_queue_fence_is_signalled(&first->ready); 2948} 2949 2950void 2951iris_init_screen_program_functions(struct pipe_screen *pscreen) 2952{ 2953 pscreen->is_parallel_shader_compilation_finished = 2954 iris_is_parallel_shader_compilation_finished; 2955 pscreen->set_max_shader_compiler_threads = 2956 iris_set_max_shader_compiler_threads; 2957 pscreen->finalize_nir = iris_finalize_nir; 2958} 2959 2960void 2961iris_init_program_functions(struct pipe_context *ctx) 2962{ 2963 ctx->create_vs_state = iris_create_shader_state; 2964 ctx->create_tcs_state = iris_create_shader_state; 2965 ctx->create_tes_state = iris_create_shader_state; 2966 ctx->create_gs_state = iris_create_shader_state; 2967 ctx->create_fs_state = iris_create_shader_state; 2968 ctx->create_compute_state = iris_create_compute_state; 2969 2970 ctx->delete_vs_state = iris_delete_shader_state; 2971 ctx->delete_tcs_state = iris_delete_shader_state; 2972 ctx->delete_tes_state = iris_delete_shader_state; 2973 ctx->delete_gs_state = iris_delete_shader_state; 2974 ctx->delete_fs_state = iris_delete_shader_state; 2975 ctx->delete_compute_state = iris_delete_shader_state; 2976 2977 ctx->bind_vs_state = iris_bind_vs_state; 2978 ctx->bind_tcs_state = iris_bind_tcs_state; 2979 ctx->bind_tes_state = iris_bind_tes_state; 2980 ctx->bind_gs_state = iris_bind_gs_state; 2981 ctx->bind_fs_state = iris_bind_fs_state; 2982 ctx->bind_compute_state = iris_bind_cs_state; 2983} 2984