/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 *   Brian Paul
 */


#include "main/errors.h"

#include "main/hash.h"
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/programopt.h"

#include "compiler/glsl/gl_nir.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_serialize.h"
#include "draw/draw_context.h"

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_emulate.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
#include "nir/nir_to_tgsi.h"

#include "util/u_memory.h"

#include "st_debug.h"
#include "st_cb_bitmap.h"
#include "st_cb_drawpixels.h"
#include "st_context.h"
#include "st_tgsi_lower_depth_clamp.h"
#include "st_tgsi_lower_yuv.h"
#include "st_program.h"
#include "st_atifs_to_nir.h"
#include "st_nir.h"
#include "st_shader_cache.h"
#include "st_util.h"
#include "cso_cache/cso_context.h"


static void
destroy_program_variants(struct st_context *st, struct gl_program *target);

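/**
 * Helper for st_set_prog_affected_state_flags: OR the given per-stage
 * state flags into *states for each resource type the program actually
 * uses (constants, textures, images, UBOs, SSBOs, atomics).
 */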
static void
set_affected_state_flags(uint64_t *states,
                         struct gl_program *prog,
                         uint64_t new_constants,
                         uint64_t new_sampler_views,
                         uint64_t new_samplers,
                         uint64_t new_images,
                         uint64_t new_ubos,
                         uint64_t new_ssbos,
                         uint64_t new_atomics)
{
   if (prog->Parameters->NumParameters)
      *states |= new_constants;

   if (prog->info.num_textures)
      *states |= new_sampler_views | new_samplers;

   if (prog->info.num_images)
      *states |= new_images;

   if (prog->info.num_ubos)
      *states |= new_ubos;

   if (prog->info.num_ssbos)
      *states |= new_ssbos;

   if (prog->info.num_abos)
      *states |= new_atomics;
}

/**
 * This determines which states will be updated when the shader is bound.
 */
void
st_set_prog_affected_state_flags(struct gl_program *prog)
{
   uint64_t *states;

   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      states = &((struct st_program*)prog)->affected_states;

      *states = ST_NEW_VS_STATE |
                ST_NEW_RASTERIZER |
                ST_NEW_VERTEX_ARRAYS;

      set_affected_state_flags(states, prog,
                               ST_NEW_VS_CONSTANTS,
                               ST_NEW_VS_SAMPLER_VIEWS,
                               ST_NEW_VS_SAMPLERS,
                               ST_NEW_VS_IMAGES,
                               ST_NEW_VS_UBOS,
                               ST_NEW_VS_SSBOS,
                               ST_NEW_VS_ATOMICS);
      break;

   case MESA_SHADER_TESS_CTRL:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_TCS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_TCS_CONSTANTS,
                               ST_NEW_TCS_SAMPLER_VIEWS,
                               ST_NEW_TCS_SAMPLERS,
                               ST_NEW_TCS_IMAGES,
                               ST_NEW_TCS_UBOS,
                               ST_NEW_TCS_SSBOS,
                               ST_NEW_TCS_ATOMICS);
      break;

   case MESA_SHADER_TESS_EVAL:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_TES_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_TES_CONSTANTS,
                               ST_NEW_TES_SAMPLER_VIEWS,
                               ST_NEW_TES_SAMPLERS,
                               ST_NEW_TES_IMAGES,
                               ST_NEW_TES_UBOS,
                               ST_NEW_TES_SSBOS,
                               ST_NEW_TES_ATOMICS);
      break;

   case MESA_SHADER_GEOMETRY:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_GS_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_GS_CONSTANTS,
                               ST_NEW_GS_SAMPLER_VIEWS,
                               ST_NEW_GS_SAMPLERS,
                               ST_NEW_GS_IMAGES,
                               ST_NEW_GS_UBOS,
                               ST_NEW_GS_SSBOS,
                               ST_NEW_GS_ATOMICS);
      break;

   case MESA_SHADER_FRAGMENT:
      states = &((struct st_program*)prog)->affected_states;

      /* gl_FragCoord and glDrawPixels always use constants. */
      *states = ST_NEW_FS_STATE |
                ST_NEW_SAMPLE_SHADING |
                ST_NEW_FS_CONSTANTS;

      set_affected_state_flags(states, prog,
                               ST_NEW_FS_CONSTANTS,
                               ST_NEW_FS_SAMPLER_VIEWS,
                               ST_NEW_FS_SAMPLERS,
                               ST_NEW_FS_IMAGES,
                               ST_NEW_FS_UBOS,
                               ST_NEW_FS_SSBOS,
                               ST_NEW_FS_ATOMICS);
      break;

   case MESA_SHADER_COMPUTE:
      states = &((struct st_program*)prog)->affected_states;

      *states = ST_NEW_CS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_CS_CONSTANTS,
                               ST_NEW_CS_SAMPLER_VIEWS,
                               ST_NEW_CS_SAMPLERS,
                               ST_NEW_CS_IMAGES,
                               ST_NEW_CS_UBOS,
                               ST_NEW_CS_SSBOS,
                               ST_NEW_CS_ATOMICS);
      break;

   default:
      unreachable("unhandled shader stage");
   }
}


/**
 * Delete a shader variant.  Note the caller must unlink the variant from
 * the linked list.
 */
static void
delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
{
   if (v->driver_shader) {
      if (target == GL_VERTEX_PROGRAM_ARB &&
          ((struct st_common_variant*)v)->key.is_draw_shader) {
         /* Draw shader. */
         draw_delete_vertex_shader(st->draw, v->driver_shader);
      } else if (st->has_shareable_shaders || v->st == st) {
         /* The shader's context matches the calling context, or we
          * don't care.
          */
         switch (target) {
         case GL_VERTEX_PROGRAM_ARB:
            st->pipe->delete_vs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_CONTROL_PROGRAM_NV:
            st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_EVALUATION_PROGRAM_NV:
            st->pipe->delete_tes_state(st->pipe, v->driver_shader);
            break;
         case GL_GEOMETRY_PROGRAM_NV:
            st->pipe->delete_gs_state(st->pipe, v->driver_shader);
            break;
         case GL_FRAGMENT_PROGRAM_ARB:
            st->pipe->delete_fs_state(st->pipe, v->driver_shader);
            break;
         case GL_COMPUTE_PROGRAM_NV:
            st->pipe->delete_compute_state(st->pipe, v->driver_shader);
            break;
         default:
            unreachable("bad shader type in delete_variant");
         }
      } else {
         /* We can't delete a shader with a context different from the one
          * that created it.  Add it to the creating context's zombie list.
          */
         enum pipe_shader_type type =
            pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));

         st_save_zombie_shader(v->st, type, v->driver_shader);
      }
   }

   free(v);
}

static void
st_unbind_program(struct st_context *st, struct st_program *p)
{
   /* Unbind the shader in cso_context and re-bind in st/mesa. */
   switch (p->Base.info.stage) {
   case MESA_SHADER_VERTEX:
      cso_set_vertex_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_VS_STATE;
      break;
   case MESA_SHADER_TESS_CTRL:
      cso_set_tessctrl_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_TCS_STATE;
      break;
   case MESA_SHADER_TESS_EVAL:
      cso_set_tesseval_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_TES_STATE;
      break;
   case MESA_SHADER_GEOMETRY:
      cso_set_geometry_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_GS_STATE;
      break;
   case MESA_SHADER_FRAGMENT:
      cso_set_fragment_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_FS_STATE;
      break;
   case MESA_SHADER_COMPUTE:
      cso_set_compute_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_CS_STATE;
      break;
   default:
      unreachable("invalid shader type");
   }
}

/**
 * Free all basic program variants.
 */
void
st_release_variants(struct st_context *st, struct st_program *p)
{
   struct st_variant *v;

   /* If we are releasing shader variants, unbind the program first, because
    * we don't know which of its variants is bound in the driver.
    */
   if (p->variants)
      st_unbind_program(st, p);

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      delete_variant(st, v, p->Base.Target);
      v = next;
   }

   p->variants = NULL;

   if (p->state.tokens) {
      ureg_free_tokens(p->state.tokens);
      p->state.tokens = NULL;
   }

   /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
    * it has resulted in the driver taking ownership of the NIR.  Those
    * callers should be NULLing out the nir field in any pipe_shader_state
    * that might have this called in order to indicate that.
    *
    * GLSL IR and ARB programs will have set gl_program->nir to the same
    * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
    */
}

/**
 * Free all basic program variants and unref program.
 */
void
st_release_program(struct st_context *st, struct st_program **p)
{
   if (!*p)
      return;

   destroy_program_variants(st, &((*p)->Base));
   st_reference_prog(st, p, NULL);
}

void
st_finalize_nir_before_variants(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   if (nir->options->lower_all_io_to_temps ||
       nir->options->lower_all_io_to_elements ||
       nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   /* st_nir_assign_vs_in_locations requires correct shader info. */
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   st_nir_assign_vs_in_locations(nir);
}

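/**
 * Common NIR post-processing for programs translated from Mesa IR
 * (ARB assembly programs and, at variant time, ATI_fs): SSA conversion,
 * wpos/system-value lowering, and a first round of optimizations.
 */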
static void
st_prog_to_nir_postprocess(struct st_context *st, nir_shader *nir,
                           struct gl_program *prog)
{
   struct pipe_screen *screen = st->screen;

   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");

   NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   /* Optimise NIR */
   NIR_PASS_V(nir, nir_opt_constant_folding);
   st_nir_opts(nir);
   st_finalize_nir_before_variants(nir);

   if (st->allow_st_finalize_nir_twice) {
      char *msg = st_finalize_nir(st, prog, NULL, nir, true, true);
      free(msg);
   }

   nir_validate_shader(nir, "after st/glsl finalize_nir");
}

/**
 * Translate an ARB (asm) program to NIR.
 */
static nir_shader *
st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
                         gl_shader_stage stage)
{
   const struct nir_shader_compiler_options *options =
      st_get_nir_compiler_options(st, prog->info.stage);

   /* Translate to NIR */
   nir_shader *nir = prog_to_nir(prog, options);

   st_prog_to_nir_postprocess(st, nir, prog);

   return nir;
}

/**
 * Prepare st_vertex_program info.
 *
 * attrib_to_index is an optional mapping from a vertex attrib to a shader
 * input index.
 */
void
st_prepare_vertex_program(struct st_program *stp, uint8_t *out_attrib_to_index)
{
   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
   uint8_t attrib_to_index[VERT_ATTRIB_MAX] = {0};

   stvp->num_inputs = 0;
   stvp->vert_attrib_mask = 0;
   memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));

   /* Determine number of inputs, the mappings between VERT_ATTRIB_x
    * and TGSI generic input indexes, plus input attrib semantic info.
    */
   for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
         attrib_to_index[attr] = stvp->num_inputs;
         stvp->vert_attrib_mask |= BITFIELD_BIT(attr);
         stvp->num_inputs++;
      }
   }

   /* pre-setup potentially unused edgeflag input */
   attrib_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;

   /* Compute mapping of vertex program outputs to slots. */
   unsigned num_outputs = 0;
   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
         stvp->result_to_output[attr] = num_outputs++;
   }
   /* pre-setup potentially unused edgeflag output */
   stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;

   if (out_attrib_to_index)
      memcpy(out_attrib_to_index, attrib_to_index, sizeof(attrib_to_index));
}

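/*
 * Note: this recomputes the same default dense mapping from gl_varying_slot
 * to output slots that shader translation uses (cf.
 * st_prepare_vertex_program and st_translate_common_program), so the
 * register_index values below line up with the translated shader's outputs.
 */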
void
st_translate_stream_output_info(struct gl_program *prog)
{
   struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
   if (!info)
      return;

   /* Determine the (default) output register mapping for each output. */
   unsigned num_outputs = 0;
   ubyte output_mapping[VARYING_SLOT_TESS_MAX];
   memset(output_mapping, 0, sizeof(output_mapping));

   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
         output_mapping[attr] = num_outputs++;
   }

   /* Translate stream output info. */
   struct pipe_stream_output_info *so_info =
      &((struct st_program*)prog)->state.stream_output;

   for (unsigned i = 0; i < info->NumOutputs; i++) {
      so_info->output[i].register_index =
         output_mapping[info->Outputs[i].OutputRegister];
      so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
      so_info->output[i].num_components = info->Outputs[i].NumComponents;
      so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
      so_info->output[i].stream = info->Outputs[i].StreamId;
   }

   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so_info->stride[i] = info->Buffers[i].Stride;
   }
   so_info->num_outputs = info->NumOutputs;
}

/**
 * Creates a driver shader from a NIR shader.  Takes ownership of the
 * passed nir_shader.
 */
struct pipe_shader_state *
st_create_nir_shader(struct st_context *st, struct pipe_shader_state *state)
{
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = st->screen;

   assert(state->type == PIPE_SHADER_IR_NIR);
   nir_shader *nir = state->ir.nir;
   gl_shader_stage stage = nir->info.stage;
   enum pipe_shader_type sh = pipe_shader_type_from_mesa(stage);

   if (ST_DEBUG & DEBUG_PRINT_IR) {
      fprintf(stderr, "NIR before handing off to driver:\n");
      nir_print_shader(nir, stderr);
   }

   if (PIPE_SHADER_IR_NIR !=
       screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_PREFERRED_IR)) {
      /* u_screen.c defaults to images as deref enabled for some reason (which
       * is what radeonsi wants), but nir-to-tgsi requires lowered images.
       */
      if (screen->get_param(screen, PIPE_CAP_NIR_IMAGES_AS_DEREF))
         NIR_PASS_V(nir, gl_nir_lower_images, false);

      state->type = PIPE_SHADER_IR_TGSI;
      state->tokens = nir_to_tgsi(nir, screen);

      if (ST_DEBUG & DEBUG_PRINT_IR) {
         fprintf(stderr, "TGSI for driver after nir-to-tgsi:\n");
         tgsi_dump(state->tokens, 0);
         fprintf(stderr, "\n");
      }
   }

   struct pipe_shader_state *shader;
   switch (stage) {
   case MESA_SHADER_VERTEX:
      shader = pipe->create_vs_state(pipe, state);
      break;
   case MESA_SHADER_TESS_CTRL:
      shader = pipe->create_tcs_state(pipe, state);
      break;
   case MESA_SHADER_TESS_EVAL:
      shader = pipe->create_tes_state(pipe, state);
      break;
   case MESA_SHADER_GEOMETRY:
      shader = pipe->create_gs_state(pipe, state);
      break;
   case MESA_SHADER_FRAGMENT:
      shader = pipe->create_fs_state(pipe, state);
      break;
   case MESA_SHADER_COMPUTE: {
      struct pipe_compute_state cs = {0};
      cs.ir_type = state->type;
      cs.req_local_mem = nir->info.shared_size;

      if (state->type == PIPE_SHADER_IR_NIR)
         cs.prog = state->ir.nir;
      else
         cs.prog = state->tokens;

      shader = pipe->create_compute_state(pipe, &cs);
      break;
   }
   default:
      unreachable("unsupported shader stage");
      return NULL;
   }

   if (state->type == PIPE_SHADER_IR_TGSI)
      tgsi_free_tokens(state->tokens);

   return shader;
}

/**
 * Translate a vertex program.
 */
bool
st_translate_vertex_program(struct st_context *st,
                            struct st_program *stp)
{
   struct ureg_program *ureg;
   enum pipe_error error;
   unsigned num_outputs = 0;
   unsigned attr;
   ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
   ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};

   if (stp->Base.arb.IsPositionInvariant)
      _mesa_insert_mvp_code(st->ctx, &stp->Base);

   /* ARB_vp: */
   if (!stp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       */
      stp->affected_states = ST_NEW_VS_STATE |
                             ST_NEW_RASTERIZER |
                             ST_NEW_VERTEX_ARRAYS;

      if (stp->Base.Parameters->NumParameters)
         stp->affected_states |= ST_NEW_VS_CONSTANTS;

      if (stp->Base.nir)
         ralloc_free(stp->Base.nir);

      if (stp->serialized_nir) {
         free(stp->serialized_nir);
         stp->serialized_nir = NULL;
      }

      stp->state.type = PIPE_SHADER_IR_NIR;
      stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
                                               MESA_SHADER_VERTEX);
      stp->Base.info = stp->Base.nir->info;

      st_prepare_vertex_program(stp, NULL);
      return true;
   }

   uint8_t input_to_index[VERT_ATTRIB_MAX];
   st_prepare_vertex_program(stp, input_to_index);

   /* Get semantic names and indices. */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
         unsigned slot = num_outputs++;
         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }
   /* pre-setup potentially unused edgeflag output */
   output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
   output_semantic_index[num_outputs] = 0;

   ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->screen);
   if (ureg == NULL)
      return false;

   ureg_setup_shader_info(ureg, &stp->Base.info);

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stp->Base);
      _mesa_print_program_parameters(st->ctx, &stp->Base);
      debug_printf("\n");
   }

   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

   error = st_translate_program(st->ctx,
                                PIPE_SHADER_VERTEX,
                                ureg,
                                stp->glsl_to_tgsi,
                                &stp->Base,
                                /* inputs */
                                stvp->num_inputs,
                                input_to_index,
                                NULL, /* inputSlotToAttr */
                                NULL, /* input semantic name */
                                NULL, /* input semantic index */
                                NULL, /* interp mode */
                                /* outputs */
                                num_outputs,
                                stvp->result_to_output,
                                output_semantic_name,
                                output_semantic_index);

   st_translate_stream_output_info(&stp->Base);

   free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);

   if (error) {
      debug_printf("%s: failed to translate GLSL IR program:\n", __func__);
      _mesa_print_program(&stp->Base);
      debug_assert(0);
      return false;
   }

   stp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   stp->glsl_to_tgsi = NULL;
   st_store_ir_in_disk_cache(st, &stp->Base, false);

   return stp->state.tokens != NULL;
}

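/**
 * Return a NIR shader to build a variant from.  The first variant takes
 * ownership of stp->Base.nir; later variants deserialize a fresh copy from
 * stp->serialized_nir (see st_serialize_nir).
 */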
static struct nir_shader *
get_nir_shader(struct st_context *st, struct st_program *stp)
{
   if (stp->Base.nir) {
      nir_shader *nir = stp->Base.nir;

      /* The first shader variant takes ownership of NIR, so that there is
       * no cloning.  Additional shader variants are always generated from
       * serialized NIR to save memory.
       */
      stp->Base.nir = NULL;
      assert(stp->serialized_nir && stp->serialized_nir_size);
      return nir;
   }

   struct blob_reader blob_reader;
   const struct nir_shader_compiler_options *options =
      st_get_nir_compiler_options(st, stp->Base.info.stage);

   blob_reader_init(&blob_reader, stp->serialized_nir, stp->serialized_nir_size);
   return nir_deserialize(NULL, options, &blob_reader);
}

static void
lower_ucp(struct st_context *st,
          struct nir_shader *nir,
          unsigned ucp_enables,
          struct gl_program_parameter_list *params)
{
   if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)
      NIR_PASS_V(nir, nir_lower_clip_disable, ucp_enables);
   else {
      struct pipe_screen *screen = st->screen;
      bool can_compact = screen->get_param(screen,
                                           PIPE_CAP_NIR_COMPACT_ARRAYS);
      bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;

      gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH] = {{0}};
      for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
         if (use_eye) {
            clipplane_state[i][0] = STATE_CLIPPLANE;
            clipplane_state[i][1] = i;
         } else {
            clipplane_state[i][0] = STATE_CLIP_INTERNAL;
            clipplane_state[i][1] = i;
         }
         _mesa_add_state_reference(params, clipplane_state[i]);
      }

      if (nir->info.stage == MESA_SHADER_VERTEX) {
         NIR_PASS_V(nir, nir_lower_clip_vs, ucp_enables,
                    true, can_compact, clipplane_state);
      } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
         NIR_PASS_V(nir, nir_lower_clip_gs, ucp_enables,
                    can_compact, clipplane_state);
      }

      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   }
}

static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };

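/**
 * Compile a variant of a non-fragment program for the given key, applying
 * the key's lowerings (clamp color, edgeflag passthrough, point size, user
 * clip planes, GL_CLAMP emulation) to the variant's own copy of the IR.
 */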
static struct st_common_variant *
st_create_common_variant(struct st_context *st,
                         struct st_program *stp,
                         const struct st_common_variant_key *key)
{
   struct st_common_variant *v = CALLOC_STRUCT(st_common_variant);
   struct pipe_context *pipe = st->pipe;
   struct pipe_shader_state state = {0};

   static const gl_state_index16 point_size_state[STATE_LENGTH] =
      { STATE_POINT_SIZE_CLAMPED, 0 };
   struct gl_program_parameter_list *params = stp->Base.Parameters;

   v->key = *key;

   state.stream_output = stp->state.stream_output;

   if (stp->state.type == PIPE_SHADER_IR_NIR) {
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = get_nir_shader(st, stp);
      const nir_shader_compiler_options *options = ((nir_shader *)state.ir.nir)->options;

      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }
      if (key->passthrough_edgeflags) {
         NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
         finalize = true;
      }

      if (key->lower_point_size) {
         _mesa_add_state_reference(params, point_size_state);
         NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
                    point_size_state);

         switch (stp->Base.info.stage) {
         case MESA_SHADER_VERTEX:
            stp->affected_states |= ST_NEW_VS_CONSTANTS;
            break;
         case MESA_SHADER_TESS_EVAL:
            stp->affected_states |= ST_NEW_TES_CONSTANTS;
            break;
         case MESA_SHADER_GEOMETRY:
            stp->affected_states |= ST_NEW_GS_CONSTANTS;
            break;
         default:
            unreachable("bad shader stage");
         }

         finalize = true;
      }

      if (key->lower_ucp) {
         assert(!options->unify_interfaces);
         lower_ucp(st, state.ir.nir, key->lower_ucp, params);
         finalize = true;
      }

      if (st->emulate_gl_clamp &&
          (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
         nir_lower_tex_options tex_opts = {0};
         tex_opts.saturate_s = key->gl_clamp[0];
         tex_opts.saturate_t = key->gl_clamp[1];
         tex_opts.saturate_r = key->gl_clamp[2];
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &tex_opts);
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         char *msg = st_finalize_nir(st, &stp->Base, stp->shader_program, state.ir.nir,
                                     true, false);
         free(msg);

         /* Clip lowering and edgeflags may have introduced new varyings, so
          * update the inputs_read/outputs_written.  However, with
          * unify_interfaces set (aka iris) the non-SSO varyings layout is
          * decided at link time with outputs_written updated so the two line
          * up.  A driver with this flag set may not use any of the lowering
          * passes that would change the varyings, so skip to make sure we
          * don't break its linkage.
          */
         if (!options->unify_interfaces) {
            nir_shader_gather_info(state.ir.nir,
                                   nir_shader_get_entrypoint(state.ir.nir));
         }
      }

      if (key->is_draw_shader)
         v->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
      else
         v->base.driver_shader = st_create_nir_shader(st, &state);

      return v;
   }

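   /* From here on the program IR is TGSI; apply the key's emulations as
    * token-to-token transforms on a duplicate of the tokens so the
    * program's own copy stays untouched.
    */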
   state.type = PIPE_SHADER_IR_TGSI;
   state.tokens = tgsi_dup_tokens(stp->state.tokens);

   /* Emulate features. */
   if (key->clamp_color || key->passthrough_edgeflags) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const =
         _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
                                         key->clip_negative_one_to_one);
      if (tokens != state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   switch (stp->Base.info.stage) {
   case MESA_SHADER_VERTEX:
      if (key->is_draw_shader)
         v->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
      else
         v->base.driver_shader = pipe->create_vs_state(pipe, &state);
      break;
   case MESA_SHADER_TESS_CTRL:
      v->base.driver_shader = pipe->create_tcs_state(pipe, &state);
      break;
   case MESA_SHADER_TESS_EVAL:
      v->base.driver_shader = pipe->create_tes_state(pipe, &state);
      break;
   case MESA_SHADER_GEOMETRY:
      v->base.driver_shader = pipe->create_gs_state(pipe, &state);
      break;
   case MESA_SHADER_COMPUTE: {
      struct pipe_compute_state cs = {0};
      cs.ir_type = state.type;
      cs.req_local_mem = stp->Base.info.shared_size;

      if (state.type == PIPE_SHADER_IR_NIR)
         cs.prog = state.ir.nir;
      else
         cs.prog = state.tokens;

      v->base.driver_shader = pipe->create_compute_state(pipe, &cs);
      break;
   }
   default:
      assert(!"unhandled shader type");
      free(v);
      return NULL;
   }

   if (state.tokens) {
      tgsi_free_tokens(state.tokens);
   }

   return v;
}

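/**
 * Insert a variant into a program's variant list, keeping the default
 * variant (the first one created) at the head of the list; later variants
 * become the second entry, e.g. adding v2 and then v3 to [v1] yields
 * [v1, v3, v2].
 */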
static void
st_add_variant(struct st_variant **list, struct st_variant *v)
{
   struct st_variant *first = *list;

   /* Make sure that the default variant stays the first in the list, and
    * insert later variants as the second entry.
    */
   if (first) {
      v->next = first->next;
      first->next = v;
   } else {
      *list = v;
   }
}

/**
 * Find/create a common (non-fragment) program variant.
 */
struct st_common_variant *
st_get_common_variant(struct st_context *st,
                      struct st_program *stp,
                      const struct st_common_variant_key *key)
{
   struct st_common_variant *v;

   /* Search for existing variant */
   for (v = st_common_variant(stp->variants); v;
        v = st_common_variant(v->base.next)) {
      if (memcmp(&v->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!v) {
      if (stp->variants != NULL) {
         _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
                          "Compiling %s shader variant (%s%s%s%s%s%s%s%s)",
                          _mesa_shader_stage_to_string(stp->Base.info.stage),
                          key->passthrough_edgeflags ? "edgeflags," : "",
                          key->clamp_color ? "clamp_color," : "",
                          key->lower_depth_clamp ? "depth_clamp," : "",
                          key->clip_negative_one_to_one ? "clip_negative_one," : "",
                          key->lower_point_size ? "point_size," : "",
                          key->lower_ucp ? "ucp," : "",
                          key->is_draw_shader ? "draw," : "",
                          key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "");
      }

      /* create now */
      v = st_create_common_variant(st, stp, key);
      if (v) {
         v->base.st = key->st;

         if (stp->Base.info.stage == MESA_SHADER_VERTEX) {
            struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

            v->vert_attrib_mask =
               stvp->vert_attrib_mask |
               (key->passthrough_edgeflags ? VERT_BIT_EDGEFLAG : 0);
         }

         st_add_variant(&stp->variants, &v->base);
      }
   }

   return v;
}


/**
 * Translate a Mesa fragment program into a TGSI shader (glsl_to_tgsi
 * programs) or NIR (ARB programs; ATI_fs is translated at variant time).
 */
bool
st_translate_fragment_program(struct st_context *st,
                              struct st_program *stfp)
{
   /* Non-GLSL programs: */
   if (!stfp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
      if (st->ctx->Const.GLSLFragCoordIsSysVal)
         _mesa_program_fragment_position_to_sysval(&stfp->Base);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       *
       * fragment.position and glDrawPixels always use constants.
       */
      stfp->affected_states = ST_NEW_FS_STATE |
                              ST_NEW_SAMPLE_SHADING |
                              ST_NEW_FS_CONSTANTS;

      if (stfp->ati_fs) {
         /* Just set them for ATI_fs unconditionally. */
         stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                  ST_NEW_FS_SAMPLERS;
      } else {
         /* ARB_fp */
         if (stfp->Base.SamplersUsed)
            stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                     ST_NEW_FS_SAMPLERS;
      }

      /* Translate to NIR.  ATI_fs translates at variant time. */
      if (!stfp->ati_fs) {
         nir_shader *nir =
            st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);

         if (stfp->Base.nir)
            ralloc_free(stfp->Base.nir);
         if (stfp->serialized_nir) {
            free(stfp->serialized_nir);
            stfp->serialized_nir = NULL;
         }
         stfp->state.type = PIPE_SHADER_IR_NIR;
         stfp->Base.nir = nir;
      }

      return true;
   }

   ubyte outputMapping[2 * FRAG_RESULT_MAX];
   ubyte inputMapping[VARYING_SLOT_MAX];
   ubyte inputSlotToAttr[VARYING_SLOT_MAX];
   ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
   GLuint attr;
   GLbitfield64 inputsRead;
   struct ureg_program *ureg;

   GLboolean write_all = GL_FALSE;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint fs_num_inputs = 0;

   ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint fs_num_outputs = 0;

   memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   inputsRead = stfp->Base.info.inputs_read;
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
         const GLuint slot = fs_num_inputs++;

         inputMapping[attr] = slot;
         inputSlotToAttr[slot] = attr;

         switch (attr) {
         case VARYING_SLOT_POS:
            input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            break;
         case VARYING_SLOT_COL0:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 0;
            interpMode[slot] = stfp->glsl_to_tgsi ?
               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
            break;
         case VARYING_SLOT_COL1:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 1;
            interpMode[slot] = stfp->glsl_to_tgsi ?
               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
            break;
         case VARYING_SLOT_FOGC:
            input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_FACE:
            input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_PRIMITIVE_ID:
            input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_LAYER:
            input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_VIEWPORT:
            input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_CLIP_DIST0:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CLIP_DIST1:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 1;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CULL_DIST0:
         case VARYING_SLOT_CULL_DIST1:
            /* these should have been lowered by GLSL */
            assert(0);
            break;
         /* In most cases, there is nothing special about these
          * inputs, so adopt a convention to use the generic
          * semantic name and the mesa VARYING_SLOT_ number as the
          * index.
          *
          * All that is required is that the vertex shader labels
          * its own outputs similarly, and that the vertex shader
          * generates at least every output required by the
          * fragment shader plus fixed-function hardware (such as
          * BFC).
          *
          * However, some drivers may need us to identify the PNTC and TEXi
          * varyings if, for example, their capability to replace them with
          * sprite coordinates is limited.
          */
         case VARYING_SLOT_PNTC:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
               input_semantic_index[slot] = 0;
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
               break;
            }
            FALLTHROUGH;
         case VARYING_SLOT_TEX0:
         case VARYING_SLOT_TEX1:
         case VARYING_SLOT_TEX2:
         case VARYING_SLOT_TEX3:
         case VARYING_SLOT_TEX4:
         case VARYING_SLOT_TEX5:
         case VARYING_SLOT_TEX6:
         case VARYING_SLOT_TEX7:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
               input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
               interpMode[slot] = stfp->glsl_to_tgsi ?
                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
               break;
            }
            FALLTHROUGH;
         case VARYING_SLOT_VAR0:
         default:
            /* Semantic indices should be zero-based because drivers may choose
             * to assign a fixed slot determined by that index.
             * This is useful because ARB_separate_shader_objects uses location
             * qualifiers for linkage, and if the semantic index corresponds to
             * these locations, linkage passes in the driver become unnecessary.
             *
             * If needs_texcoord_semantic is true, no semantic indices will be
             * consumed for the TEXi varyings, and we can base the locations of
             * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
             */
            assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
                   (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
            input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
            if (attr == VARYING_SLOT_PNTC)
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            else {
               interpMode[slot] = stfp->glsl_to_tgsi ?
                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
            }
            break;
         }
      }
      else {
         inputMapping[attr] = -1;
      }
   }

   /*
    * Semantics and mapping for outputs
    */
   GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;

   /* if z is written, emit that first */
   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
   }

   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
   }

   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
   }

   /* handle remaining outputs (color) */
   for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
      const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
         stfp->Base.SecondaryOutputsWritten;
      const unsigned loc = attr % FRAG_RESULT_MAX;

      if (written & BITFIELD64_BIT(loc)) {
         switch (loc) {
         case FRAG_RESULT_DEPTH:
         case FRAG_RESULT_STENCIL:
         case FRAG_RESULT_SAMPLE_MASK:
            /* handled above */
            assert(0);
            break;
         case FRAG_RESULT_COLOR:
            write_all = GL_TRUE;
            FALLTHROUGH;
         default: {
            int index;
            assert(loc == FRAG_RESULT_COLOR ||
                   (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));

            index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);

            if (attr >= FRAG_RESULT_MAX) {
               /* Secondary color for dual source blending. */
               assert(index == 0);
               index++;
            }

            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
            fs_output_semantic_index[fs_num_outputs] = index;
            outputMapping[attr] = fs_num_outputs;
            break;
         }
         }

         fs_num_outputs++;
      }
   }

   ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->screen);
   if (ureg == NULL)
      return false;

   ureg_setup_shader_info(ureg, &stfp->Base.info);

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stfp->Base);
      _mesa_print_program_parameters(st->ctx, &stfp->Base);
      debug_printf("\n");
   }
   if (write_all == GL_TRUE)
      ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);

   if (stfp->glsl_to_tgsi) {
      st_translate_program(st->ctx,
                           PIPE_SHADER_FRAGMENT,
                           ureg,
                           stfp->glsl_to_tgsi,
                           &stfp->Base,
                           /* inputs */
                           fs_num_inputs,
                           inputMapping,
                           inputSlotToAttr,
                           input_semantic_name,
                           input_semantic_index,
                           interpMode,
                           /* outputs */
                           fs_num_outputs,
                           outputMapping,
                           fs_output_semantic_name,
                           fs_output_semantic_index);

      free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
   }

   stfp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   if (stfp->glsl_to_tgsi) {
      stfp->glsl_to_tgsi = NULL;
      st_store_ir_in_disk_cache(st, &stfp->Base, false);
   }

   return stfp->state.tokens != NULL;
}

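/**
 * Compile a fragment shader variant for the given key, applying the key's
 * lowerings (alpha test, flat shading, two-sided color, glBitmap and
 * glDrawPixels emulation, external YUV samplers, ...) to the variant's own
 * copy of the IR.
 */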
static struct st_fp_variant *
st_create_fp_variant(struct st_context *st,
                     struct st_program *stfp,
                     const struct st_fp_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = stfp->Base.Parameters;
   static const gl_state_index16 texcoord_state[STATE_LENGTH] =
      { STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
   static const gl_state_index16 scale_state[STATE_LENGTH] =
      { STATE_PT_SCALE };
   static const gl_state_index16 bias_state[STATE_LENGTH] =
      { STATE_PT_BIAS };
   static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
      { STATE_ALPHA_REF };

   if (!variant)
      return NULL;

   /* Translate ATI_fs to NIR at variant time because that's when we have the
    * texture types.
    */
   if (stfp->ati_fs) {
      const struct nir_shader_compiler_options *options =
         st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT);

      nir_shader *s = st_translate_atifs_program(stfp->ati_fs, key, &stfp->Base, options);

      st_prog_to_nir_postprocess(st, s, &stfp->Base);

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = s;
   } else if (stfp->state.type == PIPE_SHADER_IR_NIR) {
      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = get_nir_shader(st, stfp);
   }

   if (state.type == PIPE_SHADER_IR_NIR) {
      bool finalize = false;

      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }

      if (key->lower_flatshade) {
         NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
         finalize = true;
      }

      if (key->lower_alpha_func != COMPARE_FUNC_ALWAYS) {
         _mesa_add_state_reference(params, alpha_ref_state);
         NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
                    false, alpha_ref_state);
         finalize = true;
      }

      if (key->lower_two_sided_color) {
         bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal;
         NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color, face_sysval);
         finalize = true;
      }

      if (key->persample_shading) {
         nir_shader *shader = state.ir.nir;
         nir_foreach_shader_in_variable(var, shader)
            var->data.sample = true;
         finalize = true;
      }

      if (key->lower_texcoord_replace) {
         bool point_coord_is_sysval = st->ctx->Const.GLSLPointCoordIsSysVal;
         NIR_PASS_V(state.ir.nir, nir_lower_texcoord_replace,
                    key->lower_texcoord_replace, point_coord_is_sysval, false);
         finalize = true;
      }

      if (st->emulate_gl_clamp &&
          (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
         nir_lower_tex_options tex_opts = {0};
         tex_opts.saturate_s = key->gl_clamp[0];
         tex_opts.saturate_t = key->gl_clamp[1];
         tex_opts.saturate_r = key->gl_clamp[2];
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &tex_opts);
         finalize = true;
      }

      assert(!(key->bitmap && key->drawpixels));

      /* glBitmap */
      if (key->bitmap) {
         nir_lower_bitmap_options options = {0};

         variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
         options.sampler = variant->bitmap_sampler;
         options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;

         NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
         finalize = true;
      }

      /* glDrawPixels (color only) */
      if (key->drawpixels) {
         nir_lower_drawpixels_options options = {{0}};
         unsigned samplers_used = stfp->Base.SamplersUsed;

         /* Find the first unused slot. */
         variant->drawpix_sampler = ffs(~samplers_used) - 1;
         options.drawpix_sampler = variant->drawpix_sampler;
         samplers_used |= (1 << variant->drawpix_sampler);

         options.pixel_maps = key->pixelMaps;
         if (key->pixelMaps) {
            variant->pixelmap_sampler = ffs(~samplers_used) - 1;
            options.pixelmap_sampler = variant->pixelmap_sampler;
         }

         options.scale_and_bias = key->scaleAndBias;
         if (key->scaleAndBias) {
            _mesa_add_state_reference(params, scale_state);
            memcpy(options.scale_state_tokens, scale_state,
                   sizeof(options.scale_state_tokens));
            _mesa_add_state_reference(params, bias_state);
            memcpy(options.bias_state_tokens, bias_state,
                   sizeof(options.bias_state_tokens));
         }

         _mesa_add_state_reference(params, texcoord_state);
         memcpy(options.texcoord_state_tokens, texcoord_state,
                sizeof(options.texcoord_state_tokens));

         NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
         finalize = true;
      }

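      /* samplerExternalOES: lower multi-plane YUV sampling into ordinary
       * 2D texture fetches plus colorspace conversion; the extra per-plane
       * samplers are then remapped by st_nir_lower_tex_src_plane below.
       */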
      bool need_lower_tex_src_plane = false;

      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv ||
                   key->external.lower_yuv || key->external.lower_yu_yv ||
                   key->external.lower_y41x)) {

         st_nir_lower_samplers(st->screen, state.ir.nir,
                               stfp->shader_program, &stfp->Base);

         nir_lower_tex_options options = {0};
         options.lower_y_uv_external = key->external.lower_nv12;
         options.lower_y_u_v_external = key->external.lower_iyuv;
         options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
         options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
         options.lower_ayuv_external = key->external.lower_ayuv;
         options.lower_xyuv_external = key->external.lower_xyuv;
         options.lower_yuv_external = key->external.lower_yuv;
         options.lower_yu_yv_external = key->external.lower_yu_yv;
         options.lower_y41x_external = key->external.lower_y41x;
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
         finalize = true;
         need_lower_tex_src_plane = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         char *msg = st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
                                     false, false);
         free(msg);
      }

      /* This pass needs to happen *after* nir_lower_sampler */
      if (unlikely(need_lower_tex_src_plane)) {
         NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
                    ~stfp->Base.SamplersUsed,
                    key->external.lower_nv12 | key->external.lower_xy_uxvx |
                    key->external.lower_yx_xuxv,
                    key->external.lower_iyuv);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));

         struct pipe_screen *screen = st->screen;
         if (screen->finalize_nir) {
            char *msg = screen->finalize_nir(screen, state.ir.nir);
            free(msg);
         }
      }

      variant->base.driver_shader = st_create_nir_shader(st, &state);
      variant->key = *key;

      return variant;
   }

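   /* TGSI fallback: the same emulations are applied below as token-to-token
    * transforms instead of NIR passes.
    */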
   state.tokens = stfp->state.tokens;

   assert(!(key->bitmap && key->drawpixels));

   /* Emulate features. */
   if (key->clamp_color || key->persample_shading) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
   }

   /* glBitmap */
   if (key->bitmap) {
      const struct tgsi_token *tokens;

      variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      tokens = st_get_bitmap_shader(state.tokens,
                                    st->internal_target,
                                    variant->bitmap_sampler,
                                    st->needs_texcoord_semantic,
                                    st->bitmap.tex_format ==
                                    PIPE_FORMAT_R8_UNORM);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
   }

   /* glDrawPixels (color only) */
   if (key->drawpixels) {
      const struct tgsi_token *tokens;
      unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;

      /* Find the first unused slot. */
      variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      if (key->pixelMaps) {
         unsigned samplers_used = stfp->Base.SamplersUsed |
                                  (1 << variant->drawpix_sampler);

         variant->pixelmap_sampler = ffs(~samplers_used) - 1;
      }

      if (key->scaleAndBias) {
         scale_const = _mesa_add_state_reference(params, scale_state);
         bias_const = _mesa_add_state_reference(params, bias_state);
      }

      texcoord_const = _mesa_add_state_reference(params, texcoord_state);

      tokens = st_get_drawpix_shader(state.tokens,
                                     st->needs_texcoord_semantic,
                                     key->scaleAndBias, scale_const,
                                     bias_const, key->pixelMaps,
                                     variant->drawpix_sampler,
                                     variant->pixelmap_sampler,
                                     texcoord_const, st->internal_target);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
   }

   if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
      const struct tgsi_token *tokens;

      /* samplers inserted would conflict, but this should be impossible: */
      assert(!(key->bitmap || key->drawpixels));

      tokens = st_tgsi_lower_yuv(state.tokens,
                                 ~stfp->Base.SamplersUsed,
                                 key->external.lower_nv12 ||
                                 key->external.lower_xy_uxvx ||
                                 key->external.lower_yx_xuxv,
                                 key->external.lower_iyuv);
      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
      if (state.tokens != stfp->state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   /* fill in variant */
   variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
   variant->key = *key;

   if (state.tokens != stfp->state.tokens)
      tgsi_free_tokens(state.tokens);
   return variant;
}

/**
 * Find/create a fragment program variant.
 */
struct st_fp_variant *
st_get_fp_variant(struct st_context *st,
                  struct st_program *stfp,
                  const struct st_fp_variant_key *key)
{
   struct st_fp_variant *fpv;

   /* Search for existing variant */
   for (fpv = st_fp_variant(stfp->variants); fpv;
        fpv = st_fp_variant(fpv->base.next)) {
      if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!fpv) {
      /* create new */

      if (stfp->variants != NULL) {
         _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
                          "Compiling fragment shader variant (%s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
                          key->bitmap ? "bitmap," : "",
                          key->drawpixels ? "drawpixels," : "",
                          key->scaleAndBias ? "scale_bias," : "",
                          key->pixelMaps ? "pixel_maps," : "",
                          key->clamp_color ? "clamp_color," : "",
                          key->persample_shading ? "persample_shading," : "",
                          key->fog ? "fog," : "",
                          key->lower_depth_clamp ? "depth_clamp," : "",
                          key->lower_two_sided_color ? "twoside," : "",
                          key->lower_flatshade ? "flatshade," : "",
                          key->lower_texcoord_replace ? "texcoord_replace," : "",
                          key->lower_alpha_func ? "alpha_compare," : "",
                          /* skipped ATI_fs targets */
                          stfp->Base.ExternalSamplersUsed ? "external?," : "",
                          key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "");
      }

      fpv = st_create_fp_variant(st, stfp, key);
      if (fpv) {
         fpv->base.st = key->st;

         st_add_variant(&stfp->variants, &fpv->base);
      }
   }

   return fpv;
}

/**
 * Translate a program.  This is common code for geometry and tessellation
 * shaders.
 */
bool
st_translate_common_program(struct st_context *st,
                            struct st_program *stp)
{
   struct gl_program *prog = &stp->Base;
   enum pipe_shader_type stage =
      pipe_shader_type_from_mesa(stp->Base.info.stage);
   struct ureg_program *ureg = ureg_create_with_screen(stage, st->screen);

   if (ureg == NULL)
      return false;

   ureg_setup_shader_info(ureg, &stp->Base.info);

   ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
   ubyte inputMapping[VARYING_SLOT_TESS_MAX];
   ubyte outputMapping[VARYING_SLOT_TESS_MAX];
   GLuint attr;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint num_inputs = 0;

   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint num_outputs = 0;

   GLint i;

   memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
   memset(inputMapping, 0, sizeof(inputMapping));
   memset(outputMapping, 0, sizeof(outputMapping));
   memset(&stp->state, 0, sizeof(stp->state));

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
         continue;

      unsigned slot = num_inputs++;

      inputMapping[attr] = slot;
      inputSlotToAttr[slot] = attr;

      unsigned semantic_name, semantic_index;
      tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                   &semantic_name, &semantic_index);
      input_semantic_name[slot] = semantic_name;
      input_semantic_index[slot] = semantic_index;
   }

   /* Also add patch inputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_inputs_read & (1u << attr)) {
         GLuint slot = num_inputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         inputMapping[patch_attr] = slot;
         inputSlotToAttr[slot] = patch_attr;
         input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         input_semantic_index[slot] = attr;
      }
   }

   /* initialize output semantics to defaults */
   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
      output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
      output_semantic_index[i] = 0;
   }

   /*
    * Determine number of outputs, the (default) output register
    * mapping and the semantic information for each output.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
         GLuint slot = num_outputs++;

         outputMapping[attr] = slot;

         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }

   /* Also add patch outputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_outputs_written & (1u << attr)) {
         GLuint slot = num_outputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         outputMapping[patch_attr] = slot;
         output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         output_semantic_index[slot] = attr;
      }
   }

   st_translate_program(st->ctx,
                        stage,
                        ureg,
                        stp->glsl_to_tgsi,
                        prog,
                        /* inputs */
                        num_inputs,
                        inputMapping,
                        inputSlotToAttr,
                        input_semantic_name,
                        input_semantic_index,
                        NULL,
                        /* outputs */
                        num_outputs,
                        outputMapping,
                        output_semantic_name,
                        output_semantic_index);

   stp->state.tokens = ureg_get_tokens(ureg, NULL);

   ureg_destroy(ureg);

   st_translate_stream_output_info(prog);

   st_store_ir_in_disk_cache(st, prog, false);

   if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
      _mesa_print_program(prog);

   free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   stp->glsl_to_tgsi = NULL;
   return true;
}


/**
 * Vert/Geom/Frag programs have per-context variants.  Free all the
 * variants attached to the given program which match the given context.
 */
static void
destroy_program_variants(struct st_context *st, struct gl_program *target)
{
   if (!target || target == &_mesa_DummyProgram)
      return;

   struct st_program *p = st_program(target);
   struct st_variant *v, **prevPtr = &p->variants;
   bool unbound = false;

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      if (v->st == st) {
         if (!unbound) {
            st_unbind_program(st, p);
            unbound = true;
         }

         /* unlink from list */
         *prevPtr = next;
         /* destroy this variant */
         delete_variant(st, v, target->Target);
      }
      else {
         prevPtr = &v->next;
      }
      v = next;
   }
}


/**
 * Callback for _mesa_HashWalk.  Free all the shader's program variants
 * which match the given context.
 */
static void
destroy_shader_program_variants_cb(void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_shader *shader = (struct gl_shader *) data;

   switch (shader->Type) {
   case GL_SHADER_PROGRAM_MESA:
      {
         struct gl_shader_program *shProg = (struct gl_shader_program *) data;
         GLuint i;

         for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
            if (shProg->_LinkedShaders[i])
               destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
         }
      }
      break;
   case GL_VERTEX_SHADER:
   case GL_FRAGMENT_SHADER:
   case GL_GEOMETRY_SHADER:
   case GL_TESS_CONTROL_SHADER:
   case GL_TESS_EVALUATION_SHADER:
   case GL_COMPUTE_SHADER:
      break;
   default:
      assert(0);
   }
}


/**
 * Callback for _mesa_HashWalk.  Free all the program variants which match
 * the given context.
 */
static void
destroy_program_variants_cb(void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_program *program = (struct gl_program *) data;
   destroy_program_variants(st, program);
}


/**
 * Walk over all shaders and programs to delete any variants which
 * belong to the given context.
 * This is called during context tear-down.
 */
void
st_destroy_program_variants(struct st_context *st)
{
   /* If shaders can be shared with other contexts, the last context will
    * call DeleteProgram on all shaders, releasing everything.
    */
   if (st->has_shareable_shaders)
      return;

   /* ARB vert/frag program */
   _mesa_HashWalk(st->ctx->Shared->Programs,
                  destroy_program_variants_cb, st);

   /* GLSL vert/frag/geom shaders */
   _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
                  destroy_shader_program_variants_cb, st);
}


/**
 * Compile one shader variant.
 */
static void
st_precompile_shader_variant(struct st_context *st,
                             struct gl_program *prog)
{
   switch (prog->Target) {
   case GL_VERTEX_PROGRAM_ARB:
   case GL_TESS_CONTROL_PROGRAM_NV:
   case GL_TESS_EVALUATION_PROGRAM_NV:
   case GL_GEOMETRY_PROGRAM_NV:
   case GL_COMPUTE_PROGRAM_NV: {
      struct st_program *p = (struct st_program *)prog;
      struct st_common_variant_key key;

      memset(&key, 0, sizeof(key));

      if (st->ctx->API == API_OPENGL_COMPAT &&
          st->clamp_vert_color_in_shader &&
          (prog->info.outputs_written & (VARYING_BIT_COL0 |
                                         VARYING_BIT_COL1 |
                                         VARYING_BIT_BFC0 |
                                         VARYING_BIT_BFC1))) {
         key.clamp_color = true;
      }

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_common_variant(st, p, &key);
      break;
   }

   case GL_FRAGMENT_PROGRAM_ARB: {
      struct st_program *p = (struct st_program *)prog;
      struct st_fp_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      key.lower_alpha_func = COMPARE_FUNC_ALWAYS;
      if (p->ati_fs) {
         for (int i = 0; i < ARRAY_SIZE(key.texture_index); i++)
            key.texture_index[i] = TEXTURE_2D_INDEX;
      }
      st_get_fp_variant(st, p, &key);
      break;
   }

   default:
      assert(0);
   }
}

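/**
 * Serialize NIR into stp->serialized_nir so that later shader variants can
 * be deserialized from it instead of cloning the original shader (see
 * get_nir_shader).
 */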
void
st_serialize_nir(struct st_program *stp)
{
   if (!stp->serialized_nir) {
      struct blob blob;
      size_t size;

      blob_init(&blob);
      nir_serialize(&blob, stp->Base.nir, false);
      blob_finish_get_buffer(&blob, &stp->serialized_nir, &size);
      stp->serialized_nir_size = size;
   }
}

void
st_finalize_program(struct st_context *st, struct gl_program *prog)
{
   if (st->current_program[prog->info.stage] == prog) {
      if (prog->info.stage == MESA_SHADER_VERTEX)
         st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
      else
         st->dirty |= ((struct st_program *)prog)->affected_states;
   }

   if (prog->nir) {
      nir_sweep(prog->nir);

      /* This is only needed for ARB_vp/fp programs and when the disk cache
       * is disabled.  If the disk cache is enabled, GLSL programs are
       * serialized in write_nir_to_cache.
       */
      st_serialize_nir(st_program(prog));
   }

   /* Always create the default variant of the program. */
   st_precompile_shader_variant(st, prog);
}