1/**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keithw@vmware.com>
30  *   Brian Paul
31  */
32
33
34#include "main/errors.h"
35
36#include "main/hash.h"
37#include "main/mtypes.h"
38#include "program/prog_parameter.h"
39#include "program/prog_print.h"
40#include "program/prog_to_nir.h"
41#include "program/programopt.h"
42
43#include "compiler/glsl/gl_nir.h"
44#include "compiler/nir/nir.h"
45#include "compiler/nir/nir_serialize.h"
46#include "draw/draw_context.h"
47
48#include "pipe/p_context.h"
49#include "pipe/p_defines.h"
50#include "pipe/p_shader_tokens.h"
51#include "draw/draw_context.h"
52#include "tgsi/tgsi_dump.h"
53#include "tgsi/tgsi_emulate.h"
54#include "tgsi/tgsi_parse.h"
55#include "tgsi/tgsi_ureg.h"
56#include "nir/nir_to_tgsi.h"
57
58#include "util/u_memory.h"
59
60#include "st_debug.h"
61#include "st_cb_bitmap.h"
62#include "st_cb_drawpixels.h"
63#include "st_context.h"
64#include "st_tgsi_lower_depth_clamp.h"
65#include "st_tgsi_lower_yuv.h"
66#include "st_program.h"
67#include "st_atifs_to_nir.h"
68#include "st_nir.h"
69#include "st_shader_cache.h"
70#include "st_util.h"
71#include "cso_cache/cso_context.h"
72
73
/* Forward declaration: defined later in this file; needed by
 * st_release_program() below.
 */
static void
destroy_program_variants(struct st_context *st, struct gl_program *target);
76
77static void
78set_affected_state_flags(uint64_t *states,
79                         struct gl_program *prog,
80                         uint64_t new_constants,
81                         uint64_t new_sampler_views,
82                         uint64_t new_samplers,
83                         uint64_t new_images,
84                         uint64_t new_ubos,
85                         uint64_t new_ssbos,
86                         uint64_t new_atomics)
87{
88   if (prog->Parameters->NumParameters)
89      *states |= new_constants;
90
91   if (prog->info.num_textures)
92      *states |= new_sampler_views | new_samplers;
93
94   if (prog->info.num_images)
95      *states |= new_images;
96
97   if (prog->info.num_ubos)
98      *states |= new_ubos;
99
100   if (prog->info.num_ssbos)
101      *states |= new_ssbos;
102
103   if (prog->info.num_abos)
104      *states |= new_atomics;
105}
106
107/**
108 * This determines which states will be updated when the shader is bound.
109 */
110void
111st_set_prog_affected_state_flags(struct gl_program *prog)
112{
113   uint64_t *states;
114
115   switch (prog->info.stage) {
116   case MESA_SHADER_VERTEX:
117      states = &((struct st_program*)prog)->affected_states;
118
119      *states = ST_NEW_VS_STATE |
120                ST_NEW_RASTERIZER |
121                ST_NEW_VERTEX_ARRAYS;
122
123      set_affected_state_flags(states, prog,
124                               ST_NEW_VS_CONSTANTS,
125                               ST_NEW_VS_SAMPLER_VIEWS,
126                               ST_NEW_VS_SAMPLERS,
127                               ST_NEW_VS_IMAGES,
128                               ST_NEW_VS_UBOS,
129                               ST_NEW_VS_SSBOS,
130                               ST_NEW_VS_ATOMICS);
131      break;
132
133   case MESA_SHADER_TESS_CTRL:
134      states = &(st_program(prog))->affected_states;
135
136      *states = ST_NEW_TCS_STATE;
137
138      set_affected_state_flags(states, prog,
139                               ST_NEW_TCS_CONSTANTS,
140                               ST_NEW_TCS_SAMPLER_VIEWS,
141                               ST_NEW_TCS_SAMPLERS,
142                               ST_NEW_TCS_IMAGES,
143                               ST_NEW_TCS_UBOS,
144                               ST_NEW_TCS_SSBOS,
145                               ST_NEW_TCS_ATOMICS);
146      break;
147
148   case MESA_SHADER_TESS_EVAL:
149      states = &(st_program(prog))->affected_states;
150
151      *states = ST_NEW_TES_STATE |
152                ST_NEW_RASTERIZER;
153
154      set_affected_state_flags(states, prog,
155                               ST_NEW_TES_CONSTANTS,
156                               ST_NEW_TES_SAMPLER_VIEWS,
157                               ST_NEW_TES_SAMPLERS,
158                               ST_NEW_TES_IMAGES,
159                               ST_NEW_TES_UBOS,
160                               ST_NEW_TES_SSBOS,
161                               ST_NEW_TES_ATOMICS);
162      break;
163
164   case MESA_SHADER_GEOMETRY:
165      states = &(st_program(prog))->affected_states;
166
167      *states = ST_NEW_GS_STATE |
168                ST_NEW_RASTERIZER;
169
170      set_affected_state_flags(states, prog,
171                               ST_NEW_GS_CONSTANTS,
172                               ST_NEW_GS_SAMPLER_VIEWS,
173                               ST_NEW_GS_SAMPLERS,
174                               ST_NEW_GS_IMAGES,
175                               ST_NEW_GS_UBOS,
176                               ST_NEW_GS_SSBOS,
177                               ST_NEW_GS_ATOMICS);
178      break;
179
180   case MESA_SHADER_FRAGMENT:
181      states = &((struct st_program*)prog)->affected_states;
182
183      /* gl_FragCoord and glDrawPixels always use constants. */
184      *states = ST_NEW_FS_STATE |
185                ST_NEW_SAMPLE_SHADING |
186                ST_NEW_FS_CONSTANTS;
187
188      set_affected_state_flags(states, prog,
189                               ST_NEW_FS_CONSTANTS,
190                               ST_NEW_FS_SAMPLER_VIEWS,
191                               ST_NEW_FS_SAMPLERS,
192                               ST_NEW_FS_IMAGES,
193                               ST_NEW_FS_UBOS,
194                               ST_NEW_FS_SSBOS,
195                               ST_NEW_FS_ATOMICS);
196      break;
197
198   case MESA_SHADER_COMPUTE:
199      states = &((struct st_program*)prog)->affected_states;
200
201      *states = ST_NEW_CS_STATE;
202
203      set_affected_state_flags(states, prog,
204                               ST_NEW_CS_CONSTANTS,
205                               ST_NEW_CS_SAMPLER_VIEWS,
206                               ST_NEW_CS_SAMPLERS,
207                               ST_NEW_CS_IMAGES,
208                               ST_NEW_CS_UBOS,
209                               ST_NEW_CS_SSBOS,
210                               ST_NEW_CS_ATOMICS);
211      break;
212
213   default:
214      unreachable("unhandled shader stage");
215   }
216}
217
218
219/**
220 * Delete a shader variant.  Note the caller must unlink the variant from
221 * the linked list.
222 */
223static void
224delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
225{
226   if (v->driver_shader) {
227      if (target == GL_VERTEX_PROGRAM_ARB &&
228          ((struct st_common_variant*)v)->key.is_draw_shader) {
229         /* Draw shader. */
230         draw_delete_vertex_shader(st->draw, v->driver_shader);
231      } else if (st->has_shareable_shaders || v->st == st) {
232         /* The shader's context matches the calling context, or we
233          * don't care.
234          */
235         switch (target) {
236         case GL_VERTEX_PROGRAM_ARB:
237            st->pipe->delete_vs_state(st->pipe, v->driver_shader);
238            break;
239         case GL_TESS_CONTROL_PROGRAM_NV:
240            st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
241            break;
242         case GL_TESS_EVALUATION_PROGRAM_NV:
243            st->pipe->delete_tes_state(st->pipe, v->driver_shader);
244            break;
245         case GL_GEOMETRY_PROGRAM_NV:
246            st->pipe->delete_gs_state(st->pipe, v->driver_shader);
247            break;
248         case GL_FRAGMENT_PROGRAM_ARB:
249            st->pipe->delete_fs_state(st->pipe, v->driver_shader);
250            break;
251         case GL_COMPUTE_PROGRAM_NV:
252            st->pipe->delete_compute_state(st->pipe, v->driver_shader);
253            break;
254         default:
255            unreachable("bad shader type in delete_basic_variant");
256         }
257      } else {
258         /* We can't delete a shader with a context different from the one
259          * that created it.  Add it to the creating context's zombie list.
260          */
261         enum pipe_shader_type type =
262            pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
263
264         st_save_zombie_shader(v->st, type, v->driver_shader);
265      }
266   }
267
268   free(v);
269}
270
271static void
272st_unbind_program(struct st_context *st, struct st_program *p)
273{
274   /* Unbind the shader in cso_context and re-bind in st/mesa. */
275   switch (p->Base.info.stage) {
276   case MESA_SHADER_VERTEX:
277      cso_set_vertex_shader_handle(st->cso_context, NULL);
278      st->dirty |= ST_NEW_VS_STATE;
279      break;
280   case MESA_SHADER_TESS_CTRL:
281      cso_set_tessctrl_shader_handle(st->cso_context, NULL);
282      st->dirty |= ST_NEW_TCS_STATE;
283      break;
284   case MESA_SHADER_TESS_EVAL:
285      cso_set_tesseval_shader_handle(st->cso_context, NULL);
286      st->dirty |= ST_NEW_TES_STATE;
287      break;
288   case MESA_SHADER_GEOMETRY:
289      cso_set_geometry_shader_handle(st->cso_context, NULL);
290      st->dirty |= ST_NEW_GS_STATE;
291      break;
292   case MESA_SHADER_FRAGMENT:
293      cso_set_fragment_shader_handle(st->cso_context, NULL);
294      st->dirty |= ST_NEW_FS_STATE;
295      break;
296   case MESA_SHADER_COMPUTE:
297      cso_set_compute_shader_handle(st->cso_context, NULL);
298      st->dirty |= ST_NEW_CS_STATE;
299      break;
300   default:
301      unreachable("invalid shader type");
302   }
303}
304
305/**
306 * Free all basic program variants.
307 */
308void
309st_release_variants(struct st_context *st, struct st_program *p)
310{
311   struct st_variant *v;
312
313   /* If we are releasing shaders, re-bind them, because we don't
314    * know which shaders are bound in the driver.
315    */
316   if (p->variants)
317      st_unbind_program(st, p);
318
319   for (v = p->variants; v; ) {
320      struct st_variant *next = v->next;
321      delete_variant(st, v, p->Base.Target);
322      v = next;
323   }
324
325   p->variants = NULL;
326
327   if (p->state.tokens) {
328      ureg_free_tokens(p->state.tokens);
329      p->state.tokens = NULL;
330   }
331
332   /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
333    * it has resulted in the driver taking ownership of the NIR.  Those
334    * callers should be NULLing out the nir field in any pipe_shader_state
335    * that might have this called in order to indicate that.
336    *
337    * GLSL IR and ARB programs will have set gl_program->nir to the same
338    * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
339    */
340}
341
342/**
343 * Free all basic program variants and unref program.
344 */
345void
346st_release_program(struct st_context *st, struct st_program **p)
347{
348   if (!*p)
349      return;
350
351   destroy_program_variants(st, &((*p)->Base));
352   st_reference_prog(st, p, NULL);
353}
354
355void
356st_finalize_nir_before_variants(struct nir_shader *nir)
357{
358   NIR_PASS_V(nir, nir_split_var_copies);
359   NIR_PASS_V(nir, nir_lower_var_copies);
360   if (nir->options->lower_all_io_to_temps ||
361       nir->options->lower_all_io_to_elements ||
362       nir->info.stage == MESA_SHADER_VERTEX ||
363       nir->info.stage == MESA_SHADER_GEOMETRY) {
364      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
365   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
366      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
367   }
368
369   /* st_nir_assign_vs_in_locations requires correct shader info. */
370   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
371
372   st_nir_assign_vs_in_locations(nir);
373}
374
/* Post-process NIR freshly translated from an ARB assembly program:
 * SSA conversion, window-position/system-value lowering, optimization,
 * and (optionally) the first st_finalize_nir pass.
 */
static void
st_prog_to_nir_postprocess(struct st_context *st, nir_shader *nir,
                           struct gl_program *prog)
{
   struct pipe_screen *screen = st->screen;

   /* prog_to_nir emits registers; convert to SSA before anything else. */
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");

   NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   /* Optimise NIR */
   NIR_PASS_V(nir, nir_opt_constant_folding);
   st_nir_opts(nir);
   st_finalize_nir_before_variants(nir);

   /* When the driver allows finalizing twice, do the first finalize here;
    * variant creation may finalize again later.
    */
   if (st->allow_st_finalize_nir_twice) {
      char *msg = st_finalize_nir(st, prog, NULL, nir, true, true);
      free(msg);
   }

   nir_validate_shader(nir, "after st/glsl finalize_nir");
}
400
401/**
402 * Translate ARB (asm) program to NIR
403 */
404static nir_shader *
405st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
406                         gl_shader_stage stage)
407{
408   const struct nir_shader_compiler_options *options =
409      st_get_nir_compiler_options(st, prog->info.stage);
410
411   /* Translate to NIR */
412   nir_shader *nir = prog_to_nir(prog, options);
413
414   st_prog_to_nir_postprocess(st, nir, prog);
415
416   return nir;
417}
418
419/**
420 * Prepare st_vertex_program info.
421 *
422 * attrib_to_index is an optional mapping from a vertex attrib to a shader
423 * input index.
424 */
425void
426st_prepare_vertex_program(struct st_program *stp, uint8_t *out_attrib_to_index)
427{
428   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
429   uint8_t attrib_to_index[VERT_ATTRIB_MAX] = {0};
430
431   stvp->num_inputs = 0;
432   stvp->vert_attrib_mask = 0;
433   memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
434
435   /* Determine number of inputs, the mappings between VERT_ATTRIB_x
436    * and TGSI generic input indexes, plus input attrib semantic info.
437    */
438   for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
439      if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
440         attrib_to_index[attr] = stvp->num_inputs;
441         stvp->vert_attrib_mask |= BITFIELD_BIT(attr);
442         stvp->num_inputs++;
443      }
444   }
445
446   /* pre-setup potentially unused edgeflag input */
447   attrib_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
448
449   /* Compute mapping of vertex program outputs to slots. */
450   unsigned num_outputs = 0;
451   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
452      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
453         stvp->result_to_output[attr] = num_outputs++;
454   }
455   /* pre-setup potentially unused edgeflag output */
456   stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
457
458   if (out_attrib_to_index)
459      memcpy(out_attrib_to_index, attrib_to_index, sizeof(attrib_to_index));
460}
461
462void
463st_translate_stream_output_info(struct gl_program *prog)
464{
465   struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
466   if (!info)
467      return;
468
469   /* Determine the (default) output register mapping for each output. */
470   unsigned num_outputs = 0;
471   ubyte output_mapping[VARYING_SLOT_TESS_MAX];
472   memset(output_mapping, 0, sizeof(output_mapping));
473
474   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
475      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
476         output_mapping[attr] = num_outputs++;
477   }
478
479   /* Translate stream output info. */
480   struct pipe_stream_output_info *so_info =
481      &((struct st_program*)prog)->state.stream_output;
482
483   for (unsigned i = 0; i < info->NumOutputs; i++) {
484      so_info->output[i].register_index =
485         output_mapping[info->Outputs[i].OutputRegister];
486      so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
487      so_info->output[i].num_components = info->Outputs[i].NumComponents;
488      so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
489      so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
490      so_info->output[i].stream = info->Outputs[i].StreamId;
491   }
492
493   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
494      so_info->stride[i] = info->Buffers[i].Stride;
495   }
496   so_info->num_outputs = info->NumOutputs;
497}
498
499/**
500 * Creates a driver shader from a NIR shader.  Takes ownership of the
501 * passed nir_shader.
502 */
503struct pipe_shader_state *
504st_create_nir_shader(struct st_context *st, struct pipe_shader_state *state)
505{
506   struct pipe_context *pipe = st->pipe;
507   struct pipe_screen *screen = st->screen;
508
509   assert(state->type == PIPE_SHADER_IR_NIR);
510   nir_shader *nir = state->ir.nir;
511   gl_shader_stage stage = nir->info.stage;
512   enum pipe_shader_type sh = pipe_shader_type_from_mesa(stage);
513
514   if (ST_DEBUG & DEBUG_PRINT_IR) {
515      fprintf(stderr, "NIR before handing off to driver:\n");
516      nir_print_shader(nir, stderr);
517   }
518
519   if (PIPE_SHADER_IR_NIR !=
520       screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_PREFERRED_IR)) {
521      /* u_screen.c defaults to images as deref enabled for some reason (which
522       * is what radeonsi wants), but nir-to-tgsi requires lowered images.
523       */
524      if (screen->get_param(screen, PIPE_CAP_NIR_IMAGES_AS_DEREF))
525         NIR_PASS_V(nir, gl_nir_lower_images, false);
526
527      state->type = PIPE_SHADER_IR_TGSI;
528      state->tokens = nir_to_tgsi(nir, screen);
529
530      if (ST_DEBUG & DEBUG_PRINT_IR) {
531         fprintf(stderr, "TGSI for driver after nir-to-tgsi:\n");
532         tgsi_dump(state->tokens, 0);
533         fprintf(stderr, "\n");
534      }
535   }
536
537   struct pipe_shader_state *shader;
538   switch (stage) {
539   case MESA_SHADER_VERTEX:
540      shader = pipe->create_vs_state(pipe, state);
541      break;
542   case MESA_SHADER_TESS_CTRL:
543      shader = pipe->create_tcs_state(pipe, state);
544      break;
545   case MESA_SHADER_TESS_EVAL:
546      shader = pipe->create_tes_state(pipe, state);
547      break;
548   case MESA_SHADER_GEOMETRY:
549      shader = pipe->create_gs_state(pipe, state);
550      break;
551   case MESA_SHADER_FRAGMENT:
552      shader = pipe->create_fs_state(pipe, state);
553      break;
554   case MESA_SHADER_COMPUTE: {
555      struct pipe_compute_state cs = {0};
556      cs.ir_type = state->type;
557      cs.req_local_mem = nir->info.shared_size;
558
559      if (state->type == PIPE_SHADER_IR_NIR)
560         cs.prog = state->ir.nir;
561      else
562         cs.prog = state->tokens;
563
564      shader = pipe->create_compute_state(pipe, &cs);
565      break;
566   }
567   default:
568      unreachable("unsupported shader stage");
569      return NULL;
570   }
571
572   if (state->type == PIPE_SHADER_IR_TGSI)
573      tgsi_free_tokens(state->tokens);
574
575   return shader;
576}
577
578/**
579 * Translate a vertex program.
580 */
581bool
582st_translate_vertex_program(struct st_context *st,
583                            struct st_program *stp)
584{
585   struct ureg_program *ureg;
586   enum pipe_error error;
587   unsigned num_outputs = 0;
588   unsigned attr;
589   ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
590   ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
591
592   if (stp->Base.arb.IsPositionInvariant)
593      _mesa_insert_mvp_code(st->ctx, &stp->Base);
594
595   /* ARB_vp: */
596   if (!stp->glsl_to_tgsi) {
597      _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);
598
599      /* This determines which states will be updated when the assembly
600       * shader is bound.
601       */
602      stp->affected_states = ST_NEW_VS_STATE |
603                              ST_NEW_RASTERIZER |
604                              ST_NEW_VERTEX_ARRAYS;
605
606      if (stp->Base.Parameters->NumParameters)
607         stp->affected_states |= ST_NEW_VS_CONSTANTS;
608
609      if (stp->Base.nir)
610         ralloc_free(stp->Base.nir);
611
612      if (stp->serialized_nir) {
613         free(stp->serialized_nir);
614         stp->serialized_nir = NULL;
615      }
616
617      stp->state.type = PIPE_SHADER_IR_NIR;
618      stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
619                                               MESA_SHADER_VERTEX);
620      stp->Base.info = stp->Base.nir->info;
621
622      st_prepare_vertex_program(stp, NULL);
623      return true;
624   }
625
626   uint8_t input_to_index[VERT_ATTRIB_MAX];
627   st_prepare_vertex_program(stp, input_to_index);
628
629   /* Get semantic names and indices. */
630   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
631      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
632         unsigned slot = num_outputs++;
633         unsigned semantic_name, semantic_index;
634         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
635                                      &semantic_name, &semantic_index);
636         output_semantic_name[slot] = semantic_name;
637         output_semantic_index[slot] = semantic_index;
638      }
639   }
640   /* pre-setup potentially unused edgeflag output */
641   output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
642   output_semantic_index[num_outputs] = 0;
643
644   ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->screen);
645   if (ureg == NULL)
646      return false;
647
648   ureg_setup_shader_info(ureg, &stp->Base.info);
649
650   if (ST_DEBUG & DEBUG_MESA) {
651      _mesa_print_program(&stp->Base);
652      _mesa_print_program_parameters(st->ctx, &stp->Base);
653      debug_printf("\n");
654   }
655
656   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
657
658   error = st_translate_program(st->ctx,
659                                PIPE_SHADER_VERTEX,
660                                ureg,
661                                stp->glsl_to_tgsi,
662                                &stp->Base,
663                                /* inputs */
664                                stvp->num_inputs,
665                                input_to_index,
666                                NULL, /* inputSlotToAttr */
667                                NULL, /* input semantic name */
668                                NULL, /* input semantic index */
669                                NULL, /* interp mode */
670                                /* outputs */
671                                num_outputs,
672                                stvp->result_to_output,
673                                output_semantic_name,
674                                output_semantic_index);
675
676   st_translate_stream_output_info(&stp->Base);
677
678   free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
679
680   if (error) {
681      debug_printf("%s: failed to translate GLSL IR program:\n", __func__);
682      _mesa_print_program(&stp->Base);
683      debug_assert(0);
684      return false;
685   }
686
687   stp->state.tokens = ureg_get_tokens(ureg, NULL);
688   ureg_destroy(ureg);
689
690   stp->glsl_to_tgsi = NULL;
691   st_store_ir_in_disk_cache(st, &stp->Base, false);
692
693   return stp->state.tokens != NULL;
694}
695
696static struct nir_shader *
697get_nir_shader(struct st_context *st, struct st_program *stp)
698{
699   if (stp->Base.nir) {
700      nir_shader *nir = stp->Base.nir;
701
702      /* The first shader variant takes ownership of NIR, so that there is
703       * no cloning. Additional shader variants are always generated from
704       * serialized NIR to save memory.
705       */
706      stp->Base.nir = NULL;
707      assert(stp->serialized_nir && stp->serialized_nir_size);
708      return nir;
709   }
710
711   struct blob_reader blob_reader;
712   const struct nir_shader_compiler_options *options =
713      st_get_nir_compiler_options(st, stp->Base.info.stage);
714
715   blob_reader_init(&blob_reader, stp->serialized_nir, stp->serialized_nir_size);
716   return nir_deserialize(NULL, options, &blob_reader);
717}
718
719static void
720lower_ucp(struct st_context *st,
721          struct nir_shader *nir,
722          unsigned ucp_enables,
723          struct gl_program_parameter_list *params)
724{
725   if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)
726      NIR_PASS_V(nir, nir_lower_clip_disable, ucp_enables);
727   else {
728      struct pipe_screen *screen = st->screen;
729      bool can_compact = screen->get_param(screen,
730                                           PIPE_CAP_NIR_COMPACT_ARRAYS);
731      bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
732
733      gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH] = {{0}};
734      for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
735         if (use_eye) {
736            clipplane_state[i][0] = STATE_CLIPPLANE;
737            clipplane_state[i][1] = i;
738         } else {
739            clipplane_state[i][0] = STATE_CLIP_INTERNAL;
740            clipplane_state[i][1] = i;
741         }
742         _mesa_add_state_reference(params, clipplane_state[i]);
743      }
744
745      if (nir->info.stage == MESA_SHADER_VERTEX) {
746         NIR_PASS_V(nir, nir_lower_clip_vs, ucp_enables,
747                    true, can_compact, clipplane_state);
748      } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
749         NIR_PASS_V(nir, nir_lower_clip_gs, ucp_enables,
750                    can_compact, clipplane_state);
751      }
752
753      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
754                 nir_shader_get_entrypoint(nir), true, false);
755      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
756   }
757}
758
/* State reference for the depth range; used by the TGSI depth-clamp
 * lowering in st_create_common_variant() below.
 */
static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };
761
762static struct st_common_variant *
763st_create_common_variant(struct st_context *st,
764                     struct st_program *stp,
765                     const struct st_common_variant_key *key)
766{
767   struct st_common_variant *v = CALLOC_STRUCT(st_common_variant);
768   struct pipe_context *pipe = st->pipe;
769   struct pipe_shader_state state = {0};
770
771   static const gl_state_index16 point_size_state[STATE_LENGTH] =
772      { STATE_POINT_SIZE_CLAMPED, 0 };
773   struct gl_program_parameter_list *params = stp->Base.Parameters;
774
775   v->key = *key;
776
777   state.stream_output = stp->state.stream_output;
778
779   if (stp->state.type == PIPE_SHADER_IR_NIR) {
780      bool finalize = false;
781
782      state.type = PIPE_SHADER_IR_NIR;
783      state.ir.nir = get_nir_shader(st, stp);
784      const nir_shader_compiler_options *options = ((nir_shader *)state.ir.nir)->options;
785
786      if (key->clamp_color) {
787         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
788         finalize = true;
789      }
790      if (key->passthrough_edgeflags) {
791         NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
792         finalize = true;
793      }
794
795      if (key->lower_point_size) {
796         _mesa_add_state_reference(params, point_size_state);
797         NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
798                    point_size_state);
799
800         switch (stp->Base.info.stage) {
801         case MESA_SHADER_VERTEX:
802            stp->affected_states |= ST_NEW_VS_CONSTANTS;
803            break;
804         case MESA_SHADER_TESS_EVAL:
805            stp->affected_states |= ST_NEW_TES_CONSTANTS;
806            break;
807         case MESA_SHADER_GEOMETRY:
808            stp->affected_states |= ST_NEW_GS_CONSTANTS;
809            break;
810         default:
811            unreachable("bad shader stage");
812         }
813
814         finalize = true;
815      }
816
817      if (key->lower_ucp) {
818         assert(!options->unify_interfaces);
819         lower_ucp(st, state.ir.nir, key->lower_ucp, params);
820         finalize = true;
821      }
822
823      if (st->emulate_gl_clamp &&
824          (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
825         nir_lower_tex_options tex_opts = {0};
826         tex_opts.saturate_s = key->gl_clamp[0];
827         tex_opts.saturate_t = key->gl_clamp[1];
828         tex_opts.saturate_r = key->gl_clamp[2];
829         NIR_PASS_V(state.ir.nir, nir_lower_tex, &tex_opts);
830      }
831
832      if (finalize || !st->allow_st_finalize_nir_twice) {
833         char *msg = st_finalize_nir(st, &stp->Base, stp->shader_program, state.ir.nir,
834                                     true, false);
835         free(msg);
836
837         /* Clip lowering and edgeflags may have introduced new varyings, so
838          * update the inputs_read/outputs_written. However, with
839          * unify_interfaces set (aka iris) the non-SSO varyings layout is
840          * decided at link time with outputs_written updated so the two line
841          * up.  A driver with this flag set may not use any of the lowering
842          * passes that would change the varyings, so skip to make sure we don't
843          * break its linkage.
844          */
845         if (!options->unify_interfaces) {
846            nir_shader_gather_info(state.ir.nir,
847                                   nir_shader_get_entrypoint(state.ir.nir));
848         }
849      }
850
851      if (key->is_draw_shader)
852         v->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
853      else
854         v->base.driver_shader = st_create_nir_shader(st, &state);
855
856      return v;
857   }
858
859   state.type = PIPE_SHADER_IR_TGSI;
860   state.tokens = tgsi_dup_tokens(stp->state.tokens);
861
862   /* Emulate features. */
863   if (key->clamp_color || key->passthrough_edgeflags) {
864      const struct tgsi_token *tokens;
865      unsigned flags =
866         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
867         (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
868
869      tokens = tgsi_emulate(state.tokens, flags);
870
871      if (tokens) {
872         tgsi_free_tokens(state.tokens);
873         state.tokens = tokens;
874      } else {
875         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
876      }
877   }
878
879   if (key->lower_depth_clamp) {
880      unsigned depth_range_const =
881            _mesa_add_state_reference(params, depth_range_state);
882
883      const struct tgsi_token *tokens;
884      tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
885                                         key->clip_negative_one_to_one);
886      if (tokens != state.tokens)
887         tgsi_free_tokens(state.tokens);
888      state.tokens = tokens;
889   }
890
891   if (ST_DEBUG & DEBUG_PRINT_IR)
892      tgsi_dump(state.tokens, 0);
893
894   switch (stp->Base.info.stage) {
895   case MESA_SHADER_VERTEX:
896      if (key->is_draw_shader)
897         v->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
898      else
899         v->base.driver_shader = pipe->create_vs_state(pipe, &state);
900      break;
901   case MESA_SHADER_TESS_CTRL:
902      v->base.driver_shader = pipe->create_tcs_state(pipe, &state);
903      break;
904   case MESA_SHADER_TESS_EVAL:
905      v->base.driver_shader = pipe->create_tes_state(pipe, &state);
906      break;
907   case MESA_SHADER_GEOMETRY:
908      v->base.driver_shader = pipe->create_gs_state(pipe, &state);
909      break;
910   case MESA_SHADER_COMPUTE: {
911      struct pipe_compute_state cs = {0};
912      cs.ir_type = state.type;
913      cs.req_local_mem = stp->Base.info.shared_size;
914
915      if (state.type == PIPE_SHADER_IR_NIR)
916         cs.prog = state.ir.nir;
917      else
918         cs.prog = state.tokens;
919
920      v->base.driver_shader = pipe->create_compute_state(pipe, &cs);
921      break;
922   }
923   default:
924      assert(!"unhandled shader type");
925      free(v);
926      return NULL;
927   }
928
929   if (state.tokens) {
930      tgsi_free_tokens(state.tokens);
931   }
932
933   return v;
934}
935
936static void
937st_add_variant(struct st_variant **list, struct st_variant *v)
938{
939   struct st_variant *first = *list;
940
941   /* Make sure that the default variant stays the first in the list, and insert
942    * any later variants in as the second entry.
943    */
944   if (first) {
945      v->next = first->next;
946      first->next = v;
947   } else {
948      *list = v;
949   }
950}
951
952/**
953 * Find/create a vertex program variant.
954 */
955struct st_common_variant *
956st_get_common_variant(struct st_context *st,
957                  struct st_program *stp,
958                  const struct st_common_variant_key *key)
959{
960   struct st_common_variant *v;
961
962   /* Search for existing variant */
963   for (v = st_common_variant(stp->variants); v;
964        v = st_common_variant(v->base.next)) {
965      if (memcmp(&v->key, key, sizeof(*key)) == 0) {
966         break;
967      }
968   }
969
970   if (!v) {
971      if (stp->variants != NULL) {
972         _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
973                          "Compiling %s shader variant (%s%s%s%s%s%s%s%s)",
974                          _mesa_shader_stage_to_string(stp->Base.info.stage),
975                          key->passthrough_edgeflags ? "edgeflags," : "",
976                          key->clamp_color ? "clamp_color," : "",
977                          key->lower_depth_clamp ? "depth_clamp," : "",
978                          key->clip_negative_one_to_one ? "clip_negative_one," : "",
979                          key->lower_point_size ? "point_size," : "",
980                          key->lower_ucp ? "ucp," : "",
981                          key->is_draw_shader ? "draw," : "",
982                          key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "");
983      }
984
985      /* create now */
986      v = st_create_common_variant(st, stp, key);
987      if (v) {
988         v->base.st = key->st;
989
990         if (stp->Base.info.stage == MESA_SHADER_VERTEX) {
991            struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
992
993            v->vert_attrib_mask =
994               stvp->vert_attrib_mask |
995               (key->passthrough_edgeflags ? VERT_BIT_EDGEFLAG : 0);
996         }
997
998         st_add_variant(&stp->variants, &v->base);
999      }
1000   }
1001
1002   return v;
1003}
1004
1005
1006/**
1007 * Translate a Mesa fragment shader into a TGSI shader.
1008 */
1009bool
1010st_translate_fragment_program(struct st_context *st,
1011                              struct st_program *stfp)
1012{
1013   /* Non-GLSL programs: */
1014   if (!stfp->glsl_to_tgsi) {
1015      _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
1016      if (st->ctx->Const.GLSLFragCoordIsSysVal)
1017         _mesa_program_fragment_position_to_sysval(&stfp->Base);
1018
1019      /* This determines which states will be updated when the assembly
1020       * shader is bound.
1021       *
1022       * fragment.position and glDrawPixels always use constants.
1023       */
1024      stfp->affected_states = ST_NEW_FS_STATE |
1025                              ST_NEW_SAMPLE_SHADING |
1026                              ST_NEW_FS_CONSTANTS;
1027
1028      if (stfp->ati_fs) {
1029         /* Just set them for ATI_fs unconditionally. */
1030         stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
1031                                  ST_NEW_FS_SAMPLERS;
1032      } else {
1033         /* ARB_fp */
1034         if (stfp->Base.SamplersUsed)
1035            stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
1036                                     ST_NEW_FS_SAMPLERS;
1037      }
1038
1039      /* Translate to NIR.  ATI_fs translates at variant time. */
1040      if (!stfp->ati_fs) {
1041         nir_shader *nir =
1042            st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
1043
1044         if (stfp->Base.nir)
1045            ralloc_free(stfp->Base.nir);
1046         if (stfp->serialized_nir) {
1047            free(stfp->serialized_nir);
1048            stfp->serialized_nir = NULL;
1049         }
1050         stfp->state.type = PIPE_SHADER_IR_NIR;
1051         stfp->Base.nir = nir;
1052      }
1053
1054      return true;
1055   }
1056
1057   ubyte outputMapping[2 * FRAG_RESULT_MAX];
1058   ubyte inputMapping[VARYING_SLOT_MAX];
1059   ubyte inputSlotToAttr[VARYING_SLOT_MAX];
1060   ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
1061   GLuint attr;
1062   GLbitfield64 inputsRead;
1063   struct ureg_program *ureg;
1064
1065   GLboolean write_all = GL_FALSE;
1066
1067   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1068   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1069   uint fs_num_inputs = 0;
1070
1071   ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1072   ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1073   uint fs_num_outputs = 0;
1074
1075   memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
1076
1077   /*
1078    * Convert Mesa program inputs to TGSI input register semantics.
1079    */
1080   inputsRead = stfp->Base.info.inputs_read;
1081   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1082      if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
1083         const GLuint slot = fs_num_inputs++;
1084
1085         inputMapping[attr] = slot;
1086         inputSlotToAttr[slot] = attr;
1087
1088         switch (attr) {
1089         case VARYING_SLOT_POS:
1090            input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
1091            input_semantic_index[slot] = 0;
1092            interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1093            break;
1094         case VARYING_SLOT_COL0:
1095            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
1096            input_semantic_index[slot] = 0;
1097            interpMode[slot] = stfp->glsl_to_tgsi ?
1098               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
1099            break;
1100         case VARYING_SLOT_COL1:
1101            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
1102            input_semantic_index[slot] = 1;
1103            interpMode[slot] = stfp->glsl_to_tgsi ?
1104               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
1105            break;
1106         case VARYING_SLOT_FOGC:
1107            input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
1108            input_semantic_index[slot] = 0;
1109            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
1110            break;
1111         case VARYING_SLOT_FACE:
1112            input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
1113            input_semantic_index[slot] = 0;
1114            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
1115            break;
1116         case VARYING_SLOT_PRIMITIVE_ID:
1117            input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
1118            input_semantic_index[slot] = 0;
1119            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
1120            break;
1121         case VARYING_SLOT_LAYER:
1122            input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
1123            input_semantic_index[slot] = 0;
1124            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
1125            break;
1126         case VARYING_SLOT_VIEWPORT:
1127            input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
1128            input_semantic_index[slot] = 0;
1129            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
1130            break;
1131         case VARYING_SLOT_CLIP_DIST0:
1132            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
1133            input_semantic_index[slot] = 0;
1134            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
1135            break;
1136         case VARYING_SLOT_CLIP_DIST1:
1137            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
1138            input_semantic_index[slot] = 1;
1139            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
1140            break;
1141         case VARYING_SLOT_CULL_DIST0:
1142         case VARYING_SLOT_CULL_DIST1:
1143            /* these should have been lowered by GLSL */
1144            assert(0);
1145            break;
1146            /* In most cases, there is nothing special about these
1147             * inputs, so adopt a convention to use the generic
1148             * semantic name and the mesa VARYING_SLOT_ number as the
1149             * index.
1150             *
1151             * All that is required is that the vertex shader labels
1152             * its own outputs similarly, and that the vertex shader
1153             * generates at least every output required by the
1154             * fragment shader plus fixed-function hardware (such as
1155             * BFC).
1156             *
1157             * However, some drivers may need us to identify the PNTC and TEXi
1158             * varyings if, for example, their capability to replace them with
1159             * sprite coordinates is limited.
1160             */
1161         case VARYING_SLOT_PNTC:
1162            if (st->needs_texcoord_semantic) {
1163               input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
1164               input_semantic_index[slot] = 0;
1165               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1166               break;
1167            }
1168            FALLTHROUGH;
1169         case VARYING_SLOT_TEX0:
1170         case VARYING_SLOT_TEX1:
1171         case VARYING_SLOT_TEX2:
1172         case VARYING_SLOT_TEX3:
1173         case VARYING_SLOT_TEX4:
1174         case VARYING_SLOT_TEX5:
1175         case VARYING_SLOT_TEX6:
1176         case VARYING_SLOT_TEX7:
1177            if (st->needs_texcoord_semantic) {
1178               input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
1179               input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
1180               interpMode[slot] = stfp->glsl_to_tgsi ?
1181                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1182               break;
1183            }
1184            FALLTHROUGH;
1185         case VARYING_SLOT_VAR0:
1186         default:
1187            /* Semantic indices should be zero-based because drivers may choose
1188             * to assign a fixed slot determined by that index.
1189             * This is useful because ARB_separate_shader_objects uses location
1190             * qualifiers for linkage, and if the semantic index corresponds to
1191             * these locations, linkage passes in the driver become unecessary.
1192             *
1193             * If needs_texcoord_semantic is true, no semantic indices will be
1194             * consumed for the TEXi varyings, and we can base the locations of
1195             * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
1196             */
1197            assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
1198                   (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
1199            input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
1200            input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
1201            if (attr == VARYING_SLOT_PNTC)
1202               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1203            else {
1204               interpMode[slot] = stfp->glsl_to_tgsi ?
1205                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1206            }
1207            break;
1208         }
1209      }
1210      else {
1211         inputMapping[attr] = -1;
1212      }
1213   }
1214
1215   /*
1216    * Semantics and mapping for outputs
1217    */
1218   GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
1219
1220   /* if z is written, emit that first */
1221   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
1222      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
1223      fs_output_semantic_index[fs_num_outputs] = 0;
1224      outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
1225      fs_num_outputs++;
1226      outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
1227   }
1228
1229   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
1230      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
1231      fs_output_semantic_index[fs_num_outputs] = 0;
1232      outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
1233      fs_num_outputs++;
1234      outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
1235   }
1236
1237   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
1238      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
1239      fs_output_semantic_index[fs_num_outputs] = 0;
1240      outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
1241      fs_num_outputs++;
1242      outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
1243   }
1244
1245   /* handle remaining outputs (color) */
1246   for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
1247      const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
1248         stfp->Base.SecondaryOutputsWritten;
1249      const unsigned loc = attr % FRAG_RESULT_MAX;
1250
1251      if (written & BITFIELD64_BIT(loc)) {
1252         switch (loc) {
1253         case FRAG_RESULT_DEPTH:
1254         case FRAG_RESULT_STENCIL:
1255         case FRAG_RESULT_SAMPLE_MASK:
1256            /* handled above */
1257            assert(0);
1258            break;
1259         case FRAG_RESULT_COLOR:
1260            write_all = GL_TRUE;
1261            FALLTHROUGH;
1262         default: {
1263            int index;
1264            assert(loc == FRAG_RESULT_COLOR ||
1265                   (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1266
1267            index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1268
1269            if (attr >= FRAG_RESULT_MAX) {
1270               /* Secondary color for dual source blending. */
1271               assert(index == 0);
1272               index++;
1273            }
1274
1275            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1276            fs_output_semantic_index[fs_num_outputs] = index;
1277            outputMapping[attr] = fs_num_outputs;
1278            break;
1279         }
1280         }
1281
1282         fs_num_outputs++;
1283      }
1284   }
1285
1286   ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->screen);
1287   if (ureg == NULL)
1288      return false;
1289
1290   ureg_setup_shader_info(ureg, &stfp->Base.info);
1291
1292   if (ST_DEBUG & DEBUG_MESA) {
1293      _mesa_print_program(&stfp->Base);
1294      _mesa_print_program_parameters(st->ctx, &stfp->Base);
1295      debug_printf("\n");
1296   }
1297   if (write_all == GL_TRUE)
1298      ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1299
1300   if (stfp->glsl_to_tgsi) {
1301      st_translate_program(st->ctx,
1302                           PIPE_SHADER_FRAGMENT,
1303                           ureg,
1304                           stfp->glsl_to_tgsi,
1305                           &stfp->Base,
1306                           /* inputs */
1307                           fs_num_inputs,
1308                           inputMapping,
1309                           inputSlotToAttr,
1310                           input_semantic_name,
1311                           input_semantic_index,
1312                           interpMode,
1313                           /* outputs */
1314                           fs_num_outputs,
1315                           outputMapping,
1316                           fs_output_semantic_name,
1317                           fs_output_semantic_index);
1318
1319      free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1320   }
1321
1322   stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1323   ureg_destroy(ureg);
1324
1325   if (stfp->glsl_to_tgsi) {
1326      stfp->glsl_to_tgsi = NULL;
1327      st_store_ir_in_disk_cache(st, &stfp->Base, false);
1328   }
1329
1330   return stfp->state.tokens != NULL;
1331}
1332
/**
 * Compile a new fragment-shader variant for the given key.
 *
 * Takes the NIR path when the program IR is NIR (or ATI_fs, which is
 * translated to NIR here), applying the key's lowering passes; otherwise
 * applies the equivalent TGSI token transformations.  Returns the new
 * variant, or NULL on allocation failure.
 */
static struct st_fp_variant *
st_create_fp_variant(struct st_context *st,
                     struct st_program *stfp,
                     const struct st_fp_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = stfp->Base.Parameters;
   /* State-var token templates referenced by the drawpixels/alpha-test
    * lowering below; registered with `params` on demand.
    */
   static const gl_state_index16 texcoord_state[STATE_LENGTH] =
      { STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
   static const gl_state_index16 scale_state[STATE_LENGTH] =
      { STATE_PT_SCALE };
   static const gl_state_index16 bias_state[STATE_LENGTH] =
      { STATE_PT_BIAS };
   static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
      { STATE_ALPHA_REF };

   if (!variant)
      return NULL;

   /* Translate ATI_fs to NIR at variant time because that's when we have the
    * texture types.
    */
   if (stfp->ati_fs) {
      const struct nir_shader_compiler_options *options =
         st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT);

      nir_shader *s = st_translate_atifs_program(stfp->ati_fs, key, &stfp->Base, options);

      st_prog_to_nir_postprocess(st, s, &stfp->Base);

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = s;
   } else if (stfp->state.type == PIPE_SHADER_IR_NIR) {
      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = get_nir_shader(st, stfp);
   }

   if (state.type == PIPE_SHADER_IR_NIR) {
      /* NIR path: run each lowering the key asks for; `finalize` tracks
       * whether any pass changed the shader so it needs re-finalizing.
       */
      bool finalize = false;

      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }

      if (key->lower_flatshade) {
         NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
         finalize = true;
      }

      if (key->lower_alpha_func != COMPARE_FUNC_ALWAYS) {
         _mesa_add_state_reference(params, alpha_ref_state);
         NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
                    false, alpha_ref_state);
         finalize = true;
      }

      if (key->lower_two_sided_color) {
         bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal;
         NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color, face_sysval);
         finalize = true;
      }

      if (key->persample_shading) {
          nir_shader *shader = state.ir.nir;
          nir_foreach_shader_in_variable(var, shader)
             var->data.sample = true;
          finalize = true;
      }

      if (key->lower_texcoord_replace) {
         bool point_coord_is_sysval = st->ctx->Const.GLSLPointCoordIsSysVal;
         NIR_PASS_V(state.ir.nir, nir_lower_texcoord_replace,
                    key->lower_texcoord_replace, point_coord_is_sysval, false);
         finalize = true;
      }

      /* Emulate GL_CLAMP wrap mode by saturating texture coordinates. */
      if (st->emulate_gl_clamp &&
          (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
         nir_lower_tex_options tex_opts = {0};
         tex_opts.saturate_s = key->gl_clamp[0];
         tex_opts.saturate_t = key->gl_clamp[1];
         tex_opts.saturate_r = key->gl_clamp[2];
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &tex_opts);
         finalize = true;
      }

      assert(!(key->bitmap && key->drawpixels));

      /* glBitmap */
      if (key->bitmap) {
         nir_lower_bitmap_options options = {0};

         /* Use the first sampler slot the program doesn't already occupy. */
         variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
         options.sampler = variant->bitmap_sampler;
         options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;

         NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
         finalize = true;
      }

      /* glDrawPixels (color only) */
      if (key->drawpixels) {
         nir_lower_drawpixels_options options = {{0}};
         unsigned samplers_used = stfp->Base.SamplersUsed;

         /* Find the first unused slot. */
         variant->drawpix_sampler = ffs(~samplers_used) - 1;
         options.drawpix_sampler = variant->drawpix_sampler;
         samplers_used |= (1 << variant->drawpix_sampler);

         options.pixel_maps = key->pixelMaps;
         if (key->pixelMaps) {
            variant->pixelmap_sampler = ffs(~samplers_used) - 1;
            options.pixelmap_sampler = variant->pixelmap_sampler;
         }

         options.scale_and_bias = key->scaleAndBias;
         if (key->scaleAndBias) {
            _mesa_add_state_reference(params, scale_state);
            memcpy(options.scale_state_tokens, scale_state,
                   sizeof(options.scale_state_tokens));
            _mesa_add_state_reference(params, bias_state);
            memcpy(options.bias_state_tokens, bias_state,
                   sizeof(options.bias_state_tokens));
         }

         _mesa_add_state_reference(params, texcoord_state);
         memcpy(options.texcoord_state_tokens, texcoord_state,
                sizeof(options.texcoord_state_tokens));

         NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
         finalize = true;
      }

      bool need_lower_tex_src_plane = false;

      /* YUV / external-texture lowering (samplerExternalOES). */
      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv ||
                   key->external.lower_yuv || key->external.lower_yu_yv ||
                   key->external.lower_y41x)) {

         st_nir_lower_samplers(st->screen, state.ir.nir,
                               stfp->shader_program, &stfp->Base);

         nir_lower_tex_options options = {0};
         options.lower_y_uv_external = key->external.lower_nv12;
         options.lower_y_u_v_external = key->external.lower_iyuv;
         options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
         options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
         options.lower_ayuv_external = key->external.lower_ayuv;
         options.lower_xyuv_external = key->external.lower_xyuv;
         options.lower_yuv_external = key->external.lower_yuv;
         options.lower_yu_yv_external = key->external.lower_yu_yv;
         options.lower_y41x_external = key->external.lower_y41x;
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
         finalize = true;
         need_lower_tex_src_plane = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         char *msg = st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
                                     false, false);
         free(msg);
      }

      /* This pass needs to happen *after* nir_lower_sampler */
      if (unlikely(need_lower_tex_src_plane)) {
         NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
                    ~stfp->Base.SamplersUsed,
                    key->external.lower_nv12 | key->external.lower_xy_uxvx |
                       key->external.lower_yx_xuxv,
                    key->external.lower_iyuv);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));

         struct pipe_screen *screen = st->screen;
         if (screen->finalize_nir) {
            char *msg = screen->finalize_nir(screen, state.ir.nir);
            free(msg);
         }
      }

      variant->base.driver_shader = st_create_nir_shader(st, &state);
      variant->key = *key;

      return variant;
   }

   /* TGSI path: start from the program's tokens and replace them pass by
    * pass.  Each successful transform frees the intermediate tokens, but
    * never the program's own stfp->state.tokens.
    */
   state.tokens = stfp->state.tokens;

   assert(!(key->bitmap && key->drawpixels));

   /* Emulate features. */
   if (key->clamp_color || key->persample_shading) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
   }

   /* glBitmap */
   if (key->bitmap) {
      const struct tgsi_token *tokens;

      /* Use the first sampler slot the program doesn't already occupy. */
      variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      tokens = st_get_bitmap_shader(state.tokens,
                                    st->internal_target,
                                    variant->bitmap_sampler,
                                    st->needs_texcoord_semantic,
                                    st->bitmap.tex_format ==
                                    PIPE_FORMAT_R8_UNORM);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
   }

   /* glDrawPixels (color only) */
   if (key->drawpixels) {
      const struct tgsi_token *tokens;
      unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;

      /* Find the first unused slot. */
      variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      if (key->pixelMaps) {
         unsigned samplers_used = stfp->Base.SamplersUsed |
                                  (1 << variant->drawpix_sampler);

         variant->pixelmap_sampler = ffs(~samplers_used) - 1;
      }

      if (key->scaleAndBias) {
         scale_const = _mesa_add_state_reference(params, scale_state);
         bias_const = _mesa_add_state_reference(params, bias_state);
      }

      texcoord_const = _mesa_add_state_reference(params, texcoord_state);

      tokens = st_get_drawpix_shader(state.tokens,
                                     st->needs_texcoord_semantic,
                                     key->scaleAndBias, scale_const,
                                     bias_const, key->pixelMaps,
                                     variant->drawpix_sampler,
                                     variant->pixelmap_sampler,
                                     texcoord_const, st->internal_target);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
   }

   if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
      const struct tgsi_token *tokens;

      /* samplers inserted would conflict, but this should be unpossible: */
      assert(!(key->bitmap || key->drawpixels));

      tokens = st_tgsi_lower_yuv(state.tokens,
                                 ~stfp->Base.SamplersUsed,
                                 key->external.lower_nv12 ||
                                    key->external.lower_xy_uxvx ||
                                    key->external.lower_yx_xuxv,
                                 key->external.lower_iyuv);
      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
      if (state.tokens != stfp->state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   /* fill in variant */
   variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
   variant->key = *key;

   /* The driver made its own copy; release any lowered token buffer. */
   if (state.tokens != stfp->state.tokens)
      tgsi_free_tokens(state.tokens);
   return variant;
}
1653
1654/**
1655 * Translate fragment program if needed.
1656 */
1657struct st_fp_variant *
1658st_get_fp_variant(struct st_context *st,
1659                  struct st_program *stfp,
1660                  const struct st_fp_variant_key *key)
1661{
1662   struct st_fp_variant *fpv;
1663
1664   /* Search for existing variant */
1665   for (fpv = st_fp_variant(stfp->variants); fpv;
1666        fpv = st_fp_variant(fpv->base.next)) {
1667      if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1668         break;
1669      }
1670   }
1671
1672   if (!fpv) {
1673      /* create new */
1674
1675      if (stfp->variants != NULL) {
1676         _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
1677                          "Compiling fragment shader variant (%s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
1678                          key->bitmap ? "bitmap," : "",
1679                          key->drawpixels ? "drawpixels," : "",
1680                          key->scaleAndBias ? "scale_bias," : "",
1681                          key->pixelMaps ? "pixel_maps," : "",
1682                          key->clamp_color ? "clamp_color," : "",
1683                          key->persample_shading ? "persample_shading," : "",
1684                          key->fog ? "fog," : "",
1685                          key->lower_depth_clamp ? "depth_clamp," : "",
1686                          key->lower_two_sided_color ? "twoside," : "",
1687                          key->lower_flatshade ? "flatshade," : "",
1688                          key->lower_texcoord_replace ? "texcoord_replace," : "",
1689                          key->lower_alpha_func ? "alpha_compare," : "",
1690                          /* skipped ATI_fs targets */
1691                          stfp->Base.ExternalSamplersUsed ? "external?," : "",
1692                          key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "");
1693      }
1694
1695      fpv = st_create_fp_variant(st, stfp, key);
1696      if (fpv) {
1697         fpv->base.st = key->st;
1698
1699         st_add_variant(&stfp->variants, &fpv->base);
1700      }
1701   }
1702
1703   return fpv;
1704}
1705
1706/**
1707 * Translate a program. This is common code for geometry and tessellation
1708 * shaders.
1709 */
1710bool
1711st_translate_common_program(struct st_context *st,
1712                            struct st_program *stp)
1713{
1714   struct gl_program *prog = &stp->Base;
1715   enum pipe_shader_type stage =
1716      pipe_shader_type_from_mesa(stp->Base.info.stage);
1717   struct ureg_program *ureg = ureg_create_with_screen(stage, st->screen);
1718
1719   if (ureg == NULL)
1720      return false;
1721
1722   ureg_setup_shader_info(ureg, &stp->Base.info);
1723
1724   ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1725   ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1726   ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1727   GLuint attr;
1728
1729   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1730   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1731   uint num_inputs = 0;
1732
1733   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1734   ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1735   uint num_outputs = 0;
1736
1737   GLint i;
1738
1739   memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1740   memset(inputMapping, 0, sizeof(inputMapping));
1741   memset(outputMapping, 0, sizeof(outputMapping));
1742   memset(&stp->state, 0, sizeof(stp->state));
1743
1744   /*
1745    * Convert Mesa program inputs to TGSI input register semantics.
1746    */
1747   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1748      if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1749         continue;
1750
1751      unsigned slot = num_inputs++;
1752
1753      inputMapping[attr] = slot;
1754      inputSlotToAttr[slot] = attr;
1755
1756      unsigned semantic_name, semantic_index;
1757      tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1758                                   &semantic_name, &semantic_index);
1759      input_semantic_name[slot] = semantic_name;
1760      input_semantic_index[slot] = semantic_index;
1761   }
1762
1763   /* Also add patch inputs. */
1764   for (attr = 0; attr < 32; attr++) {
1765      if (prog->info.patch_inputs_read & (1u << attr)) {
1766         GLuint slot = num_inputs++;
1767         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1768
1769         inputMapping[patch_attr] = slot;
1770         inputSlotToAttr[slot] = patch_attr;
1771         input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1772         input_semantic_index[slot] = attr;
1773      }
1774   }
1775
1776   /* initialize output semantics to defaults */
1777   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1778      output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1779      output_semantic_index[i] = 0;
1780   }
1781
1782   /*
1783    * Determine number of outputs, the (default) output register
1784    * mapping and the semantic information for each output.
1785    */
1786   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1787      if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1788         GLuint slot = num_outputs++;
1789
1790         outputMapping[attr] = slot;
1791
1792         unsigned semantic_name, semantic_index;
1793         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1794                                      &semantic_name, &semantic_index);
1795         output_semantic_name[slot] = semantic_name;
1796         output_semantic_index[slot] = semantic_index;
1797      }
1798   }
1799
1800   /* Also add patch outputs. */
1801   for (attr = 0; attr < 32; attr++) {
1802      if (prog->info.patch_outputs_written & (1u << attr)) {
1803         GLuint slot = num_outputs++;
1804         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1805
1806         outputMapping[patch_attr] = slot;
1807         output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1808         output_semantic_index[slot] = attr;
1809      }
1810   }
1811
1812   st_translate_program(st->ctx,
1813                        stage,
1814                        ureg,
1815                        stp->glsl_to_tgsi,
1816                        prog,
1817                        /* inputs */
1818                        num_inputs,
1819                        inputMapping,
1820                        inputSlotToAttr,
1821                        input_semantic_name,
1822                        input_semantic_index,
1823                        NULL,
1824                        /* outputs */
1825                        num_outputs,
1826                        outputMapping,
1827                        output_semantic_name,
1828                        output_semantic_index);
1829
1830   stp->state.tokens = ureg_get_tokens(ureg, NULL);
1831
1832   ureg_destroy(ureg);
1833
1834   st_translate_stream_output_info(prog);
1835
1836   st_store_ir_in_disk_cache(st, prog, false);
1837
1838   if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1839      _mesa_print_program(prog);
1840
1841   free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1842   stp->glsl_to_tgsi = NULL;
1843   return true;
1844}
1845
1846
1847/**
1848 * Vert/Geom/Frag programs have per-context variants.  Free all the
1849 * variants attached to the given program which match the given context.
1850 */
1851static void
1852destroy_program_variants(struct st_context *st, struct gl_program *target)
1853{
1854   if (!target || target == &_mesa_DummyProgram)
1855      return;
1856
1857   struct st_program *p = st_program(target);
1858   struct st_variant *v, **prevPtr = &p->variants;
1859   bool unbound = false;
1860
1861   for (v = p->variants; v; ) {
1862      struct st_variant *next = v->next;
1863      if (v->st == st) {
1864         if (!unbound) {
1865            st_unbind_program(st, p);
1866            unbound = true;
1867         }
1868
1869         /* unlink from list */
1870         *prevPtr = next;
1871         /* destroy this variant */
1872         delete_variant(st, v, target->Target);
1873      }
1874      else {
1875         prevPtr = &v->next;
1876      }
1877      v = next;
1878   }
1879}
1880
1881
1882/**
1883 * Callback for _mesa_HashWalk.  Free all the shader's program variants
1884 * which match the given context.
1885 */
1886static void
1887destroy_shader_program_variants_cb(void *data, void *userData)
1888{
1889   struct st_context *st = (struct st_context *) userData;
1890   struct gl_shader *shader = (struct gl_shader *) data;
1891
1892   switch (shader->Type) {
1893   case GL_SHADER_PROGRAM_MESA:
1894      {
1895         struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1896         GLuint i;
1897
1898         for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1899            if (shProg->_LinkedShaders[i])
1900               destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1901         }
1902      }
1903      break;
1904   case GL_VERTEX_SHADER:
1905   case GL_FRAGMENT_SHADER:
1906   case GL_GEOMETRY_SHADER:
1907   case GL_TESS_CONTROL_SHADER:
1908   case GL_TESS_EVALUATION_SHADER:
1909   case GL_COMPUTE_SHADER:
1910      break;
1911   default:
1912      assert(0);
1913   }
1914}
1915
1916
1917/**
1918 * Callback for _mesa_HashWalk.  Free all the program variants which match
1919 * the given context.
1920 */
1921static void
1922destroy_program_variants_cb(void *data, void *userData)
1923{
1924   struct st_context *st = (struct st_context *) userData;
1925   struct gl_program *program = (struct gl_program *) data;
1926   destroy_program_variants(st, program);
1927}
1928
1929
1930/**
1931 * Walk over all shaders and programs to delete any variants which
1932 * belong to the given context.
1933 * This is called during context tear-down.
1934 */
1935void
1936st_destroy_program_variants(struct st_context *st)
1937{
1938   /* If shaders can be shared with other contexts, the last context will
1939    * call DeleteProgram on all shaders, releasing everything.
1940    */
1941   if (st->has_shareable_shaders)
1942      return;
1943
1944   /* ARB vert/frag program */
1945   _mesa_HashWalk(st->ctx->Shared->Programs,
1946                  destroy_program_variants_cb, st);
1947
1948   /* GLSL vert/frag/geom shaders */
1949   _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1950                  destroy_shader_program_variants_cb, st);
1951}
1952
1953
1954/**
1955 * Compile one shader variant.
1956 */
1957static void
1958st_precompile_shader_variant(struct st_context *st,
1959                             struct gl_program *prog)
1960{
1961   switch (prog->Target) {
1962   case GL_VERTEX_PROGRAM_ARB:
1963   case GL_TESS_CONTROL_PROGRAM_NV:
1964   case GL_TESS_EVALUATION_PROGRAM_NV:
1965   case GL_GEOMETRY_PROGRAM_NV:
1966   case GL_COMPUTE_PROGRAM_NV: {
1967      struct st_program *p = (struct st_program *)prog;
1968      struct st_common_variant_key key;
1969
1970      memset(&key, 0, sizeof(key));
1971
1972      if (st->ctx->API == API_OPENGL_COMPAT &&
1973          st->clamp_vert_color_in_shader &&
1974          (prog->info.outputs_written & (VARYING_SLOT_COL0 |
1975                                         VARYING_SLOT_COL1 |
1976                                         VARYING_SLOT_BFC0 |
1977                                         VARYING_SLOT_BFC1))) {
1978         key.clamp_color = true;
1979      }
1980
1981      key.st = st->has_shareable_shaders ? NULL : st;
1982      st_get_common_variant(st, p, &key);
1983      break;
1984   }
1985
1986   case GL_FRAGMENT_PROGRAM_ARB: {
1987      struct st_program *p = (struct st_program *)prog;
1988      struct st_fp_variant_key key;
1989
1990      memset(&key, 0, sizeof(key));
1991
1992      key.st = st->has_shareable_shaders ? NULL : st;
1993      key.lower_alpha_func = COMPARE_FUNC_ALWAYS;
1994      if (p->ati_fs) {
1995         for (int i = 0; i < ARRAY_SIZE(key.texture_index); i++)
1996            key.texture_index[i] = TEXTURE_2D_INDEX;
1997      }
1998      st_get_fp_variant(st, p, &key);
1999      break;
2000   }
2001
2002   default:
2003      assert(0);
2004   }
2005}
2006
2007void
2008st_serialize_nir(struct st_program *stp)
2009{
2010   if (!stp->serialized_nir) {
2011      struct blob blob;
2012      size_t size;
2013
2014      blob_init(&blob);
2015      nir_serialize(&blob, stp->Base.nir, false);
2016      blob_finish_get_buffer(&blob, &stp->serialized_nir, &size);
2017      stp->serialized_nir_size = size;
2018   }
2019}
2020
2021void
2022st_finalize_program(struct st_context *st, struct gl_program *prog)
2023{
2024   if (st->current_program[prog->info.stage] == prog) {
2025      if (prog->info.stage == MESA_SHADER_VERTEX)
2026         st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
2027      else
2028         st->dirty |= ((struct st_program *)prog)->affected_states;
2029   }
2030
2031   if (prog->nir) {
2032      nir_sweep(prog->nir);
2033
2034      /* This is only needed for ARB_vp/fp programs and when the disk cache
2035       * is disabled. If the disk cache is enabled, GLSL programs are
2036       * serialized in write_nir_to_cache.
2037       */
2038      st_serialize_nir(st_program(prog));
2039   }
2040
2041   /* Always create the default variant of the program. */
2042   st_precompile_shader_variant(st, prog);
2043}
2044