iris_program.c revision 7ec681f3
1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23/**
24 * @file iris_program.c
25 *
26 * This file contains the driver interface for compiling shaders.
27 *
28 * See iris_program_cache.c for the in-memory program cache where the
29 * compiled shaders are stored.
30 */
31
32#include <stdio.h>
33#include <errno.h>
34#include "pipe/p_defines.h"
35#include "pipe/p_state.h"
36#include "pipe/p_context.h"
37#include "pipe/p_screen.h"
38#include "util/u_atomic.h"
39#include "util/u_upload_mgr.h"
40#include "util/debug.h"
41#include "util/u_async_debug.h"
42#include "compiler/nir/nir.h"
43#include "compiler/nir/nir_builder.h"
44#include "compiler/nir/nir_serialize.h"
45#include "intel/compiler/brw_compiler.h"
46#include "intel/compiler/brw_nir.h"
47#include "iris_context.h"
48#include "nir/tgsi_to_nir.h"
49
50#define KEY_ID(prefix) .prefix.program_string_id = ish->program_id
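/* Shared defaults when translating an Iris program key into a brw_*_prog_key:
 * identity texture swizzles (0x688 packs SWIZZLE_XYZW, three bits per
 * channel), a compressed multisample (MCS) layout assumed for every sampler,
 * and, on Gfx9+, every sampler flagged as potentially using 16x MSAA.
 */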
51#define BRW_KEY_INIT(gen, prog_id)                       \
52   .base.program_string_id = prog_id,                    \
53   .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
54   .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688,   \
55   .base.tex.compressed_multisample_layout_mask = ~0,    \
56   .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
57
58struct iris_threaded_compile_job {
59   struct iris_screen *screen;
60   struct u_upload_mgr *uploader;
61   struct pipe_debug_callback *dbg;
62   struct iris_uncompiled_shader *ish;
63   struct iris_compiled_shader *shader;
64};
65
66static unsigned
67get_new_program_id(struct iris_screen *screen)
68{
69   return p_atomic_inc_return(&screen->program_id);
70}
71
72void
73iris_finalize_program(struct iris_compiled_shader *shader,
74                      struct brw_stage_prog_data *prog_data,
75                      uint32_t *streamout,
76                      enum brw_param_builtin *system_values,
77                      unsigned num_system_values,
78                      unsigned kernel_input_size,
79                      unsigned num_cbufs,
80                      const struct iris_binding_table *bt)
81{
82   shader->prog_data = prog_data;
83   shader->streamout = streamout;
84   shader->system_values = system_values;
85   shader->num_system_values = num_system_values;
86   shader->kernel_input_size = kernel_input_size;
87   shader->num_cbufs = num_cbufs;
88   shader->bt = *bt;
89
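   /* Reparent the compiler-allocated data to the shader so it is freed with
    * the variant rather than with the caller's temporary compile context.
    */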
90   ralloc_steal(shader, shader->prog_data);
91   ralloc_steal(shader->prog_data, (void *)prog_data->relocs);
92   ralloc_steal(shader->prog_data, prog_data->param);
93   ralloc_steal(shader->prog_data, prog_data->pull_param);
94   ralloc_steal(shader, shader->streamout);
95   ralloc_steal(shader, shader->system_values);
96}
97
98static struct brw_vs_prog_key
99iris_to_brw_vs_key(const struct intel_device_info *devinfo,
100                   const struct iris_vs_prog_key *key)
101{
102   return (struct brw_vs_prog_key) {
103      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
104
      /* Don't tell the backend about our clip plane constants; we've
       * already lowered them in NIR and don't want it doing it again.
       */
108      .nr_userclip_plane_consts = 0,
109   };
110}
111
112static struct brw_tcs_prog_key
113iris_to_brw_tcs_key(const struct intel_device_info *devinfo,
114                    const struct iris_tcs_prog_key *key)
115{
116   return (struct brw_tcs_prog_key) {
117      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
118      .tes_primitive_mode = key->tes_primitive_mode,
119      .input_vertices = key->input_vertices,
120      .patch_outputs_written = key->patch_outputs_written,
121      .outputs_written = key->outputs_written,
122      .quads_workaround = key->quads_workaround,
123   };
124}
125
126static struct brw_tes_prog_key
127iris_to_brw_tes_key(const struct intel_device_info *devinfo,
128                    const struct iris_tes_prog_key *key)
129{
130   return (struct brw_tes_prog_key) {
131      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
132      .patch_inputs_read = key->patch_inputs_read,
133      .inputs_read = key->inputs_read,
134   };
135}
136
137static struct brw_gs_prog_key
138iris_to_brw_gs_key(const struct intel_device_info *devinfo,
139                   const struct iris_gs_prog_key *key)
140{
141   return (struct brw_gs_prog_key) {
142      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
143   };
144}
145
146static struct brw_wm_prog_key
147iris_to_brw_fs_key(const struct intel_device_info *devinfo,
148                   const struct iris_fs_prog_key *key)
149{
150   return (struct brw_wm_prog_key) {
151      BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
152      .nr_color_regions = key->nr_color_regions,
153      .flat_shade = key->flat_shade,
154      .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
155      .alpha_to_coverage = key->alpha_to_coverage,
156      .clamp_fragment_color = key->clamp_fragment_color,
157      .persample_interp = key->persample_interp,
158      .multisample_fbo = key->multisample_fbo,
159      .force_dual_color_blend = key->force_dual_color_blend,
160      .coherent_fb_fetch = key->coherent_fb_fetch,
161      .color_outputs_valid = key->color_outputs_valid,
162      .input_slots_valid = key->input_slots_valid,
163      .ignore_sample_mask_out = !key->multisample_fbo,
164   };
165}
166
167static struct brw_cs_prog_key
168iris_to_brw_cs_key(const struct intel_device_info *devinfo,
169                   const struct iris_cs_prog_key *key)
170{
171   return (struct brw_cs_prog_key) {
172      BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
173   };
174}
175
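/* Suballocate transient GPU memory from the upload manager, recording the
 * backing resource and offset in @ref and returning a CPU-writable map
 * (NULL on failure).
 */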
176static void *
177upload_state(struct u_upload_mgr *uploader,
178             struct iris_state_ref *ref,
179             unsigned size,
180             unsigned alignment)
181{
182   void *p = NULL;
183   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
184   return p;
185}
186
187void
188iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
189                                struct pipe_shader_buffer *buf,
190                                struct iris_state_ref *surf_state,
191                                isl_surf_usage_flags_t usage)
192{
193   struct pipe_context *ctx = &ice->ctx;
194   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
195   bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
196
197   void *map =
198      upload_state(ice->state.surface_uploader, surf_state,
199                   screen->isl_dev.ss.size, 64);
   if (unlikely(!map)) {
201      surf_state->res = NULL;
202      return;
203   }
204
205   struct iris_resource *res = (void *) buf->buffer;
206   struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
207   surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
208
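   /* SSBOs always go through the data port, and UBOs do too unless the
    * compiler pulls indirect UBO loads through the sampler.  Data port
    * access needs a raw buffer format; the sampler path uses a typed
    * RGBA32_FLOAT view instead.
    */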
209   const bool dataport = ssbo || !screen->compiler->indirect_ubos_use_sampler;
210
211   isl_buffer_fill_state(&screen->isl_dev, map,
212                         .address = res->bo->address + res->offset +
213                                    buf->buffer_offset,
214                         .size_B = buf->buffer_size - res->offset,
215                         .format = dataport ? ISL_FORMAT_RAW
216                                            : ISL_FORMAT_R32G32B32A32_FLOAT,
217                         .swizzle = ISL_SWIZZLE_IDENTITY,
218                         .stride_B = 1,
219                         .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
220}
221
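/* Walk an array-of-arrays deref chain and return flat_index * elem_size for
 * the element being accessed; e.g. img[i][j] with an inner array length of N
 * yields (i * N + j) * elem_size.  The result is clamped to the array bounds
 * (see the comment at the end of the function).
 */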
222static nir_ssa_def *
223get_aoa_deref_offset(nir_builder *b,
224                     nir_deref_instr *deref,
225                     unsigned elem_size)
226{
227   unsigned array_size = elem_size;
228   nir_ssa_def *offset = nir_imm_int(b, 0);
229
230   while (deref->deref_type != nir_deref_type_var) {
231      assert(deref->deref_type == nir_deref_type_array);
232
233      /* This level's element size is the previous level's array size */
      assert(deref->arr.index.ssa);
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
236      offset = nir_iadd(b, offset,
237                           nir_imul(b, index, nir_imm_int(b, array_size)));
238
239      deref = nir_deref_instr_parent(deref);
240      assert(glsl_type_is_array(deref->type));
241      array_size *= glsl_get_length(deref->type);
242   }
243
244   /* Accessing an invalid surface index with the dataport can result in a
245    * hang.  According to the spec "if the index used to select an individual
246    * element is negative or greater than or equal to the size of the array,
247    * the results of the operation are undefined but may not lead to
248    * termination" -- which is one of the possible outcomes of the hang.
249    * Clamp the index to prevent access outside of the array bounds.
250    */
251   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
252}
253
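/* Rewrite image_deref_* intrinsics into their index-based forms, replacing
 * the variable deref with the image's driver_location plus the flattened
 * array-of-arrays offset.
 */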
254static void
255iris_lower_storage_image_derefs(nir_shader *nir)
256{
257   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
258
259   nir_builder b;
260   nir_builder_init(&b, impl);
261
262   nir_foreach_block(block, impl) {
263      nir_foreach_instr_safe(instr, block) {
264         if (instr->type != nir_instr_type_intrinsic)
265            continue;
266
267         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
268         switch (intrin->intrinsic) {
269         case nir_intrinsic_image_deref_load:
270         case nir_intrinsic_image_deref_store:
271         case nir_intrinsic_image_deref_atomic_add:
272         case nir_intrinsic_image_deref_atomic_imin:
273         case nir_intrinsic_image_deref_atomic_umin:
274         case nir_intrinsic_image_deref_atomic_imax:
275         case nir_intrinsic_image_deref_atomic_umax:
276         case nir_intrinsic_image_deref_atomic_and:
277         case nir_intrinsic_image_deref_atomic_or:
278         case nir_intrinsic_image_deref_atomic_xor:
279         case nir_intrinsic_image_deref_atomic_exchange:
280         case nir_intrinsic_image_deref_atomic_comp_swap:
281         case nir_intrinsic_image_deref_size:
282         case nir_intrinsic_image_deref_samples:
283         case nir_intrinsic_image_deref_load_raw_intel:
284         case nir_intrinsic_image_deref_store_raw_intel: {
285            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
286            nir_variable *var = nir_deref_instr_get_variable(deref);
287
288            b.cursor = nir_before_instr(&intrin->instr);
289            nir_ssa_def *index =
290               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
291                            get_aoa_deref_offset(&b, deref, 1));
292            nir_rewrite_image_intrinsic(intrin, index, false);
293            break;
294         }
295
296         default:
297            break;
298         }
299      }
300   }
301}
302
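/* Return true if the shader uses any image atomics (after storage image
 * derefs have been lowered to indices).
 */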
303static bool
304iris_uses_image_atomic(const nir_shader *shader)
305{
306   nir_foreach_function(function, shader) {
307      if (function->impl == NULL)
308         continue;
309
310      nir_foreach_block(block, function->impl) {
311         nir_foreach_instr(instr, block) {
312            if (instr->type != nir_instr_type_intrinsic)
313               continue;
314
315            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
316            switch (intrin->intrinsic) {
317            case nir_intrinsic_image_deref_atomic_add:
318            case nir_intrinsic_image_deref_atomic_imin:
319            case nir_intrinsic_image_deref_atomic_umin:
320            case nir_intrinsic_image_deref_atomic_imax:
321            case nir_intrinsic_image_deref_atomic_umax:
322            case nir_intrinsic_image_deref_atomic_and:
323            case nir_intrinsic_image_deref_atomic_or:
324            case nir_intrinsic_image_deref_atomic_xor:
325            case nir_intrinsic_image_deref_atomic_exchange:
326            case nir_intrinsic_image_deref_atomic_comp_swap:
327               unreachable("Should have been lowered in "
328                           "iris_lower_storage_image_derefs");
329
330            case nir_intrinsic_image_atomic_add:
331            case nir_intrinsic_image_atomic_imin:
332            case nir_intrinsic_image_atomic_umin:
333            case nir_intrinsic_image_atomic_imax:
334            case nir_intrinsic_image_atomic_umax:
335            case nir_intrinsic_image_atomic_and:
336            case nir_intrinsic_image_atomic_or:
337            case nir_intrinsic_image_atomic_xor:
338            case nir_intrinsic_image_atomic_exchange:
339            case nir_intrinsic_image_atomic_comp_swap:
340               return true;
341
342            default:
343               break;
344            }
345         }
346      }
347   }
348
349   return false;
350}
351
352/**
353 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
354 */
355static bool
356iris_fix_edge_flags(nir_shader *nir)
357{
358   if (nir->info.stage != MESA_SHADER_VERTEX) {
359      nir_shader_preserve_all_metadata(nir);
360      return false;
361   }
362
363   nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
364                                                       VARYING_SLOT_EDGE);
365   if (!var) {
366      nir_shader_preserve_all_metadata(nir);
367      return false;
368   }
369
370   var->data.mode = nir_var_shader_temp;
371   nir->info.outputs_written &= ~VARYING_BIT_EDGE;
372   nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
373   nir_fixup_deref_modes(nir);
374
   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_metadata_preserve(f->impl, nir_metadata_block_index |
                                        nir_metadata_dominance |
                                        nir_metadata_live_ssa_defs |
                                        nir_metadata_loop_analysis);
      }
   }
385
386   return true;
387}
388
389/**
390 * Fix an uncompiled shader's stream output info.
391 *
392 * Core Gallium stores output->register_index as a "slot" number, where
393 * slots are assigned consecutively to all outputs in info->outputs_written.
394 * This naive packing of outputs doesn't work for us - we too have slots,
395 * but the layout is defined by the VUE map, which we won't have until we
396 * compile a specific shader variant.  So, we remap these and simply store
397 * VARYING_SLOT_* in our copy's output->register_index fields.
398 *
399 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
400 * components of our VUE header.  See brw_vue_map.c for the layout.
401 */
402static void
403update_so_info(struct pipe_stream_output_info *so_info,
404               uint64_t outputs_written)
405{
406   uint8_t reverse_map[64] = {};
407   unsigned slot = 0;
408   while (outputs_written) {
409      reverse_map[slot++] = u_bit_scan64(&outputs_written);
410   }
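   /* reverse_map[] now maps each condensed slot back to its VARYING_SLOT_*;
    * e.g. if only VARYING_SLOT_POS and VARYING_SLOT_COL0 are written, slot 0
    * maps to POS and slot 1 maps to COL0.
    */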
411
412   for (unsigned i = 0; i < so_info->num_outputs; i++) {
413      struct pipe_stream_output *output = &so_info->output[i];
414
415      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
416      output->register_index = reverse_map[output->register_index];
417
418      /* The VUE header contains three scalar fields packed together:
419       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
420       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
421       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
422       */
423      switch (output->register_index) {
424      case VARYING_SLOT_LAYER:
425         assert(output->num_components == 1);
426         output->register_index = VARYING_SLOT_PSIZ;
427         output->start_component = 1;
428         break;
429      case VARYING_SLOT_VIEWPORT:
430         assert(output->num_components == 1);
431         output->register_index = VARYING_SLOT_PSIZ;
432         output->start_component = 2;
433         break;
434      case VARYING_SLOT_PSIZ:
435         assert(output->num_components == 1);
436         output->start_component = 3;
437         break;
438      }
439
440      //info->outputs_written |= 1ull << output->register_index;
441   }
442}
443
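/* Fill one vec4's worth of system values with BRW_PARAM_IMAGE entries for a
 * brw_image_param field at the given dword offset, zeroing any unused
 * trailing components.
 */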
444static void
445setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
446                        unsigned offset, unsigned n)
447{
448   assert(offset % sizeof(uint32_t) == 0);
449
450   for (unsigned i = 0; i < n; ++i)
451      sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
452
453   for (unsigned i = n; i < 4; ++i)
454      sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
455}
456
457/**
458 * Associate NIR uniform variables with the prog_data->param[] mechanism
459 * used by the backend.  Also, decide which UBOs we'd like to push in an
460 * ideal situation (though the backend can reduce this).
461 */
462static void
463iris_setup_uniforms(const struct brw_compiler *compiler,
464                    void *mem_ctx,
465                    nir_shader *nir,
466                    struct brw_stage_prog_data *prog_data,
467                    unsigned kernel_input_size,
468                    enum brw_param_builtin **out_system_values,
469                    unsigned *out_num_system_values,
470                    unsigned *out_num_cbufs)
471{
472   UNUSED const struct intel_device_info *devinfo = compiler->devinfo;
473
474   unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
475
476   const unsigned IRIS_MAX_SYSTEM_VALUES =
477      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
478   enum brw_param_builtin *system_values =
479      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
480   unsigned num_system_values = 0;
481
482   unsigned patch_vert_idx = -1;
483   unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
484   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
485   unsigned variable_group_size_idx = -1;
486   unsigned work_dim_idx = -1;
487   memset(ucp_idx, -1, sizeof(ucp_idx));
488   memset(img_idx, -1, sizeof(img_idx));
489
490   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
491
492   nir_builder b;
493   nir_builder_init(&b, impl);
494
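   /* Use an SSA undef as a placeholder UBO index for the system value loads
    * emitted below; once we know which cbuf slot the system values occupy,
    * any load_ubo still sourcing this placeholder is rewritten to use it.
    */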
495   b.cursor = nir_before_block(nir_start_block(impl));
496   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
497
498   /* Turn system value intrinsics into uniforms */
499   nir_foreach_block(block, impl) {
500      nir_foreach_instr_safe(instr, block) {
501         if (instr->type != nir_instr_type_intrinsic)
502            continue;
503
504         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
505         nir_ssa_def *offset;
506
507         switch (intrin->intrinsic) {
508         case nir_intrinsic_load_constant: {
509            unsigned load_size = intrin->dest.ssa.num_components *
510                                 intrin->dest.ssa.bit_size / 8;
511            unsigned load_align = intrin->dest.ssa.bit_size / 8;
512
513            /* This one is special because it reads from the shader constant
514             * data and not cbuf0 which gallium uploads for us.
515             */
516            b.cursor = nir_instr_remove(&intrin->instr);
517
518            nir_ssa_def *offset =
519               nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
520                                nir_intrinsic_base(intrin));
521
522            assert(load_size < b.shader->constant_data_size);
523            unsigned max_offset = b.shader->constant_data_size - load_size;
524            offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
525
526            nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
527               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
528               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
529
530            nir_ssa_def *data =
531               nir_load_global(&b, nir_iadd(&b, const_data_base_addr,
532                                                nir_u2u64(&b, offset)),
533                               load_align,
534                               intrin->dest.ssa.num_components,
535                               intrin->dest.ssa.bit_size);
536
537            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
538                                     data);
539            continue;
540         }
541         case nir_intrinsic_load_user_clip_plane: {
542            unsigned ucp = nir_intrinsic_ucp_id(intrin);
543
544            if (ucp_idx[ucp] == -1) {
545               ucp_idx[ucp] = num_system_values;
546               num_system_values += 4;
547            }
548
549            for (int i = 0; i < 4; i++) {
550               system_values[ucp_idx[ucp] + i] =
551                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
552            }
553
554            b.cursor = nir_before_instr(instr);
555            offset = nir_imm_int(&b, system_values_start +
556                                     ucp_idx[ucp] * sizeof(uint32_t));
557            break;
558         }
559         case nir_intrinsic_load_patch_vertices_in:
560            if (patch_vert_idx == -1)
561               patch_vert_idx = num_system_values++;
562
563            system_values[patch_vert_idx] =
564               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
565
566            b.cursor = nir_before_instr(instr);
567            offset = nir_imm_int(&b, system_values_start +
568                                     patch_vert_idx * sizeof(uint32_t));
569            break;
570         case nir_intrinsic_image_deref_load_param_intel: {
571            assert(devinfo->ver < 9);
572            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
573            nir_variable *var = nir_deref_instr_get_variable(deref);
574
575            if (img_idx[var->data.binding] == -1) {
576               /* GL only allows arrays of arrays of images. */
577               assert(glsl_type_is_image(glsl_without_array(var->type)));
578               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
579
580               for (int i = 0; i < num_images; i++) {
581                  const unsigned img = var->data.binding + i;
582
583                  img_idx[img] = num_system_values;
584                  num_system_values += BRW_IMAGE_PARAM_SIZE;
585
586                  uint32_t *img_sv = &system_values[img_idx[img]];
587
588                  setup_vec4_image_sysval(
589                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
590                     offsetof(struct brw_image_param, offset), 2);
591                  setup_vec4_image_sysval(
592                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
593                     offsetof(struct brw_image_param, size), 3);
594                  setup_vec4_image_sysval(
595                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
596                     offsetof(struct brw_image_param, stride), 4);
597                  setup_vec4_image_sysval(
598                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
599                     offsetof(struct brw_image_param, tiling), 3);
600                  setup_vec4_image_sysval(
601                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
602                     offsetof(struct brw_image_param, swizzling), 2);
603               }
604            }
605
606            b.cursor = nir_before_instr(instr);
607            offset = nir_iadd(&b,
608               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
609               nir_imm_int(&b, system_values_start +
610                               img_idx[var->data.binding] * 4 +
611                               nir_intrinsic_base(intrin) * 16));
612            break;
613         }
614         case nir_intrinsic_load_workgroup_size: {
615            assert(nir->info.workgroup_size_variable);
616            if (variable_group_size_idx == -1) {
617               variable_group_size_idx = num_system_values;
618               num_system_values += 3;
619               for (int i = 0; i < 3; i++) {
620                  system_values[variable_group_size_idx + i] =
621                     BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
622               }
623            }
624
625            b.cursor = nir_before_instr(instr);
626            offset = nir_imm_int(&b, system_values_start +
627                                     variable_group_size_idx * sizeof(uint32_t));
628            break;
629         }
630         case nir_intrinsic_load_work_dim: {
631            if (work_dim_idx == -1) {
632               work_dim_idx = num_system_values++;
633               system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
634            }
635            b.cursor = nir_before_instr(instr);
636            offset = nir_imm_int(&b, system_values_start +
637                                     work_dim_idx * sizeof(uint32_t));
638            break;
639         }
640         case nir_intrinsic_load_kernel_input: {
641            assert(nir_intrinsic_base(intrin) +
642                   nir_intrinsic_range(intrin) <= kernel_input_size);
643            b.cursor = nir_before_instr(instr);
644            offset = nir_iadd_imm(&b, intrin->src[0].ssa,
645                                      nir_intrinsic_base(intrin));
646            break;
647         }
648         default:
649            continue;
650         }
651
652         nir_ssa_def *load =
653            nir_load_ubo(&b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size,
654                         temp_ubo_name, offset,
655                         .align_mul = 4,
656                         .align_offset = 0,
657                         .range_base = 0,
658                         .range = ~0);
659
660         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
661                                  load);
662         nir_instr_remove(instr);
663      }
664   }
665
666   nir_validate_shader(nir, "before remapping");
667
   /* Uniforms are stored in constant buffer 0; the user-facing UBOs start at
    * index 1.  So if any constant buffer is needed, constant buffer 0 will be
    * needed, so account for it.
    */
672   unsigned num_cbufs = nir->info.num_ubos;
673   if (num_cbufs || nir->num_uniforms)
674      num_cbufs++;
675
676   /* Place the new params in a new cbuf. */
677   if (num_system_values > 0 || kernel_input_size > 0) {
678      unsigned sysval_cbuf_index = num_cbufs;
679      num_cbufs++;
680
681      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
682                               num_system_values);
683
684      nir_foreach_block(block, impl) {
685         nir_foreach_instr_safe(instr, block) {
686            if (instr->type != nir_instr_type_intrinsic)
687               continue;
688
689            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
690
691            if (load->intrinsic != nir_intrinsic_load_ubo)
692               continue;
693
694            b.cursor = nir_before_instr(instr);
695
696            assert(load->src[0].is_ssa);
697
698            if (load->src[0].ssa == temp_ubo_name) {
699               nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
700               nir_instr_rewrite_src(instr, &load->src[0],
701                                     nir_src_for_ssa(imm));
702            }
703         }
704      }
705
706      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
707      nir_opt_constant_folding(nir);
708   } else {
709      ralloc_free(system_values);
710      system_values = NULL;
711   }
712
713   assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
714   nir_validate_shader(nir, "after remap");
715
   /* We don't use params[] but gallium leaves num_uniforms set.  We use this
    * to detect when cbuf0 exists but we don't need it anymore when we get
    * here.  Zero it out so that the back-end doesn't get confused when
    * nr_params * 4 != num_uniforms.
    */
721   nir->num_uniforms = 0;
722
723   *out_system_values = system_values;
724   *out_num_system_values = num_system_values;
725   *out_num_cbufs = num_cbufs;
726}
727
728static const char *surface_group_names[] = {
729   [IRIS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
730   [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
731   [IRIS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
732   [IRIS_SURFACE_GROUP_TEXTURE]            = "texture",
733   [IRIS_SURFACE_GROUP_UBO]                = "ubo",
734   [IRIS_SURFACE_GROUP_SSBO]               = "ssbo",
735   [IRIS_SURFACE_GROUP_IMAGE]              = "image",
736};
737
738static void
739iris_print_binding_table(FILE *fp, const char *name,
740                         const struct iris_binding_table *bt)
741{
742   STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
743
744   uint32_t total = 0;
745   uint32_t compacted = 0;
746
747   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
748      uint32_t size = bt->sizes[i];
749      total += size;
750      if (size)
751         compacted += util_bitcount64(bt->used_mask[i]);
752   }
753
754   if (total == 0) {
755      fprintf(fp, "Binding table for %s is empty\n\n", name);
756      return;
757   }
758
759   if (total != compacted) {
760      fprintf(fp, "Binding table for %s "
761              "(compacted to %u entries from %u entries)\n",
762              name, compacted, total);
763   } else {
764      fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
765   }
766
767   uint32_t entry = 0;
768   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
769      uint64_t mask = bt->used_mask[i];
770      while (mask) {
771         int index = u_bit_scan64(&mask);
772         fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
773      }
774   }
775   fprintf(fp, "\n");
776}
777
778enum {
779   /* Max elements in a surface group. */
780   SURFACE_GROUP_MAX_ELEMENTS = 64,
781};
782
783/**
784 * Map a <group, index> pair to a binding table index.
785 *
786 * For example: <UBO, 5> => binding table index 12
787 */
788uint32_t
789iris_group_index_to_bti(const struct iris_binding_table *bt,
790                        enum iris_surface_group group, uint32_t index)
791{
792   assert(index < bt->sizes[group]);
793   uint64_t mask = bt->used_mask[group];
794   uint64_t bit = 1ull << index;
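   /* Compacted binding tables only contain used surfaces, so the BTI is the
    * group's base offset plus the number of used entries below this index;
    * e.g. with used_mask 0b1011 and index 3, the result is
    * offsets[group] + popcount(0b0111 & 0b1011) = offsets[group] + 2.
    */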
795   if (bit & mask) {
796      return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
797   } else {
798      return IRIS_SURFACE_NOT_USED;
799   }
800}
801
802/**
803 * Map a binding table index back to a <group, index> pair.
804 *
805 * For example: binding table index 12 => <UBO, 5>
806 */
807uint32_t
808iris_bti_to_group_index(const struct iris_binding_table *bt,
809                        enum iris_surface_group group, uint32_t bti)
810{
811   uint64_t used_mask = bt->used_mask[group];
812   assert(bti >= bt->offsets[group]);
813
814   uint32_t c = bti - bt->offsets[group];
815   while (used_mask) {
816      int i = u_bit_scan64(&used_mask);
817      if (c == 0)
818         return i;
819      c--;
820   }
821
822   return IRIS_SURFACE_NOT_USED;
823}
824
825static void
826rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
827                     nir_instr *instr, nir_src *src,
828                     enum iris_surface_group group)
829{
830   assert(bt->sizes[group] > 0);
831
832   b->cursor = nir_before_instr(instr);
833   nir_ssa_def *bti;
834   if (nir_src_is_const(*src)) {
835      uint32_t index = nir_src_as_uint(*src);
836      bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
837                           src->ssa->bit_size);
838   } else {
      /* Indirect usage makes all the surfaces of the group available,
       * so we can just add the base.
       */
842      assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
843      bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
844   }
845   nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
846}
847
848static void
849mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
850                   enum iris_surface_group group)
851{
852   assert(bt->sizes[group] > 0);
853
854   if (nir_src_is_const(*src)) {
855      uint64_t index = nir_src_as_uint(*src);
856      assert(index < bt->sizes[group]);
857      bt->used_mask[group] |= 1ull << index;
858   } else {
859      /* There's an indirect usage, we need all the surfaces. */
860      bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
861   }
862}
863
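/* Debug knob: when the INTEL_DISABLE_COMPACT_BINDING_TABLE environment
 * variable is set, every surface group is treated as fully used and binding
 * table compaction is skipped.
 */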
864static bool
865skip_compacting_binding_tables(void)
866{
867   static int skip = -1;
868   if (skip < 0)
869      skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
870   return skip;
871}
872
873/**
874 * Set up the binding table indices and apply to the shader.
875 */
876static void
877iris_setup_binding_table(const struct intel_device_info *devinfo,
878                         struct nir_shader *nir,
879                         struct iris_binding_table *bt,
880                         unsigned num_render_targets,
881                         unsigned num_system_values,
882                         unsigned num_cbufs)
883{
884   const struct shader_info *info = &nir->info;
885
886   memset(bt, 0, sizeof(*bt));
887
888   /* Set the sizes for each surface group.  For some groups, we already know
889    * upfront how many will be used, so mark them.
890    */
891   if (info->stage == MESA_SHADER_FRAGMENT) {
892      bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
893      /* All render targets used. */
894      bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
895         BITFIELD64_MASK(num_render_targets);
896
      /* Set up the render target read surface group in order to support
       * non-coherent framebuffer fetch on Gfx8.
       */
900      if (devinfo->ver == 8 && info->outputs_read) {
901         bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
902         bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
903            BITFIELD64_MASK(num_render_targets);
904      }
905   } else if (info->stage == MESA_SHADER_COMPUTE) {
906      bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
907   }
908
909   bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = BITSET_LAST_BIT(info->textures_used);
910   bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used[0];
911
912   bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;
913
914   /* Allocate an extra slot in the UBO section for NIR constants.
915    * Binding table compaction will remove it if unnecessary.
916    *
917    * We don't include them in iris_compiled_shader::num_cbufs because
918    * they are uploaded separately from shs->constbuf[], but from a shader
919    * point of view, they're another UBO (at the end of the section).
920    */
921   bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
922
923   bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
924
925   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
926      assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
927
   /* Mark surfaces as used for the cases where we don't have the information
    * available upfront.
    */
931   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
932   nir_foreach_block (block, impl) {
933      nir_foreach_instr (instr, block) {
934         if (instr->type != nir_instr_type_intrinsic)
935            continue;
936
937         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
938         switch (intrin->intrinsic) {
939         case nir_intrinsic_load_num_workgroups:
940            bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
941            break;
942
943         case nir_intrinsic_load_output:
944            if (devinfo->ver == 8) {
945               mark_used_with_src(bt, &intrin->src[0],
946                                  IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
947            }
948            break;
949
950         case nir_intrinsic_image_size:
951         case nir_intrinsic_image_load:
952         case nir_intrinsic_image_store:
953         case nir_intrinsic_image_atomic_add:
954         case nir_intrinsic_image_atomic_imin:
955         case nir_intrinsic_image_atomic_umin:
956         case nir_intrinsic_image_atomic_imax:
957         case nir_intrinsic_image_atomic_umax:
958         case nir_intrinsic_image_atomic_and:
959         case nir_intrinsic_image_atomic_or:
960         case nir_intrinsic_image_atomic_xor:
961         case nir_intrinsic_image_atomic_exchange:
962         case nir_intrinsic_image_atomic_comp_swap:
963         case nir_intrinsic_image_load_raw_intel:
964         case nir_intrinsic_image_store_raw_intel:
965            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
966            break;
967
968         case nir_intrinsic_load_ubo:
969            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
970            break;
971
972         case nir_intrinsic_store_ssbo:
973            mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
974            break;
975
976         case nir_intrinsic_get_ssbo_size:
977         case nir_intrinsic_ssbo_atomic_add:
978         case nir_intrinsic_ssbo_atomic_imin:
979         case nir_intrinsic_ssbo_atomic_umin:
980         case nir_intrinsic_ssbo_atomic_imax:
981         case nir_intrinsic_ssbo_atomic_umax:
982         case nir_intrinsic_ssbo_atomic_and:
983         case nir_intrinsic_ssbo_atomic_or:
984         case nir_intrinsic_ssbo_atomic_xor:
985         case nir_intrinsic_ssbo_atomic_exchange:
986         case nir_intrinsic_ssbo_atomic_comp_swap:
987         case nir_intrinsic_ssbo_atomic_fmin:
988         case nir_intrinsic_ssbo_atomic_fmax:
989         case nir_intrinsic_ssbo_atomic_fcomp_swap:
990         case nir_intrinsic_load_ssbo:
991            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
992            break;
993
994         default:
995            break;
996         }
997      }
998   }
999
   /* When compaction is disabled, just mark everything as used. */
1001   if (unlikely(skip_compacting_binding_tables())) {
1002      for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1003         bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
1004   }
1005
1006   /* Calculate the offsets and the binding table size based on the used
1007    * surfaces.  After this point, the functions to go between "group indices"
1008    * and binding table indices can be used.
1009    */
1010   uint32_t next = 0;
1011   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1012      if (bt->used_mask[i] != 0) {
1013         bt->offsets[i] = next;
1014         next += util_bitcount64(bt->used_mask[i]);
1015      }
1016   }
1017   bt->size_bytes = next * 4;
1018
1019   if (INTEL_DEBUG(DEBUG_BT)) {
1020      iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
1021   }
1022
1023   /* Apply the binding table indices.  The backend compiler is not expected
1024    * to change those, as we haven't set any of the *_start entries in brw
1025    * binding_table.
1026    */
1027   nir_builder b;
1028   nir_builder_init(&b, impl);
1029
1030   nir_foreach_block (block, impl) {
1031      nir_foreach_instr (instr, block) {
1032         if (instr->type == nir_instr_type_tex) {
1033            nir_tex_instr *tex = nir_instr_as_tex(instr);
1034            tex->texture_index =
1035               iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
1036                                       tex->texture_index);
1037            continue;
1038         }
1039
1040         if (instr->type != nir_instr_type_intrinsic)
1041            continue;
1042
1043         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1044         switch (intrin->intrinsic) {
1045         case nir_intrinsic_image_size:
1046         case nir_intrinsic_image_load:
1047         case nir_intrinsic_image_store:
1048         case nir_intrinsic_image_atomic_add:
1049         case nir_intrinsic_image_atomic_imin:
1050         case nir_intrinsic_image_atomic_umin:
1051         case nir_intrinsic_image_atomic_imax:
1052         case nir_intrinsic_image_atomic_umax:
1053         case nir_intrinsic_image_atomic_and:
1054         case nir_intrinsic_image_atomic_or:
1055         case nir_intrinsic_image_atomic_xor:
1056         case nir_intrinsic_image_atomic_exchange:
1057         case nir_intrinsic_image_atomic_comp_swap:
1058         case nir_intrinsic_image_load_raw_intel:
1059         case nir_intrinsic_image_store_raw_intel:
1060            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1061                                 IRIS_SURFACE_GROUP_IMAGE);
1062            break;
1063
1064         case nir_intrinsic_load_ubo:
1065            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1066                                 IRIS_SURFACE_GROUP_UBO);
1067            break;
1068
1069         case nir_intrinsic_store_ssbo:
1070            rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
1071                                 IRIS_SURFACE_GROUP_SSBO);
1072            break;
1073
1074         case nir_intrinsic_load_output:
1075            if (devinfo->ver == 8) {
1076               rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1077                                    IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1078            }
1079            break;
1080
1081         case nir_intrinsic_get_ssbo_size:
1082         case nir_intrinsic_ssbo_atomic_add:
1083         case nir_intrinsic_ssbo_atomic_imin:
1084         case nir_intrinsic_ssbo_atomic_umin:
1085         case nir_intrinsic_ssbo_atomic_imax:
1086         case nir_intrinsic_ssbo_atomic_umax:
1087         case nir_intrinsic_ssbo_atomic_and:
1088         case nir_intrinsic_ssbo_atomic_or:
1089         case nir_intrinsic_ssbo_atomic_xor:
1090         case nir_intrinsic_ssbo_atomic_exchange:
1091         case nir_intrinsic_ssbo_atomic_comp_swap:
1092         case nir_intrinsic_ssbo_atomic_fmin:
1093         case nir_intrinsic_ssbo_atomic_fmax:
1094         case nir_intrinsic_ssbo_atomic_fcomp_swap:
1095         case nir_intrinsic_load_ssbo:
1096            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1097                                 IRIS_SURFACE_GROUP_SSBO);
1098            break;
1099
1100         default:
1101            break;
1102         }
1103      }
1104   }
1105}
1106
1107static void
1108iris_debug_recompile(struct iris_screen *screen,
1109                     struct pipe_debug_callback *dbg,
1110                     struct iris_uncompiled_shader *ish,
1111                     const struct brw_base_prog_key *key)
1112{
1113   if (!ish || list_is_empty(&ish->variants)
1114            || list_is_singular(&ish->variants))
1115      return;
1116
1117   const struct intel_device_info *devinfo = &screen->devinfo;
1118   const struct brw_compiler *c = screen->compiler;
1119   const struct shader_info *info = &ish->nir->info;
1120
1121   brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1122                       _mesa_shader_stage_to_string(info->stage),
1123                       info->name ? info->name : "(no identifier)",
1124                       info->label ? info->label : "");
1125
1126   struct iris_compiled_shader *shader =
1127      list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1128   const void *old_iris_key = &shader->key;
1129
1130   union brw_any_prog_key old_key;
1131
1132   switch (info->stage) {
1133   case MESA_SHADER_VERTEX:
1134      old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key);
1135      break;
1136   case MESA_SHADER_TESS_CTRL:
1137      old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key);
1138      break;
1139   case MESA_SHADER_TESS_EVAL:
1140      old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key);
1141      break;
1142   case MESA_SHADER_GEOMETRY:
1143      old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key);
1144      break;
1145   case MESA_SHADER_FRAGMENT:
1146      old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key);
1147      break;
1148   case MESA_SHADER_COMPUTE:
1149      old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key);
1150      break;
1151   default:
1152      unreachable("invalid shader stage");
1153   }
1154
1155   brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1156}
1157
1158static void
1159check_urb_size(struct iris_context *ice,
1160               unsigned needed_size,
1161               gl_shader_stage stage)
1162{
1163   unsigned last_allocated_size = ice->shaders.urb.size[stage];
1164
1165   /* If the last URB allocation wasn't large enough for our needs,
1166    * flag it as needing to be reconfigured.  Otherwise, we can use
1167    * the existing config.  However, if the URB is constrained, and
1168    * we can shrink our size for this stage, we may be able to gain
1169    * extra concurrency by reconfiguring it to be smaller.  Do so.
1170    */
1171   if (last_allocated_size < needed_size ||
1172       (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
1173      ice->state.dirty |= IRIS_DIRTY_URB;
1174   }
1175}
1176
1177/**
1178 * Get the shader for the last enabled geometry stage.
1179 *
1180 * This stage is the one which will feed stream output and the rasterizer.
1181 */
1182static gl_shader_stage
1183last_vue_stage(struct iris_context *ice)
1184{
1185   if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1186      return MESA_SHADER_GEOMETRY;
1187
1188   if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1189      return MESA_SHADER_TESS_EVAL;
1190
1191   return MESA_SHADER_VERTEX;
1192}
1193
1194/**
1195 * \param added  Set to \c true if the variant was added to the list (i.e., a
1196 *               variant matching \c key was not found).  Set to \c false
1197 *               otherwise.
1198 */
1199static inline struct iris_compiled_shader *
1200find_or_add_variant(const struct iris_screen *screen,
1201                    struct iris_uncompiled_shader *ish,
1202                    enum iris_program_cache_id cache_id,
1203                    const void *key, unsigned key_size,
1204                    bool *added)
1205{
1206   struct list_head *start = ish->variants.next;
1207
1208   *added = false;
1209
1210   if (screen->precompile) {
1211      /* Check the first list entry.  There will always be at least one
1212       * variant in the list (most likely the precompile variant), and
1213       * other contexts only append new variants, so we can safely check
1214       * it without locking, saving that cost in the common case.
1215       */
1216      struct iris_compiled_shader *first =
1217         list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1218
1219      if (memcmp(&first->key, key, key_size) == 0) {
1220         util_queue_fence_wait(&first->ready);
1221         return first;
1222      }
1223
1224      /* Skip this one in the loop below */
1225      start = first->link.next;
1226   }
1227
1228   struct iris_compiled_shader *variant = NULL;
1229
1230   /* If it doesn't match, we have to walk the list; other contexts may be
1231    * concurrently appending shaders to it, so we need to lock here.
1232    */
1233   simple_mtx_lock(&ish->lock);
1234
1235   list_for_each_entry_from(struct iris_compiled_shader, v, start,
1236                            &ish->variants, link) {
1237      if (memcmp(&v->key, key, key_size) == 0) {
1238         variant = v;
1239         break;
1240      }
1241   }
1242
1243   if (variant == NULL) {
1244      variant = iris_create_shader_variant(screen, NULL, cache_id,
1245                                           key_size, key);
1246
1247      /* Append our new variant to the shader's variant list. */
1248      list_addtail(&variant->link, &ish->variants);
1249      *added = true;
1250
1251      simple_mtx_unlock(&ish->lock);
1252   } else {
1253      simple_mtx_unlock(&ish->lock);
1254
1255      util_queue_fence_wait(&variant->ready);
1256   }
1257
1258   return variant;
1259}
1260
1261static void
1262iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
1263                                 UNUSED int thread_index)
1264{
1265   free(_job);
1266}
1267
1268static void
1269iris_schedule_compile(struct iris_screen *screen,
1270                      struct util_queue_fence *ready_fence,
1271                      struct pipe_debug_callback *dbg,
1272                      struct iris_threaded_compile_job *job,
                      util_queue_execute_func execute)
{
1276   util_queue_fence_init(ready_fence);
1277
1278   struct util_async_debug_callback async_debug;
1279
1280   if (dbg) {
1281      u_async_debug_init(&async_debug);
1282      job->dbg = &async_debug.base;
1283   }
1284
1285   util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
1286                      iris_threaded_compile_job_delete, 0);
1287
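   /* Debug messages are collected asynchronously and drained below, so wait
    * for the job when a callback is installed; the sync_compile driconf
    * option forces the same wait for every compile.
    */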
1288   if (screen->driconf.sync_compile || dbg)
1289      util_queue_fence_wait(ready_fence);
1290
1291   if (dbg) {
1292      u_async_debug_drain(&async_debug, dbg);
1293      u_async_debug_cleanup(&async_debug);
1294   }
1295}
1296
1297/**
1298 * Compile a vertex shader, and upload the assembly.
1299 */
1300static void
1301iris_compile_vs(struct iris_screen *screen,
1302                struct u_upload_mgr *uploader,
1303                struct pipe_debug_callback *dbg,
1304                struct iris_uncompiled_shader *ish,
1305                struct iris_compiled_shader *shader)
1306{
1307   const struct brw_compiler *compiler = screen->compiler;
1308   const struct intel_device_info *devinfo = &screen->devinfo;
1309   void *mem_ctx = ralloc_context(NULL);
1310   struct brw_vs_prog_data *vs_prog_data =
1311      rzalloc(mem_ctx, struct brw_vs_prog_data);
1312   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
1313   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1314   enum brw_param_builtin *system_values;
1315   unsigned num_system_values;
1316   unsigned num_cbufs;
1317
1318   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1319   const struct iris_vs_prog_key *const key = &shader->key.vs;
1320
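   /* Lower user clip planes here in NIR; iris_to_brw_vs_key() reports zero
    * clip plane constants to the backend so it doesn't lower them again.
    */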
1321   if (key->vue.nr_userclip_plane_consts) {
1322      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1323      nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1324                        true, false, NULL);
1325      nir_lower_io_to_temporaries(nir, impl, true, false);
1326      nir_lower_global_vars_to_local(nir);
1327      nir_lower_vars_to_ssa(nir);
1328      nir_shader_gather_info(nir, impl);
1329   }
1330
1331   prog_data->use_alt_mode = nir->info.is_arb_asm;
1332
1333   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1334                       &num_system_values, &num_cbufs);
1335
1336   struct iris_binding_table bt;
1337   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1338                            num_system_values, num_cbufs);
1339
1340   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1341
1342   brw_compute_vue_map(devinfo,
1343                       &vue_prog_data->vue_map, nir->info.outputs_written,
1344                       nir->info.separate_shader, /* pos_slots */ 1);
1345
1346   struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);
1347
1348   struct brw_compile_vs_params params = {
1349      .nir = nir,
1350      .key = &brw_key,
1351      .prog_data = vs_prog_data,
1352      .log_data = dbg,
1353   };
1354
1355   const unsigned *program = brw_compile_vs(compiler, mem_ctx, &params);
1356   if (program == NULL) {
1357      dbg_printf("Failed to compile vertex shader: %s\n", params.error_str);
1358      ralloc_free(mem_ctx);
1359
1360      shader->compilation_failed = true;
1361      util_queue_fence_signal(&shader->ready);
1362
1363      return;
1364   }
1365
1366   shader->compilation_failed = false;
1367
1368   iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1369
   uint32_t *so_decls =
      screen->vtbl.create_so_decl_list(&ish->stream_output,
                                       &vue_prog_data->vue_map);
1373
1374   iris_finalize_program(shader, prog_data, so_decls, system_values,
1375                         num_system_values, 0, num_cbufs, &bt);
1376
1377   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
1378                      sizeof(*key), key, program);
1379
1380   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1381
1382   ralloc_free(mem_ctx);
1383}
1384
1385/**
1386 * Update the current vertex shader variant.
1387 *
1388 * Fill out the key, look in the cache, compile and bind if needed.
1389 */
1390static void
1391iris_update_compiled_vs(struct iris_context *ice)
1392{
1393   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1394   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1395   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1396   struct iris_uncompiled_shader *ish =
1397      ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1398
1399   struct iris_vs_prog_key key = { KEY_ID(vue.base) };
1400   screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1401
1402   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1403   bool added;
1404   struct iris_compiled_shader *shader =
1405      find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);
1406
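   /* For a newly added variant, try the on-disk shader cache first and only
    * compile from scratch if that lookup misses.
    */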
1407   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1408                                          &key, sizeof(key))) {
1409      iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
1410   }
1411
1412   if (shader->compilation_failed)
1413      shader = NULL;
1414
1415   if (old != shader) {
1416      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
1417                                    shader);
1418      ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
1419      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
1420                                IRIS_STAGE_DIRTY_BINDINGS_VS |
1421                                IRIS_STAGE_DIRTY_CONSTANTS_VS;
1422      shs->sysvals_need_upload = true;
1423
1424      unsigned urb_entry_size = shader ?
1425         ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1426      check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
1427   }
1428}
1429
1430/**
1431 * Get the shader_info for a given stage, or NULL if the stage is disabled.
1432 */
1433const struct shader_info *
1434iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
1435{
1436   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
1437
1438   if (!ish)
1439      return NULL;
1440
1441   const nir_shader *nir = ish->nir;
1442   return &nir->info;
1443}
1444
1445/**
1446 * Get the union of TCS output and TES input slots.
1447 *
1448 * TCS and TES need to agree on a common URB entry layout.  In particular,
1449 * the data for all patch vertices is stored in a single URB entry (unlike
1450 * GS which has one entry per input vertex).  This means that per-vertex
1451 * array indexing needs a stride.
1452 *
1453 * SSO requires locations to match, but doesn't require the number of
1454 * outputs/inputs to match (in fact, the TCS often has extra outputs).
1455 * So, we need to take the extra step of unifying these on the fly.
1456 */
1457static void
1458get_unified_tess_slots(const struct iris_context *ice,
1459                       uint64_t *per_vertex_slots,
1460                       uint32_t *per_patch_slots)
1461{
1462   const struct shader_info *tcs =
1463      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
1464   const struct shader_info *tes =
1465      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
1466
1467   *per_vertex_slots = tes->inputs_read;
1468   *per_patch_slots = tes->patch_inputs_read;
1469
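   /* With no TCS bound, we use a passthrough TCS whose outputs simply
    * mirror the TES inputs, so the TES slots above already describe the
    * whole interface.
    */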
1470   if (tcs) {
1471      *per_vertex_slots |= tcs->outputs_written;
1472      *per_patch_slots |= tcs->patch_outputs_written;
1473   }
1474}
1475
1476/**
1477 * Compile a tessellation control shader, and upload the assembly.
1478 */
1479static void
1480iris_compile_tcs(struct iris_screen *screen,
1481                 struct hash_table *passthrough_ht,
1482                 struct u_upload_mgr *uploader,
1483                 struct pipe_debug_callback *dbg,
1484                 struct iris_uncompiled_shader *ish,
1485                 struct iris_compiled_shader *shader)
1486{
1487   const struct brw_compiler *compiler = screen->compiler;
1488   const struct nir_shader_compiler_options *options =
1489      compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
1490   void *mem_ctx = ralloc_context(NULL);
1491   struct brw_tcs_prog_data *tcs_prog_data =
1492      rzalloc(mem_ctx, struct brw_tcs_prog_data);
1493   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
1494   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1495   const struct intel_device_info *devinfo = &screen->devinfo;
1496   enum brw_param_builtin *system_values = NULL;
1497   unsigned num_system_values = 0;
1498   unsigned num_cbufs = 0;
1499
1500   nir_shader *nir;
1501
1502   struct iris_binding_table bt;
1503
1504   const struct iris_tcs_prog_key *const key = &shader->key.tcs;
1505   struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(devinfo, key);
1506
1507   if (ish) {
1508      nir = nir_shader_clone(mem_ctx, ish->nir);
1509
1510      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1511                          &num_system_values, &num_cbufs);
1512      iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1513                               num_system_values, num_cbufs);
1514      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1515   } else {
1516      nir =
1517         brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, &brw_key);
1518
1519      /* Reserve space for passing the default tess levels as constants. */
1520      num_cbufs = 1;
1521      num_system_values = 8;
1522      system_values =
1523         rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
1524      prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
1525      prog_data->nr_params = num_system_values;
1526
1527      if (key->tes_primitive_mode == GL_QUADS) {
1528         for (int i = 0; i < 4; i++)
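      /* The assignments below pack the default levels into the high end of
       * the 8 param slots, leaving any leading slots unused.  For quads,
       * for example, this ends up as [7..4] = OUTER_X..W, [3..2] = INNER_X/Y.
       */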
1529            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1530
1531         system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1532         system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
1533      } else if (key->tes_primitive_mode == GL_TRIANGLES) {
1534         for (int i = 0; i < 3; i++)
1535            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1536
1537         system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1538      } else {
1539         assert(key->tes_primitive_mode == GL_ISOLINES);
1540         system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
1541         system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
1542      }
1543
1544      /* Manually setup the TCS binding table. */
1545      memset(&bt, 0, sizeof(bt));
1546      bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
1547      bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
1548      bt.size_bytes = 4;
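      /* That is: a single UBO binding table entry for the buffer that will
       * hold the default tess levels set up above.
       */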
1549
1550      prog_data->ubo_ranges[0].length = 1;
1551   }
1552
1553   char *error_str = NULL;
1554   const unsigned *program =
1555      brw_compile_tcs(compiler, dbg, mem_ctx, &brw_key, tcs_prog_data,
1556                      nir, -1, NULL, &error_str);
1557   if (program == NULL) {
1558      dbg_printf("Failed to compile tessellation control shader: %s\n", error_str);
1559      ralloc_free(mem_ctx);
1560
1561      shader->compilation_failed = true;
1562      util_queue_fence_signal(&shader->ready);
1563
1564      return;
1565   }
1566
1567   shader->compilation_failed = false;
1568
1569   iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1570
1571   iris_finalize_program(shader, prog_data, NULL, system_values,
1572                         num_system_values, 0, num_cbufs, &bt);
1573
1574   iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
1575                      IRIS_CACHE_TCS, sizeof(*key), key, program);
1576
1577   if (ish)
1578      iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1579
1580   ralloc_free(mem_ctx);
1581}
1582
1583/**
1584 * Update the current tessellation control shader variant.
1585 *
1586 * Fill out the key, look in the cache, compile and bind if needed.
1587 */
1588static void
1589iris_update_compiled_tcs(struct iris_context *ice)
1590{
1591   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
1592   struct iris_uncompiled_shader *tcs =
1593      ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
1594   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1595   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1596   const struct brw_compiler *compiler = screen->compiler;
1597   const struct intel_device_info *devinfo = &screen->devinfo;
1598
1599   const struct shader_info *tes_info =
1600      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
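   /* Note that input_vertices is only included in the key for the
    * passthrough TCS or when 8_PATCH mode is in use; otherwise it stays
    * zero, so the variant doesn't depend on the patch size.
    */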
1601   struct iris_tcs_prog_key key = {
1602      .vue.base.program_string_id = tcs ? tcs->program_id : 0,
1603      .tes_primitive_mode = tes_info->tess.primitive_mode,
1604      .input_vertices =
1605         !tcs || compiler->use_tcs_8_patch ? ice->state.vertices_per_patch : 0,
1606      .quads_workaround = devinfo->ver < 9 &&
1607                          tes_info->tess.primitive_mode == GL_QUADS &&
1608                          tes_info->tess.spacing == TESS_SPACING_EQUAL,
1609   };
1610   get_unified_tess_slots(ice, &key.outputs_written,
1611                          &key.patch_outputs_written);
1612   screen->vtbl.populate_tcs_key(ice, &key);
1613
1614   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
1615   struct iris_compiled_shader *shader;
1616   bool added = false;
1617
1618   if (tcs != NULL) {
1619      shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
1620                                   sizeof(key), &added);
1621   } else {
1622      /* Look for and possibly create a passthrough TCS */
1623      shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
1624
1625
1626      if (shader == NULL) {
1627         shader = iris_create_shader_variant(screen, ice->shaders.cache,
1628                                             IRIS_CACHE_TCS, sizeof(key), &key);
1629         added = true;
1630      }
1631
1632   }
1633
1634   /* If the variant was just created in (whichever) cache, compile it:
1635    * always for the passthrough TCS (tcs == NULL), and otherwise whenever
1636    * the disk cache does not already contain the assembly.
1637    */
1638   if (added &&
1639       (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
1640                                                 &key, sizeof(key)))) {
1641      iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
1642                       shader);
1643   }
1644
1645   if (shader->compilation_failed)
1646      shader = NULL;
1647
1648   if (old != shader) {
1649      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
1650                                    shader);
1651      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
1652                                IRIS_STAGE_DIRTY_BINDINGS_TCS |
1653                                IRIS_STAGE_DIRTY_CONSTANTS_TCS;
1654      shs->sysvals_need_upload = true;
1655
1656      unsigned urb_entry_size = shader ?
1657         ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1658      check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
1659   }
1660}
1661
1662/**
1663 * Compile a tessellation evaluation shader, and upload the assembly.
1664 */
1665static void
1666iris_compile_tes(struct iris_screen *screen,
1667                 struct u_upload_mgr *uploader,
1668                 struct pipe_debug_callback *dbg,
1669                 struct iris_uncompiled_shader *ish,
1670                 struct iris_compiled_shader *shader)
1671{
1672   const struct brw_compiler *compiler = screen->compiler;
1673   void *mem_ctx = ralloc_context(NULL);
1674   struct brw_tes_prog_data *tes_prog_data =
1675      rzalloc(mem_ctx, struct brw_tes_prog_data);
1676   struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
1677   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1678   enum brw_param_builtin *system_values;
1679   const struct intel_device_info *devinfo = &screen->devinfo;
1680   unsigned num_system_values;
1681   unsigned num_cbufs;
1682
1683   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1684   const struct iris_tes_prog_key *const key = &shader->key.tes;
1685
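   /* Lowering user clip planes adds new clip distance outputs, so re-run
    * the IO/variable lowering passes and re-gather shader info afterwards.
    */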
1686   if (key->vue.nr_userclip_plane_consts) {
1687      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1688      nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1689                        true, false, NULL);
1690      nir_lower_io_to_temporaries(nir, impl, true, false);
1691      nir_lower_global_vars_to_local(nir);
1692      nir_lower_vars_to_ssa(nir);
1693      nir_shader_gather_info(nir, impl);
1694   }
1695
1696   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1697                       &num_system_values, &num_cbufs);
1698
1699   struct iris_binding_table bt;
1700   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1701                            num_system_values, num_cbufs);
1702
1703   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1704
1705   struct brw_vue_map input_vue_map;
1706   brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
1707                            key->patch_inputs_read);
1708
1709   struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(devinfo, key);
1710
1711   char *error_str = NULL;
1712   const unsigned *program =
1713      brw_compile_tes(compiler, dbg, mem_ctx, &brw_key, &input_vue_map,
1714                      tes_prog_data, nir, -1, NULL, &error_str);
1715   if (program == NULL) {
1716      dbg_printf("Failed to compile tessellation evaluation shader: %s\n", error_str);
1717      ralloc_free(mem_ctx);
1718
1719      shader->compilation_failed = true;
1720      util_queue_fence_signal(&shader->ready);
1721
1722      return;
1723   }
1724
1725   shader->compilation_failed = false;
1726
1727   iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1728
1729   uint32_t *so_decls =
1730      screen->vtbl.create_so_decl_list(&ish->stream_output,
1731                                       &vue_prog_data->vue_map);
1732
1733   iris_finalize_program(shader, prog_data, so_decls, system_values,
1734                         num_system_values, 0, num_cbufs, &bt);
1735
1736   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_TES,
1737                      sizeof(*key), key, program);
1738
1739   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1740
1741   ralloc_free(mem_ctx);
1742}
1743
1744/**
1745 * Update the current tessellation evaluation shader variant.
1746 *
1747 * Fill out the key, look in the cache, compile and bind if needed.
1748 */
1749static void
1750iris_update_compiled_tes(struct iris_context *ice)
1751{
1752   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1753   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1754   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
1755   struct iris_uncompiled_shader *ish =
1756      ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1757
1758   struct iris_tes_prog_key key = { KEY_ID(vue.base) };
1759   get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
1760   screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1761
1762   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
1763   bool added;
1764   struct iris_compiled_shader *shader =
1765      find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added);
1766
1767   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1768                                          &key, sizeof(key))) {
1769      iris_compile_tes(screen, uploader, &ice->dbg, ish, shader);
1770   }
1771
1772   if (shader->compilation_failed)
1773      shader = NULL;
1774
1775   if (old != shader) {
1776      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
1777                                    shader);
1778      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
1779                                IRIS_STAGE_DIRTY_BINDINGS_TES |
1780                                IRIS_STAGE_DIRTY_CONSTANTS_TES;
1781      shs->sysvals_need_upload = true;
1782
1783      unsigned urb_entry_size = shader ?
1784         ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1785      check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL);
1786   }
1787
1788   /* TODO: Could compare and avoid flagging this. */
1789   const struct shader_info *tes_info = &ish->nir->info;
1790   if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
1791      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
1792      ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
1793   }
1794}
1795
1796/**
1797 * Compile a geometry shader, and upload the assembly.
1798 */
1799static void
1800iris_compile_gs(struct iris_screen *screen,
1801                struct u_upload_mgr *uploader,
1802                struct pipe_debug_callback *dbg,
1803                struct iris_uncompiled_shader *ish,
1804                struct iris_compiled_shader *shader)
1805{
1806   const struct brw_compiler *compiler = screen->compiler;
1807   const struct intel_device_info *devinfo = &screen->devinfo;
1808   void *mem_ctx = ralloc_context(NULL);
1809   struct brw_gs_prog_data *gs_prog_data =
1810      rzalloc(mem_ctx, struct brw_gs_prog_data);
1811   struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
1812   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1813   enum brw_param_builtin *system_values;
1814   unsigned num_system_values;
1815   unsigned num_cbufs;
1816
1817   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1818   const struct iris_gs_prog_key *const key = &shader->key.gs;
1819
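   /* As in the TES path, lowering user clip planes adds new outputs, so
    * the IO lowering passes and shader info need refreshing afterwards.
    */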
1820   if (key->vue.nr_userclip_plane_consts) {
1821      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1822      nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1823                        false, NULL);
1824      nir_lower_io_to_temporaries(nir, impl, true, false);
1825      nir_lower_global_vars_to_local(nir);
1826      nir_lower_vars_to_ssa(nir);
1827      nir_shader_gather_info(nir, impl);
1828   }
1829
1830   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1831                       &num_system_values, &num_cbufs);
1832
1833   struct iris_binding_table bt;
1834   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1835                            num_system_values, num_cbufs);
1836
1837   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1838
1839   brw_compute_vue_map(devinfo,
1840                       &vue_prog_data->vue_map, nir->info.outputs_written,
1841                       nir->info.separate_shader, /* pos_slots */ 1);
1842
1843   struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key);
1844
1845   char *error_str = NULL;
1846   const unsigned *program =
1847      brw_compile_gs(compiler, dbg, mem_ctx, &brw_key, gs_prog_data,
1848                     nir, -1, NULL, &error_str);
1849   if (program == NULL) {
1850      dbg_printf("Failed to compile geometry shader: %s\n", error_str);
1851      ralloc_free(mem_ctx);
1852
1853      shader->compilation_failed = true;
1854      util_queue_fence_signal(&shader->ready);
1855
1856      return;
1857   }
1858
1859   shader->compilation_failed = false;
1860
1861   iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1862
1863   uint32_t *so_decls =
1864      screen->vtbl.create_so_decl_list(&ish->stream_output,
1865                                       &vue_prog_data->vue_map);
1866
1867   iris_finalize_program(shader, prog_data, so_decls, system_values,
1868                         num_system_values, 0, num_cbufs, &bt);
1869
1870   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS,
1871                      sizeof(*key), key, program);
1872
1873   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1874
1875   ralloc_free(mem_ctx);
1876}
1877
1878/**
1879 * Update the current geometry shader variant.
1880 *
1881 * Fill out the key, look in the cache, compile and bind if needed.
1882 */
1883static void
1884iris_update_compiled_gs(struct iris_context *ice)
1885{
1886   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
1887   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1888   struct iris_uncompiled_shader *ish =
1889      ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
1890   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
1891   struct iris_compiled_shader *shader = NULL;
1892   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1893
1894   if (ish) {
1895      struct iris_gs_prog_key key = { KEY_ID(vue.base) };
1896      screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1897
1898      bool added;
1899
1900      shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key,
1901                                   sizeof(key), &added);
1902
1903      if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1904                                             &key, sizeof(key))) {
1905         iris_compile_gs(screen, uploader, &ice->dbg, ish, shader);
1906      }
1907
1908      if (shader->compilation_failed)
1909         shader = NULL;
1910   }
1911
1912   if (old != shader) {
1913      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
1914                                    shader);
1915      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
1916                                IRIS_STAGE_DIRTY_BINDINGS_GS |
1917                                IRIS_STAGE_DIRTY_CONSTANTS_GS;
1918      shs->sysvals_need_upload = true;
1919
1920      unsigned urb_entry_size = shader ?
1921         ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1922      check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
1923   }
1924}
1925
1926/**
1927 * Compile a fragment (pixel) shader, and upload the assembly.
1928 */
1929static void
1930iris_compile_fs(struct iris_screen *screen,
1931                struct u_upload_mgr *uploader,
1932                struct pipe_debug_callback *dbg,
1933                struct iris_uncompiled_shader *ish,
1934                struct iris_compiled_shader *shader,
1935                struct brw_vue_map *vue_map)
1936{
1937   const struct brw_compiler *compiler = screen->compiler;
1938   void *mem_ctx = ralloc_context(NULL);
1939   struct brw_wm_prog_data *fs_prog_data =
1940      rzalloc(mem_ctx, struct brw_wm_prog_data);
1941   struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
1942   enum brw_param_builtin *system_values;
1943   const struct intel_device_info *devinfo = &screen->devinfo;
1944   unsigned num_system_values;
1945   unsigned num_cbufs;
1946
1947   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1948   const struct iris_fs_prog_key *const key = &shader->key.fs;
1949
1950   prog_data->use_alt_mode = nir->info.is_arb_asm;
1951
1952   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1953                       &num_system_values, &num_cbufs);
1954
1955   /* Lower output variables to load_output intrinsics before setting up
1956    * binding tables, so iris_setup_binding_table can map any load_output
1957    * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
1958    * non-coherent framebuffer fetches.
1959    */
1960   brw_nir_lower_fs_outputs(nir);
1961
1962   /* On Gfx11+, shader RT write messages have a "Null Render Target" bit
1963    * and do not need a binding table entry with a null surface.  Earlier
1964    * generations need an entry for a null surface.
1965    */
1966   int null_rts = devinfo->ver < 11 ? 1 : 0;
1967
1968   struct iris_binding_table bt;
1969   iris_setup_binding_table(devinfo, nir, &bt,
1970                            MAX2(key->nr_color_regions, null_rts),
1971                            num_system_values, num_cbufs);
1972
1973   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1974
1975   struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(devinfo, key);
1976
1977   struct brw_compile_fs_params params = {
1978      .nir = nir,
1979      .key = &brw_key,
1980      .prog_data = fs_prog_data,
1981
1982      .allow_spilling = true,
1983      .vue_map = vue_map,
1984
1985      .log_data = dbg,
1986   };
1987
1988   const unsigned *program = brw_compile_fs(compiler, mem_ctx, &params);
1989   if (program == NULL) {
1990      dbg_printf("Failed to compile fragment shader: %s\n", params.error_str);
1991      ralloc_free(mem_ctx);
1992
1993      shader->compilation_failed = true;
1994      util_queue_fence_signal(&shader->ready);
1995
1996      return;
1997   }
1998
1999   shader->compilation_failed = false;
2000
2001   iris_debug_recompile(screen, dbg, ish, &brw_key.base);
2002
2003   iris_finalize_program(shader, prog_data, NULL, system_values,
2004                         num_system_values, 0, num_cbufs, &bt);
2005
2006   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS,
2007                      sizeof(*key), key, program);
2008
2009   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2010
2011   ralloc_free(mem_ctx);
2012}
2013
2014/**
2015 * Update the current fragment shader variant.
2016 *
2017 * Fill out the key, look in the cache, compile and bind if needed.
2018 */
2019static void
2020iris_update_compiled_fs(struct iris_context *ice)
2021{
2022   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
2023   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2024   struct iris_uncompiled_shader *ish =
2025      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2026   struct iris_fs_prog_key key = { KEY_ID(base) };
2027   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2028   screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
2029
2030   struct brw_vue_map *last_vue_map =
2031      &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
2032
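   /* Shaders reading more than 16 varyings set IRIS_NOS_LAST_VUE_MAP at
    * create time; for those, the key must carry the exact set of valid
    * slots in the VUE map feeding the FS.
    */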
2033   if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
2034      key.input_slots_valid = last_vue_map->slots_valid;
2035
2036   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
2037   bool added;
2038   struct iris_compiled_shader *shader =
2039      find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key,
2040                          sizeof(key), &added);
2041
2042   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2043                                          &key, sizeof(key))) {
2044      iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map);
2045   }
2046
2047   if (shader->compilation_failed)
2048      shader = NULL;
2049
2050   if (old != shader) {
2051      // XXX: We only need to flag CLIP if the use of NONPERSPECTIVE
2052      // barycentrics toggles.  We might be able to avoid flagging SBE too.
2053      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
2054                                    shader);
2055      ice->state.dirty |= IRIS_DIRTY_WM |
2056                          IRIS_DIRTY_CLIP |
2057                          IRIS_DIRTY_SBE;
2058      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
2059                                IRIS_STAGE_DIRTY_BINDINGS_FS |
2060                                IRIS_STAGE_DIRTY_CONSTANTS_FS;
2061      shs->sysvals_need_upload = true;
2062   }
2063}
2064
2065/**
2066 * Update the last enabled stage's VUE map.
2067 *
2068 * When the shader feeding the rasterizer's output interface changes, we
2069 * need to re-emit various packets.
2070 */
2071static void
2072update_last_vue_map(struct iris_context *ice,
2073                    struct iris_compiled_shader *shader)
2074{
2075   struct brw_vue_prog_data *vue_prog_data = (void *) shader->prog_data;
2076   struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
2077   struct brw_vue_map *old_map = !ice->shaders.last_vue_shader ? NULL :
2078      &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
2079   const uint64_t changed_slots =
2080      (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
2081
2082   if (changed_slots & VARYING_BIT_VIEWPORT) {
2083      ice->state.num_viewports =
2084         (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
2085      ice->state.dirty |= IRIS_DIRTY_CLIP |
2086                          IRIS_DIRTY_SF_CL_VIEWPORT |
2087                          IRIS_DIRTY_CC_VIEWPORT |
2088                          IRIS_DIRTY_SCISSOR_RECT;
2089      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
2090         ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
2091   }
2092
2093   if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
2094      ice->state.dirty |= IRIS_DIRTY_SBE;
2095   }
2096
2097   iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
2098}
2099
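/**
 * Re-upload any constant buffer surface states needed for pull constants.
 *
 * Shaders with UBO pull loads read constants through surface states rather
 * than push constants, so if new buffers were bound (or new system values
 * need uploading), flag the stage's binding table as dirty.
 */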
2100static void
2101iris_update_pull_constant_descriptors(struct iris_context *ice,
2102                                      gl_shader_stage stage)
2103{
2104   struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2105
2106   if (!shader || !shader->prog_data->has_ubo_pull)
2107      return;
2108
2109   struct iris_shader_state *shs = &ice->state.shaders[stage];
2110   bool any_new_descriptors =
2111      shader->num_system_values > 0 && shs->sysvals_need_upload;
2112
2113   unsigned bound_cbufs = shs->bound_cbufs;
2114
2115   while (bound_cbufs) {
2116      const int i = u_bit_scan(&bound_cbufs);
2117      struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
2118      struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
2119      if (!surf_state->res && cbuf->buffer) {
2120         iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
2121                                         ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
2122         any_new_descriptors = true;
2123      }
2124   }
2125
2126   if (any_new_descriptors)
2127      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
2128}
2129
2130/**
2131 * Update the current shader variants for the given state.
2132 *
2133 * This should be called on every draw call to ensure that the correct
2134 * shaders are bound.  It will also flag any dirty state triggered by
2135 * swapping out those shaders.
2136 */
2137void
2138iris_update_compiled_shaders(struct iris_context *ice)
2139{
2140   const uint64_t stage_dirty = ice->state.stage_dirty;
2141
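   /* Tessellation shaders come as a pair: the TCS key depends on TES info
    * (and we may need a passthrough TCS), so update both together, or drop
    * both when no TES is bound.
    */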
2142   if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
2143                      IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2144      struct iris_uncompiled_shader *tes =
2145         ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2146      if (tes) {
2147         iris_update_compiled_tcs(ice);
2148         iris_update_compiled_tes(ice);
2149      } else {
2150         iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL);
2151         iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL);
2152         ice->state.stage_dirty |=
2153            IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
2154            IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
2155            IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;
2156
2157         if (ice->shaders.urb.constrained)
2158            ice->state.dirty |= IRIS_DIRTY_URB;
2159      }
2160   }
2161
2162   if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
2163      iris_update_compiled_vs(ice);
2164   if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
2165      iris_update_compiled_gs(ice);
2166
2167   if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
2168                      IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2169      const struct iris_compiled_shader *gs =
2170         ice->shaders.prog[MESA_SHADER_GEOMETRY];
2171      const struct iris_compiled_shader *tes =
2172         ice->shaders.prog[MESA_SHADER_TESS_EVAL];
2173
2174      bool points_or_lines = false;
2175
2176      if (gs) {
2177         const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
2178         points_or_lines =
2179            gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
2180            gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
2181      } else if (tes) {
2182         const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
2183         points_or_lines =
2184            tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
2185            tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
2186      }
2187
2188      if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
2189         /* This feeds the Viewport XY Clip Test enables in the CLIP state. */
2190         ice->shaders.output_topology_is_points_or_lines = points_or_lines;
2191         ice->state.dirty |= IRIS_DIRTY_CLIP;
2192      }
2193   }
2194
2195   gl_shader_stage last_stage = last_vue_stage(ice);
2196   struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
2197   struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
2198   update_last_vue_map(ice, shader);
2199   if (ice->state.streamout != shader->streamout) {
2200      ice->state.streamout = shader->streamout;
2201      ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
2202   }
2203
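   /* pipe_stream_output_info strides are in dwords, while the SO target's
    * stride is kept in bytes, so convert when updating bound targets.
    */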
2204   if (ice->state.streamout_active) {
2205      for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
2206         struct iris_stream_output_target *so =
2207            (void *) ice->state.so_target[i];
2208         if (so)
2209            so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
2210      }
2211   }
2212
2213   if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
2214      iris_update_compiled_fs(ice);
2215
2216   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
2217      if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
2218         iris_update_pull_constant_descriptors(ice, i);
2219   }
2220}
2221
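/**
 * Compile a compute shader, and upload the assembly.
 */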
2222static void
2223iris_compile_cs(struct iris_screen *screen,
2224                struct u_upload_mgr *uploader,
2225                struct pipe_debug_callback *dbg,
2226                struct iris_uncompiled_shader *ish,
2227                struct iris_compiled_shader *shader)
2228{
2229   const struct brw_compiler *compiler = screen->compiler;
2230   void *mem_ctx = ralloc_context(NULL);
2231   struct brw_cs_prog_data *cs_prog_data =
2232      rzalloc(mem_ctx, struct brw_cs_prog_data);
2233   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
2234   enum brw_param_builtin *system_values;
2235   const struct intel_device_info *devinfo = &screen->devinfo;
2236   unsigned num_system_values;
2237   unsigned num_cbufs;
2238
2239   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2240   const struct iris_cs_prog_key *const key = &shader->key.cs;
2241
2242   NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
2243
2244   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data,
2245                       ish->kernel_input_size,
2246                       &system_values, &num_system_values, &num_cbufs);
2247
2248   struct iris_binding_table bt;
2249   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2250                            num_system_values, num_cbufs);
2251
2252   struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(devinfo, key);
2253
2254   struct brw_compile_cs_params params = {
2255      .nir = nir,
2256      .key = &brw_key,
2257      .prog_data = cs_prog_data,
2258      .log_data = dbg,
2259   };
2260
2261   const unsigned *program = brw_compile_cs(compiler, mem_ctx, &params);
2262   if (program == NULL) {
2263      dbg_printf("Failed to compile compute shader: %s\n", params.error_str);
2264
2265      shader->compilation_failed = true;
2266      util_queue_fence_signal(&shader->ready);
2267
2268      return;
2269   }
2270
2271   shader->compilation_failed = false;
2272
2273   iris_debug_recompile(screen, dbg, ish, &brw_key.base);
2274
2275   iris_finalize_program(shader, prog_data, NULL, system_values,
2276                         num_system_values, ish->kernel_input_size, num_cbufs,
2277                         &bt);
2278
2279   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS,
2280                      sizeof(*key), key, program);
2281
2282   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2283
2284   ralloc_free(mem_ctx);
2285}
2286
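/**
 * Update the current compute shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */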
2287static void
2288iris_update_compiled_cs(struct iris_context *ice)
2289{
2290   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
2291   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2292   struct iris_uncompiled_shader *ish =
2293      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
2294
2295   struct iris_cs_prog_key key = { KEY_ID(base) };
2296   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2297   screen->vtbl.populate_cs_key(ice, &key);
2298
2299   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
2300   bool added;
2301   struct iris_compiled_shader *shader =
2302      find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
2303                          sizeof(key), &added);
2304
2305   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2306                                          &key, sizeof(key))) {
2307      iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
2308   }
2309
2310   if (shader->compilation_failed)
2311      shader = NULL;
2312
2313   if (old != shader) {
2314      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
2315                                    shader);
2316      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
2317                                IRIS_STAGE_DIRTY_BINDINGS_CS |
2318                                IRIS_STAGE_DIRTY_CONSTANTS_CS;
2319      shs->sysvals_need_upload = true;
2320   }
2321}
2322
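/**
 * The compute analog of iris_update_compiled_shaders(): update the CS
 * variant and its pull constant descriptors as needed.
 */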
2323void
2324iris_update_compiled_compute_shader(struct iris_context *ice)
2325{
2326   if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
2327      iris_update_compiled_cs(ice);
2328
2329   if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
2330      iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
2331}
2332
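/**
 * Fill out the per-thread push constant data for a compute shader.
 *
 * The only per-thread value is the subgroup ID: each thread's block
 * occupies one full register (8 dwords), with the thread's index stored
 * in its first dword.
 */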
2333void
2334iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
2335                               unsigned threads,
2336                               uint32_t *dst)
2337{
2338   assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);
2339   assert(cs_prog_data->push.cross_thread.size == 0);
2340   assert(cs_prog_data->push.per_thread.dwords == 1);
2341   assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
2342   for (unsigned t = 0; t < threads; t++)
2343      dst[8 * t] = t;
2344}
2345
2346/**
2347 * Allocate scratch BOs as needed for the given per-thread size and stage.
2348 */
2349struct iris_bo *
2350iris_get_scratch_space(struct iris_context *ice,
2351                       unsigned per_thread_scratch,
2352                       gl_shader_stage stage)
2353{
2354   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2355   struct iris_bufmgr *bufmgr = screen->bufmgr;
2356   const struct intel_device_info *devinfo = &screen->devinfo;
2357
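   /* per_thread_scratch is a power of two of at least 1KB; encode it as a
    * log2 relative to 1KB (1KB -> 0, 2KB -> 1, ...), so e.g. 64KB encodes
    * to ffs(1 << 16) - 11 = 6.
    */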
2358   unsigned encoded_size = ffs(per_thread_scratch) - 11;
2359   assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
2360   assert(per_thread_scratch == 1 << (encoded_size + 10));
2361
2362   /* On GFX version 12.5, scratch access changed to a surface-based model.
2363    * Instead of each shader type having its own layout based on IDs passed
2364    * from the relevant fixed-function unit, all scratch access is based on
2365    * thread IDs like it always has been for compute.
2366    */
2367   if (devinfo->verx10 >= 125)
2368      stage = MESA_SHADER_COMPUTE;
2369
2370   struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
2371
2372   if (!*bop) {
2373      assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
2374      uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
2375      *bop = iris_bo_alloc(bufmgr, "scratch", size, 1, IRIS_MEMZONE_SHADER, 0);
2376   }
2377
2378   return *bop;
2379}
2380
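/**
 * Get (creating if needed) a scratch surface state for the given
 * per-thread scratch size, for the surface-based scratch model used on
 * Gfx12.5+.
 */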
2381const struct iris_state_ref *
2382iris_get_scratch_surf(struct iris_context *ice,
2383                      unsigned per_thread_scratch)
2384{
2385   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2386   ASSERTED const struct intel_device_info *devinfo = &screen->devinfo;
2387
2388   assert(devinfo->verx10 >= 125);
2389
2390   unsigned encoded_size = ffs(per_thread_scratch) - 11;
2391   assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
2392   assert(per_thread_scratch == 1 << (encoded_size + 10));
2393
2394   struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
2395
2396   if (ref->res)
2397      return ref;
2398
2399   struct iris_bo *scratch_bo =
2400      iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
2401
2402   void *map = upload_state(ice->state.bindless_uploader, ref,
2403                            screen->isl_dev.ss.size, 64);
2404
2405   isl_buffer_fill_state(&screen->isl_dev, map,
2406                         .address = scratch_bo->address,
2407                         .size_B = scratch_bo->size,
2408                         .format = ISL_FORMAT_RAW,
2409                         .swizzle = ISL_SWIZZLE_IDENTITY,
2410                         .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
2411                         .stride_B = per_thread_scratch,
2412                         .is_scratch = true);
2413
2414   return ref;
2415}
2416
2417/* ------------------------------------------------------------------- */
2418
2419/**
2420 * The pipe->create_[stage]_state() driver hooks.
2421 *
2422 * Performs basic NIR preprocessing, records any state dependencies, and
2423 * returns an iris_uncompiled_shader as the Gallium CSO.
2424 *
2425 * Actual shader compilation to assembly happens later, at first use.
2426 */
2427static void *
2428iris_create_uncompiled_shader(struct iris_screen *screen,
2429                              nir_shader *nir,
2430                              const struct pipe_stream_output_info *so_info)
2431{
2432   struct iris_uncompiled_shader *ish =
2433      calloc(1, sizeof(struct iris_uncompiled_shader));
2434   if (!ish)
2435      return NULL;
2436
2437   pipe_reference_init(&ish->ref, 1);
2438   list_inithead(&ish->variants);
2439   simple_mtx_init(&ish->lock, mtx_plain);
2440
2441   ish->uses_atomic_load_store = iris_uses_image_atomic(nir);
2442
2443   ish->program_id = get_new_program_id(screen);
2444   ish->nir = nir;
2445   if (so_info) {
2446      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
2447      update_so_info(&ish->stream_output, nir->info.outputs_written);
2448   }
2449
2450   if (screen->disk_cache) {
2451      /* Serialize the NIR to a binary blob that we can hash for the disk
2452       * cache.  Drop unnecessary information (like variable names)
2453       * so the serialized NIR is smaller, and also to let us detect more
2454       * isomorphic shaders when hashing, increasing cache hits.
2455       */
2456      struct blob blob;
2457      blob_init(&blob);
2458      nir_serialize(&blob, nir, true);
2459      _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
2460      blob_finish(&blob);
2461   }
2462
2463   return ish;
2464}
2465
2466static void *
2467iris_create_compute_state(struct pipe_context *ctx,
2468                          const struct pipe_compute_state *state)
2469{
2470   struct iris_context *ice = (void *) ctx;
2471   struct iris_screen *screen = (void *) ctx->screen;
2472   struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
2473   const nir_shader_compiler_options *options =
2474      screen->compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions;
2475
2476   nir_shader *nir;
2477   switch (state->ir_type) {
2478   case PIPE_SHADER_IR_NIR:
2479      nir = (void *)state->prog;
2480      break;
2481
2482   case PIPE_SHADER_IR_NIR_SERIALIZED: {
2483      struct blob_reader reader;
2484      const struct pipe_binary_program_header *hdr = state->prog;
2485      blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
2486      nir = nir_deserialize(NULL, options, &reader);
2487      break;
2488   }
2489
2490   default:
2491      unreachable("Unsupported IR");
2492   }
2493
2494   /* Most of iris doesn't really care about the difference between compute
2495    * shaders and kernels.  We also tend to hard-code COMPUTE everywhere so
2496    * it's way easier if we just normalize to COMPUTE here.
2497    */
2498   assert(nir->info.stage == MESA_SHADER_COMPUTE ||
2499          nir->info.stage == MESA_SHADER_KERNEL);
2500   nir->info.stage = MESA_SHADER_COMPUTE;
2501
2502   struct iris_uncompiled_shader *ish =
2503      iris_create_uncompiled_shader(screen, nir, NULL);
2504   ish->kernel_input_size = state->req_input_mem;
2505   ish->kernel_shared_size = state->req_local_mem;
2506
2507   // XXX: disallow more than 64KB of shared variables
2508
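   /* Optionally compile a variant with the default key now, so it's
    * (hopefully) ready by the time the first dispatch needs it.
    */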
2509   if (screen->precompile) {
2510      struct iris_cs_prog_key key = { KEY_ID(base) };
2511
2512      struct iris_compiled_shader *shader =
2513         iris_create_shader_variant(screen, NULL, IRIS_CACHE_CS,
2514                                    sizeof(key), &key);
2515
2516      /* Append our new variant to the shader's variant list. */
2517      list_addtail(&shader->link, &ish->variants);
2518
2519      if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
2520                                    &key, sizeof(key))) {
2521         iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
2522      }
2523   }
2524
2525   return ish;
2526}
2527
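/**
 * Threaded compile job entry point; dispatches to the per-stage compiler.
 */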
2528static void
2529iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
2530{
2531   const struct iris_threaded_compile_job *job =
2532      (struct iris_threaded_compile_job *) _job;
2533
2534   struct iris_screen *screen = job->screen;
2535   struct u_upload_mgr *uploader = job->uploader;
2536   struct pipe_debug_callback *dbg = job->dbg;
2537   struct iris_uncompiled_shader *ish = job->ish;
2538   struct iris_compiled_shader *shader = job->shader;
2539
2540   switch (ish->nir->info.stage) {
2541   case MESA_SHADER_VERTEX:
2542      iris_compile_vs(screen, uploader, dbg, ish, shader);
2543      break;
2544   case MESA_SHADER_TESS_CTRL:
2545      iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader);
2546      break;
2547   case MESA_SHADER_TESS_EVAL:
2548      iris_compile_tes(screen, uploader, dbg, ish, shader);
2549      break;
2550   case MESA_SHADER_GEOMETRY:
2551      iris_compile_gs(screen, uploader, dbg, ish, shader);
2552      break;
2553   case MESA_SHADER_FRAGMENT:
2554      iris_compile_fs(screen, uploader, dbg, ish, shader, NULL);
2555      break;
2556
2557   default:
2558      unreachable("Invalid shader stage.");
2559   }
2560}
2561
2562static void *
2563iris_create_shader_state(struct pipe_context *ctx,
2564                         const struct pipe_shader_state *state)
2565{
2566   struct iris_context *ice = (void *) ctx;
2567   struct iris_screen *screen = (void *) ctx->screen;
2568   struct nir_shader *nir;
2569
2570   if (state->type == PIPE_SHADER_IR_TGSI)
2571      nir = tgsi_to_nir(state->tokens, ctx->screen, false);
2572   else
2573      nir = state->ir.nir;
2574
2575   const struct shader_info *const info = &nir->info;
2576   struct iris_uncompiled_shader *ish =
2577      iris_create_uncompiled_shader(screen, nir, &state->stream_output);
2578
2579   union iris_any_prog_key key;
2580   unsigned key_size = 0;
2581
2582   memset(&key, 0, sizeof(key));
2583
2584   switch (info->stage) {
2585   case MESA_SHADER_VERTEX:
2586      /* User clip planes */
2587      if (info->clip_distance_array_size == 0)
2588         ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2589
2590      key.vs = (struct iris_vs_prog_key) { KEY_ID(vue.base) };
2591      key_size = sizeof(key.vs);
2592      break;
2593
2594   case MESA_SHADER_TESS_CTRL: {
2595      const unsigned _GL_TRIANGLES = 0x0004;
2596
2597      key.tcs = (struct iris_tcs_prog_key) {
2598         KEY_ID(vue.base),
2599         // XXX: make sure the linker fills this out from the TES...
2600         .tes_primitive_mode =
2601         info->tess.primitive_mode ? info->tess.primitive_mode
2602                                   : _GL_TRIANGLES,
2603         .outputs_written = info->outputs_written,
2604         .patch_outputs_written = info->patch_outputs_written,
2605      };
2606
2607      /* 8_PATCH mode needs the key to contain the input patch dimensionality.
2608       * We don't have that information, so we randomly guess that the input
2609       * and output patches are the same size.  This is a bad guess, but we
2610       * can't do much better.
2611       */
2612      if (screen->compiler->use_tcs_8_patch)
2613         key.tcs.input_vertices = info->tess.tcs_vertices_out;
2614
2615      key_size = sizeof(key.tcs);
2616      break;
2617   }
2618
2619   case MESA_SHADER_TESS_EVAL:
2620      /* User clip planes */
2621      if (info->clip_distance_array_size == 0)
2622         ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2623
2624      key.tes = (struct iris_tes_prog_key) {
2625         KEY_ID(vue.base),
2626         // XXX: not ideal, need TCS output/TES input unification
2627         .inputs_read = info->inputs_read,
2628         .patch_inputs_read = info->patch_inputs_read,
2629      };
2630
2631      key_size = sizeof(key.tes);
2632      break;
2633
2634   case MESA_SHADER_GEOMETRY:
2635      /* User clip planes */
2636      if (info->clip_distance_array_size == 0)
2637         ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2638
2639      key.gs = (struct iris_gs_prog_key) { KEY_ID(vue.base) };
2640      key_size = sizeof(key.gs);
2641      break;
2642
2643   case MESA_SHADER_FRAGMENT:
2644      ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
2645                  (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
2646                  (1ull << IRIS_NOS_RASTERIZER) |
2647                  (1ull << IRIS_NOS_BLEND);
2648
2649      /* The program key needs the VUE map if there are > 16 inputs */
2650      if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) {
2651         ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
2652      }
2653
2654      const uint64_t color_outputs = info->outputs_written &
2655         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
2656           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
2657           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
2658
2659      bool can_rearrange_varyings =
2660         util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
2661
2662      const struct intel_device_info *devinfo = &screen->devinfo;
2663
2664      key.fs = (struct iris_fs_prog_key) {
2665         KEY_ID(base),
2666         .nr_color_regions = util_bitcount(color_outputs),
2667         .coherent_fb_fetch = devinfo->ver >= 9,
2668         .input_slots_valid =
2669            can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
2670      };
2671
2672      key_size = sizeof(key.fs);
2673      break;
2674
2675   default:
2676      unreachable("Invalid shader stage.");
2677   }
2678
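   /* Optionally precompile a variant with the default key on the shader
    * compiler queue, so it's (hopefully) ready before the first draw.
    */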
2679   if (screen->precompile) {
2680      struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
2681
2682      struct iris_compiled_shader *shader =
2683         iris_create_shader_variant(screen, NULL,
2684                                    (enum iris_program_cache_id) info->stage,
2685                                    key_size, &key);
2686
2687      /* Append our new variant to the shader's variant list. */
2688      list_addtail(&shader->link, &ish->variants);
2689
2690      if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
2691                                    &key, key_size)) {
2692         assert(!util_queue_fence_is_signalled(&shader->ready));
2693
2694         struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
2695
2696         job->screen = screen;
2697         job->uploader = uploader;
2698         job->ish = ish;
2699         job->shader = shader;
2700
2701         iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
2702                               iris_compile_shader);
2703      }
2704   }
2705
2706   return ish;
2707}
2708
2709/**
2710 * Called when the refcount on the iris_uncompiled_shader reaches 0.
2711 *
2712 * Frees the iris_uncompiled_shader.
2713 *
2714 * \sa iris_delete_shader_state
2715 */
2716void
2717iris_destroy_shader_state(struct pipe_context *ctx, void *state)
2718{
2719   struct iris_uncompiled_shader *ish = state;
2720
2721   /* No need to take ish->lock; we hold the last reference to ish */
2722   list_for_each_entry_safe(struct iris_compiled_shader, shader,
2723                            &ish->variants, link) {
2724      list_del(&shader->link);
2725
2726      iris_shader_variant_reference(&shader, NULL);
2727   }
2728
2729   simple_mtx_destroy(&ish->lock);
2730   util_queue_fence_destroy(&ish->ready);
2731
2732   ralloc_free(ish->nir);
2733   free(ish);
2734}
2735
2736/**
2737 * The pipe->delete_[stage]_state() driver hooks.
2738 *
2739 * \sa iris_destroy_shader_state
2740 */
2741static void
2742iris_delete_shader_state(struct pipe_context *ctx, void *state)
2743{
2744   struct iris_uncompiled_shader *ish = state;
2745   struct iris_context *ice = (void *) ctx;
2746
2747   const gl_shader_stage stage = ish->nir->info.stage;
2748
2749   if (ice->shaders.uncompiled[stage] == ish) {
2750      ice->shaders.uncompiled[stage] = NULL;
2751      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
2752   }
2753
2754   if (pipe_reference(&ish->ref, NULL))
2755      iris_destroy_shader_state(ctx, state);
2756}
2757
2758/**
2759 * The pipe->bind_[stage]_state() driver hook.
2760 *
2761 * Binds an uncompiled shader as the current one for a particular stage.
2762 * Updates dirty tracking to account for the shader's NOS.
2763 */
2764static void
2765bind_shader_state(struct iris_context *ice,
2766                  struct iris_uncompiled_shader *ish,
2767                  gl_shader_stage stage)
2768{
2769   uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
2770   const uint64_t nos = ish ? ish->nos : 0;
2771
2772   const struct shader_info *old_info = iris_get_shader_info(ice, stage);
2773   const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
2774
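   /* If the highest used texture unit changed, the SAMPLER_STATE table
    * size changes too, so flag the stage's sampler states for re-upload.
    */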
2775   if ((old_info ? BITSET_LAST_BIT(old_info->textures_used) : 0) !=
2776       (new_info ? BITSET_LAST_BIT(new_info->textures_used) : 0)) {
2777      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
2778   }
2779
2780   ice->shaders.uncompiled[stage] = ish;
2781   ice->state.stage_dirty |= stage_dirty_bit;
2782
2783   /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
2784    * (or that they no longer need to do so).
2785    */
2786   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
2787      if (nos & (1 << i))
2788         ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
2789      else
2790         ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
2791   }
2792}
2793
2794static void
2795iris_bind_vs_state(struct pipe_context *ctx, void *state)
2796{
2797   struct iris_context *ice = (struct iris_context *)ctx;
2798   struct iris_uncompiled_shader *ish = state;
2799
2800   if (ish) {
2801      const struct shader_info *info = &ish->nir->info;
2802      if (ice->state.window_space_position != info->vs.window_space_position) {
2803         ice->state.window_space_position = info->vs.window_space_position;
2804
2805         ice->state.dirty |= IRIS_DIRTY_CLIP |
2806                             IRIS_DIRTY_RASTER |
2807                             IRIS_DIRTY_CC_VIEWPORT;
2808      }
2809
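      /* Which built-in draw parameters the VS reads determines the extra
       * SGVS vertex elements and buffers we set up, so changes here require
       * re-emitting the vertex elements and vertex buffers.
       */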
2810      const bool uses_draw_params =
2811         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
2812         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
2813      const bool uses_derived_draw_params =
2814         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
2815         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
2816      const bool needs_sgvs_element = uses_draw_params ||
2817         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
2818         BITSET_TEST(info->system_values_read,
2819                     SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
2820
2821      if (ice->state.vs_uses_draw_params != uses_draw_params ||
2822          ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
2823          ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag) {
2824         ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
2825                             IRIS_DIRTY_VERTEX_ELEMENTS;
2826      }
2827
2828      ice->state.vs_uses_draw_params = uses_draw_params;
2829      ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
2830      ice->state.vs_needs_sgvs_element = needs_sgvs_element;
2831      ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag;
2832   }
2833
2834   bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
2835}
2836
2837static void
2838iris_bind_tcs_state(struct pipe_context *ctx, void *state)
2839{
2840   bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
2841}
2842
2843static void
2844iris_bind_tes_state(struct pipe_context *ctx, void *state)
2845{
2846   struct iris_context *ice = (struct iris_context *)ctx;
2847
2848   /* Enabling/disabling optional stages requires a URB reconfiguration. */
2849   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
2850      ice->state.dirty |= IRIS_DIRTY_URB;
2851
2852   bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
2853}
2854
2855static void
2856iris_bind_gs_state(struct pipe_context *ctx, void *state)
2857{
2858   struct iris_context *ice = (struct iris_context *)ctx;
2859
2860   /* Enabling/disabling optional stages requires a URB reconfiguration. */
2861   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
2862      ice->state.dirty |= IRIS_DIRTY_URB;
2863
2864   bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
2865}
2866
2867static void
2868iris_bind_fs_state(struct pipe_context *ctx, void *state)
2869{
2870   struct iris_context *ice = (struct iris_context *) ctx;
2871   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
2872   const struct intel_device_info *devinfo = &screen->devinfo;
2873   struct iris_uncompiled_shader *old_ish =
2874      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2875   struct iris_uncompiled_shader *new_ish = state;
2876
2877   const unsigned color_bits =
2878      BITFIELD64_BIT(FRAG_RESULT_COLOR) |
2879      BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);
2880
2881   /* Fragment shader outputs influence HasWriteableRT */
2882   if (!old_ish || !new_ish ||
2883       (old_ish->nir->info.outputs_written & color_bits) !=
2884       (new_ish->nir->info.outputs_written & color_bits))
2885      ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
2886
2887   if (devinfo->ver == 8)
2888      ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
2889
2890   bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
2891}
2892
2893static void
2894iris_bind_cs_state(struct pipe_context *ctx, void *state)
2895{
2896   bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
2897}
2898
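/**
 * The pipe_screen::finalize_nir() driver hook.
 *
 * Performs screen-level NIR preprocessing (the Intel-generic passes plus
 * iris storage image lowering) that can be done once, before any variants
 * are compiled.
 */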
2899static char *
2900iris_finalize_nir(struct pipe_screen *_screen, void *nirptr)
2901{
2902   struct iris_screen *screen = (struct iris_screen *)_screen;
2903   struct nir_shader *nir = (struct nir_shader *) nirptr;
2904   const struct intel_device_info *devinfo = &screen->devinfo;
2905
2906   NIR_PASS_V(nir, iris_fix_edge_flags);
2907
2908   brw_preprocess_nir(screen->compiler, nir, NULL);
2909
2910   NIR_PASS_V(nir, brw_nir_lower_storage_image, devinfo);
2911   NIR_PASS_V(nir, iris_lower_storage_image_derefs);
2912
2913   nir_sweep(nir);
2914
2915   return NULL;
2916}
2917
2918static void
2919iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
2920                                     unsigned max_threads)
2921{
2922   struct iris_screen *screen = (struct iris_screen *) pscreen;
2923   util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads);
2924}
2925
2926static bool
2927iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
2928                                             void *v_shader,
2929                                             enum pipe_shader_type p_stage)
2930{
2931   struct iris_screen *screen = (struct iris_screen *) pscreen;
2932
2933   /* Threaded compilation is only used for the precompile.  If precompile is
2934    * disabled, threaded compilation is "done."
2935    */
2936   if (!screen->precompile)
2937      return true;
2938
2939   struct iris_uncompiled_shader *ish = v_shader;
2940
2941   /* When precompile is enabled, the first entry is the precompile variant.
2942    * Check the ready fence of the precompile variant.
2943    */
2944   struct iris_compiled_shader *first =
2945      list_first_entry(&ish->variants, struct iris_compiled_shader, link);
2946
2947   return util_queue_fence_is_signalled(&first->ready);
2948}
2949
2950void
2951iris_init_screen_program_functions(struct pipe_screen *pscreen)
2952{
2953   pscreen->is_parallel_shader_compilation_finished =
2954      iris_is_parallel_shader_compilation_finished;
2955   pscreen->set_max_shader_compiler_threads =
2956      iris_set_max_shader_compiler_threads;
2957   pscreen->finalize_nir = iris_finalize_nir;
2958}
2959
2960void
2961iris_init_program_functions(struct pipe_context *ctx)
2962{
2963   ctx->create_vs_state  = iris_create_shader_state;
2964   ctx->create_tcs_state = iris_create_shader_state;
2965   ctx->create_tes_state = iris_create_shader_state;
2966   ctx->create_gs_state  = iris_create_shader_state;
2967   ctx->create_fs_state  = iris_create_shader_state;
2968   ctx->create_compute_state = iris_create_compute_state;
2969
2970   ctx->delete_vs_state  = iris_delete_shader_state;
2971   ctx->delete_tcs_state = iris_delete_shader_state;
2972   ctx->delete_tes_state = iris_delete_shader_state;
2973   ctx->delete_gs_state  = iris_delete_shader_state;
2974   ctx->delete_fs_state  = iris_delete_shader_state;
2975   ctx->delete_compute_state = iris_delete_shader_state;
2976
2977   ctx->bind_vs_state  = iris_bind_vs_state;
2978   ctx->bind_tcs_state = iris_bind_tcs_state;
2979   ctx->bind_tes_state = iris_bind_tes_state;
2980   ctx->bind_gs_state  = iris_bind_gs_state;
2981   ctx->bind_fs_state  = iris_bind_fs_state;
2982   ctx->bind_compute_state = iris_bind_cs_state;
2983}
2984