1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keithw@vmware.com>
30  */
31
32#include <pthread.h>
33#include "main/glspirv.h"
34#include "program/prog_parameter.h"
35#include "program/prog_print.h"
36#include "program/prog_to_nir.h"
37#include "program/program.h"
38#include "program/programopt.h"
39#include "tnl/tnl.h"
40#include "util/ralloc.h"
41#include "compiler/glsl/ir.h"
42#include "compiler/glsl/program.h"
43#include "compiler/glsl/gl_nir.h"
44#include "compiler/glsl/glsl_to_nir.h"
45
46#include "brw_program.h"
47#include "brw_context.h"
48#include "compiler/brw_nir.h"
49#include "brw_defines.h"
50#include "brw_batch.h"
51
52#include "brw_cs.h"
53#include "brw_gs.h"
54#include "brw_vs.h"
55#include "brw_wm.h"
56#include "brw_state.h"
57
58#include "main/shaderapi.h"
59#include "main/shaderobj.h"
60
61static bool
62brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
63{
64   if (is_scalar) {
65      nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
66                               type_size_scalar_bytes);
67      return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
68   } else {
69      nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
70                               type_size_vec4_bytes);
71      return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
72   }
73}
74
75static struct gl_program *brw_new_program(struct gl_context *ctx,
76                                          gl_shader_stage stage,
77                                          GLuint id, bool is_arb_asm);
78
/**
 * Build the NIR shader for \p prog — from GLSL IR, SPIR-V, or (when
 * \p shader_prog is NULL) ARB-program Mesa IR — and run the common i965
 * lowering that must happen before stage-specific compilation.
 *
 * Returns the new nir_shader; also records dual-slot vertex inputs in
 * \p prog and may add state references to prog->Parameters.
 */
nir_shader *
brw_create_nir(struct brw_context *brw,
               const struct gl_shader_program *shader_prog,
               struct gl_program *prog,
               gl_shader_stage stage,
               bool is_scalar)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   const nir_shader_compiler_options *options =
      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
   nir_shader *nir;

   /* First, lower the GLSL/Mesa IR or SPIR-V to NIR */
   if (shader_prog) {
      if (shader_prog->data->spirv) {
         nir = _mesa_spirv_to_nir(ctx, shader_prog, stage, options);
      } else {
         nir = glsl_to_nir(ctx, shader_prog, stage, options);

         /* Remap the locations to slots so those requiring two slots will
          * occupy two locations. For instance, if we have in the IR code a
          * dvec3 attr0 in location 0 and vec4 attr1 in location 1, in NIR attr0
          * will use locations/slots 0 and 1, and attr1 will use location/slot 2
          */
         if (nir->info.stage == MESA_SHADER_VERTEX)
            nir_remap_dual_slot_attributes(nir, &prog->DualSlotInputs);
      }
      assert (nir);

      nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out,
                                NULL);
      nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir");
      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
   } else {
      /* ARB assembly program path: translate the Mesa IR directly. */
      nir = prog_to_nir(prog, options);
      NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
   }
   nir_validate_shader(nir, "before brw_preprocess_nir");

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   /* Build the fp64 software-emulation library once per context, if this
    * shader uses 64-bit types and the backend wants full software lowering.
    */
   if (!ctx->SoftFP64 && ((nir->info.bit_sizes_int | nir->info.bit_sizes_float) & 64) &&
       (options->lower_doubles_options & nir_lower_fp64_full_software)) {
      ctx->SoftFP64 = glsl_float64_funcs_to_nir(ctx, options);
   }

   brw_preprocess_nir(brw->screen->compiler, nir, ctx->SoftFP64);

   if (stage == MESA_SHADER_TESS_CTRL) {
      /* Lower gl_PatchVerticesIn from a sys. value to a uniform on Gfx8+. */
      static const gl_state_index16 tokens[STATE_LENGTH] =
         { STATE_TCS_PATCH_VERTICES_IN };
      nir_lower_patch_vertices(nir, 0, devinfo->ver >= 8 ? tokens : NULL);
   }

   if (stage == MESA_SHADER_TESS_EVAL) {
      /* Lower gl_PatchVerticesIn to a constant if we have a TCS, or
       * a uniform if we don't.
       *
       * NOTE(review): this path dereferences shader_prog without a NULL
       * check — it assumes TES shaders only arrive via linked shader
       * programs (ARB programs have no tessellation stage); confirm.
       */
      struct gl_linked_shader *tcs =
         shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
      uint32_t static_patch_vertices =
         tcs ? tcs->Program->nir->info.tess.tcs_vertices_out : 0;
      static const gl_state_index16 tokens[STATE_LENGTH] =
         { STATE_TES_PATCH_VERTICES_IN };
      nir_lower_patch_vertices(nir, static_patch_vertices, tokens);
   }

   if (stage == MESA_SHADER_FRAGMENT) {
      static const struct nir_lower_wpos_ytransform_options wpos_options = {
         .state_tokens = {STATE_FB_WPOS_Y_TRANSFORM, 0, 0},
         .fs_coord_pixel_center_integer = 1,
         .fs_coord_origin_upper_left = 1,
      };

      bool progress = false;
      NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
      if (progress) {
         /* The pass introduced a reference to the Y-transform state vector;
          * make sure it is tracked in the program's parameter list.
          */
         _mesa_add_state_reference(prog->Parameters,
                                   wpos_options.state_tokens);
      }
   }

   return nir;
}
166
167static void
168shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
169{
170   assert(glsl_type_is_vector_or_scalar(type));
171
172   uint32_t comp_size = glsl_type_is_boolean(type)
173      ? 4 : glsl_get_bit_size(type) / 8;
174   unsigned length = glsl_get_vector_elements(type);
175   *size = comp_size * length,
176   *align = comp_size * (length == 3 ? 4 : length);
177}
178
/**
 * Lower uniform, sampler, image, shared-memory and UBO/SSBO access in
 * prog->nir into the offset/index form the i965 backend consumes, and
 * record which textures the shader uses.
 *
 * \p shader_prog may be NULL (ARB programs — see brw_program_string_notify);
 * the spirv deref below is only reached for compute shaders.
 *
 * NOTE(review): the first pass runs on \p nir while the rest run on
 * prog->nir — presumably the same shader (all visible callers pass
 * prog->nir); confirm.
 */
void
brw_nir_lower_resources(nir_shader *nir, struct gl_shader_program *shader_prog,
                        struct gl_program *prog,
                        const struct intel_device_info *devinfo)
{
   NIR_PASS_V(nir, brw_nir_lower_uniforms, nir->options->lower_to_scalar);
   NIR_PASS_V(prog->nir, gl_nir_lower_samplers, shader_prog);
   BITSET_COPY(prog->info.textures_used, prog->nir->info.textures_used);
   BITSET_COPY(prog->info.textures_used_by_txf, prog->nir->info.textures_used_by_txf);

   NIR_PASS_V(prog->nir, brw_nir_lower_storage_image, devinfo);

   /* SPIR-V compute shaders need explicit layouts/offsets assigned to
    * their shared-memory variables.
    *
    * NOTE(review): assumes compute shaders always have a non-NULL
    * shader_prog — confirm against callers.
    */
   if (prog->nir->info.stage == MESA_SHADER_COMPUTE &&
       shader_prog->data->spirv) {
      NIR_PASS_V(prog->nir, nir_lower_vars_to_explicit_types,
                 nir_var_mem_shared, shared_type_info);
      NIR_PASS_V(prog->nir, nir_lower_explicit_io,
                 nir_var_mem_shared, nir_address_format_32bit_offset);
   }

   NIR_PASS_V(prog->nir, gl_nir_lower_buffers, shader_prog);
   /* Do a round of constant folding to clean up address calculations */
   NIR_PASS_V(prog->nir, nir_opt_constant_folding);
}
203
204void
205brw_shader_gather_info(nir_shader *nir, struct gl_program *prog)
206{
207   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
208
209   /* Copy the info we just generated back into the gl_program */
210   const char *prog_name = prog->info.name;
211   const char *prog_label = prog->info.label;
212   prog->info = nir->info;
213   prog->info.name = prog_name;
214   prog->info.label = prog_label;
215}
216
/* Hand out a unique, monotonically increasing driver-internal program id.
 * Atomic, so it is safe when multiple contexts share the screen.
 */
static unsigned
get_new_program_id(struct brw_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}
222
223static struct gl_program *
224brw_new_program(struct gl_context *ctx,
225                gl_shader_stage stage,
226                GLuint id, bool is_arb_asm)
227{
228   struct brw_context *brw = brw_context(ctx);
229   struct brw_program *prog = rzalloc(NULL, struct brw_program);
230
231   if (prog) {
232      prog->id = get_new_program_id(brw->screen);
233
234      return _mesa_init_gl_program(&prog->program, stage, id, is_arb_asm);
235   }
236
237   return NULL;
238}
239
/* DeleteProgram driver hook.  See the comment below for why brw->programs[]
 * must be poisoned rather than left alone or NULLed.
 */
static void
brw_delete_program(struct gl_context *ctx, struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);

   /* Beware!  prog's refcount has reached zero, and it's about to be freed.
    *
    * In brw_upload_pipeline_state(), we compare brw->programs[i] to
    * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
    * pointer has changed.
    *
    * We cannot leave brw->programs[i] as a dangling pointer to the dead
    * program.  malloc() may allocate the same memory for a new gl_program,
    * causing us to see matching pointers...but totally different programs.
    *
    * We cannot set brw->programs[i] to NULL, either.  If we've deleted the
    * active program, Mesa may set ctx->FooProgram._Current to NULL.  That
    * would cause us to see matching pointers (NULL == NULL), and fail to
    * detect that a program has changed since our last draw.
    *
    * So, set it to a bogus gl_program pointer that will never match,
    * causing us to properly reevaluate the state on our next draw.
    *
    * Getting this wrong causes heisenbugs which are very hard to catch,
    * as you need a very specific allocation pattern to hit the problem.
    */
   static const struct gl_program deleted_program;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (brw->programs[i] == prog)
         brw->programs[i] = (struct gl_program *) &deleted_program;
   }

   _mesa_delete_program( ctx, prog );
}
275
276
/**
 * ProgramStringNotify driver hook: (re)build an ARB vertex or fragment
 * program after its source has been set.  Builds fresh NIR for the
 * program, flags the relevant BRW_NEW_* state if it is the currently
 * bound program, and kicks off a precompile.
 */
static GLboolean
brw_program_string_notify(struct gl_context *ctx,
                          GLenum target,
                          struct gl_program *prog)
{
   assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);

   struct brw_context *brw = brw_context(ctx);
   const struct brw_compiler *compiler = brw->screen->compiler;

   switch (target) {
   case GL_FRAGMENT_PROGRAM_ARB: {
      struct brw_program *newFP = brw_program(prog);
      const struct brw_program *curFP =
         brw_program_const(brw->programs[MESA_SHADER_FRAGMENT]);

      /* If this is the currently bound FP, its compiled state is stale. */
      if (newFP == curFP)
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      _mesa_program_fragment_position_to_sysval(&newFP->program);
      newFP->id = get_new_program_id(brw->screen);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);

      brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);

      brw_shader_gather_info(prog->nir, prog);

      brw_fs_precompile(ctx, prog);
      break;
   }
   case GL_VERTEX_PROGRAM_ARB: {
      struct brw_program *newVP = brw_program(prog);
      const struct brw_program *curVP =
         brw_program_const(brw->programs[MESA_SHADER_VERTEX]);

      /* If this is the currently bound VP, its compiled state is stale. */
      if (newVP == curVP)
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      if (newVP->program.arb.IsPositionInvariant) {
         _mesa_insert_mvp_code(ctx, &newVP->program);
      }
      newVP->id = get_new_program_id(brw->screen);

      /* Also tell tnl about it:
       */
      _tnl_program_string(ctx, target, prog);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
                                 compiler->scalar_stage[MESA_SHADER_VERTEX]);

      brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);

      brw_shader_gather_info(prog->nir, prog);

      brw_vs_precompile(ctx, prog);
      break;
   }
   default:
      /*
       * driver->ProgramStringNotify is only called for ARB programs, fixed
       * function vertex programs, and ir_to_mesa (which isn't used by the
       * i965 back-end).  Therefore, even after geometry shaders are added,
       * this function should only ever be called with a target of
       * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
       */
      unreachable("Unexpected target in brwProgramStringNotify");
   }

   return true;
}
346
347static void
348brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
349{
350   struct brw_context *brw = brw_context(ctx);
351   const struct intel_device_info *devinfo = &brw->screen->devinfo;
352   unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL;
353   assert(devinfo->ver >= 7 && devinfo->ver <= 11);
354
355   if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
356                   GL_ELEMENT_ARRAY_BARRIER_BIT |
357                   GL_COMMAND_BARRIER_BIT))
358      bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
359
360   if (barriers & GL_UNIFORM_BARRIER_BIT)
361      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
362               PIPE_CONTROL_CONST_CACHE_INVALIDATE);
363
364   if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
365      bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
366
367   if (barriers & (GL_TEXTURE_UPDATE_BARRIER_BIT |
368                   GL_PIXEL_BUFFER_BARRIER_BIT))
369      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
370               PIPE_CONTROL_RENDER_TARGET_FLUSH);
371
372   if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
373      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
374               PIPE_CONTROL_RENDER_TARGET_FLUSH);
375
376   /* Typed surface messages are handled by the render cache on IVB, so we
377    * need to flush it too.
378    */
379   if (devinfo->verx10 == 70)
380      bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
381
382   brw_emit_pipe_control_flush(brw, bits);
383}
384
385static void
386brw_framebuffer_fetch_barrier(struct gl_context *ctx)
387{
388   struct brw_context *brw = brw_context(ctx);
389   const struct intel_device_info *devinfo = &brw->screen->devinfo;
390
391   if (!ctx->Extensions.EXT_shader_framebuffer_fetch) {
392      if (devinfo->ver >= 6) {
393         brw_emit_pipe_control_flush(brw,
394                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
395                                     PIPE_CONTROL_CS_STALL);
396         brw_emit_pipe_control_flush(brw,
397                                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
398      } else {
399         brw_emit_pipe_control_flush(brw,
400                                     PIPE_CONTROL_RENDER_TARGET_FLUSH);
401      }
402   }
403}
404
405void
406brw_get_scratch_bo(struct brw_context *brw,
407                   struct brw_bo **scratch_bo, int size)
408{
409   struct brw_bo *old_bo = *scratch_bo;
410
411   if (old_bo && old_bo->size < size) {
412      brw_bo_unreference(old_bo);
413      old_bo = NULL;
414   }
415
416   if (!old_bo) {
417      *scratch_bo =
418         brw_bo_alloc(brw->bufmgr, "scratch bo", size, BRW_MEMZONE_SCRATCH);
419   }
420}
421
422/**
423 * Reserve enough scratch space for the given stage to hold \p per_thread_size
424 * bytes times the given \p thread_count.
425 */
426void
427brw_alloc_stage_scratch(struct brw_context *brw,
428                        struct brw_stage_state *stage_state,
429                        unsigned per_thread_size)
430{
431   if (stage_state->per_thread_scratch >= per_thread_size)
432      return;
433
434   stage_state->per_thread_scratch = per_thread_size;
435
436   if (stage_state->scratch_bo)
437      brw_bo_unreference(stage_state->scratch_bo);
438
439   const struct intel_device_info *devinfo = &brw->screen->devinfo;
440   assert(stage_state->stage < ARRAY_SIZE(devinfo->max_scratch_ids));
441   unsigned max_ids = devinfo->max_scratch_ids[stage_state->stage];
442   stage_state->scratch_bo =
443      brw_bo_alloc(brw->bufmgr, "shader scratch space",
444                   per_thread_size * max_ids, BRW_MEMZONE_SCRATCH);
445}
446
/* Install the i965 program-related driver hooks into the dd function
 * table.  The tnl module must have installed its ProgramStringNotify
 * first, since brw_program_string_notify chains to _tnl_program_string
 * for vertex programs.
 */
void
brw_init_frag_prog_functions(struct dd_function_table *functions)
{
   assert(functions->ProgramStringNotify == _tnl_program_string);

   functions->NewProgram = brw_new_program;
   functions->DeleteProgram = brw_delete_program;
   functions->ProgramStringNotify = brw_program_string_notify;

   functions->LinkShader = brw_link_shader;

   functions->MemoryBarrier = brw_memory_barrier;
   functions->FramebufferFetchBarrier = brw_framebuffer_fetch_barrier;
}
461
/* CPU-side accumulation of the three counters each shader-time entry
 * writes into the BO (see brw_collect_shader_time).
 */
struct shader_times {
   uint64_t time;     /* accumulated cycle count */
   uint64_t written;  /* samples recorded (used to scale `time` in
                       * brw_report_shader_time) */
   uint64_t reset;    /* samples lost — presumably to counter resets;
                       * confirm against the shader-side instrumentation */
};
467
468void
469brw_init_shader_time(struct brw_context *brw)
470{
471   const int max_entries = 2048;
472   brw->shader_time.bo =
473      brw_bo_alloc(brw->bufmgr, "shader time",
474                   max_entries * BRW_SHADER_TIME_STRIDE * 3,
475                   BRW_MEMZONE_OTHER);
476   brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
477   brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
478   brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
479                                          max_entries);
480   brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
481                                               max_entries);
482   brw->shader_time.max_entries = max_entries;
483}
484
/* qsort comparator for an array of uint64_t pointers; orders ascending by
 * the pointed-to values.  Explicit comparisons are used because the
 * difference of two uint64_t values does not fit in an int.
 */
static int
compare_time(const void *a, const void *b)
{
   uint64_t av = **(uint64_t * const *)a;
   uint64_t bv = **(uint64_t * const *)b;

   if (av < bv)
      return -1;
   if (av > bv)
      return 1;
   return 0;
}
499
/* Print one aligned row of the shader-time report to stderr.  Callers
 * guarantee total != 0 (brw_report_shader_time bails out when the overall
 * total is zero).
 */
static void
print_shader_time_line(const char *stage, const char *name,
                       int shader_num, uint64_t time, uint64_t total)
{
   fprintf(stderr, "%-6s%-18s", stage, name);

   /* shader_num == 0 marks a summary ("total") row — leave the id blank. */
   if (shader_num != 0)
      fprintf(stderr, "%4d: ", shader_num);
   else
      fprintf(stderr, "    : ");

   /* Cast through long long so %lld matches uint64_t on both LP64 and
    * LLP64 platforms.
    */
   fprintf(stderr, "%16lld (%7.2f Gcycles)      %4.1f%%\n",
           (long long)time,
           (double)time / 1000000000.0,
           (double)time / total * 100.0);
}
516
/* Print a report of accumulated shader execution times to stderr: one row
 * per entry (sorted by scaled time), followed by per-stage totals.
 */
static void
brw_report_shader_time(struct brw_context *brw)
{
   if (!brw->shader_time.bo || !brw->shader_time.num_entries)
      return;

   /* scaled[] holds the reset-compensated time per entry; sorted[] is an
    * array of pointers into scaled[] that qsort reorders (via
    * compare_time), so we can recover the original index afterwards.
    */
   uint64_t scaled[brw->shader_time.num_entries];
   uint64_t *sorted[brw->shader_time.num_entries];
   uint64_t total_by_type[ST_CS + 1];
   memset(total_by_type, 0, sizeof(total_by_type));
   double total = 0;
   for (int i = 0; i < brw->shader_time.num_entries; i++) {
      uint64_t written = 0, reset = 0;
      enum shader_time_shader_type type = brw->shader_time.types[i];

      sorted[i] = &scaled[i];

      switch (type) {
      case ST_VS:
      case ST_TCS:
      case ST_TES:
      case ST_GS:
      case ST_FS8:
      case ST_FS16:
      case ST_FS32:
      case ST_CS:
         written = brw->shader_time.cumulative[i].written;
         reset = brw->shader_time.cumulative[i].reset;
         break;

      default:
         /* I sometimes want to print things that aren't the 3 shader times.
          * Just print the sum in that case.
          */
         written = 1;
         reset = 0;
         break;
      }

      /* Scale the measured time up to account for samples lost to resets:
       * time/written is the average per recorded sample, multiplied by the
       * total number of samples (written + reset).
       */
      uint64_t time = brw->shader_time.cumulative[i].time;
      if (written) {
         scaled[i] = time / written * (written + reset);
      } else {
         scaled[i] = time;
      }

      switch (type) {
      case ST_VS:
      case ST_TCS:
      case ST_TES:
      case ST_GS:
      case ST_FS8:
      case ST_FS16:
      case ST_FS32:
      case ST_CS:
         total_by_type[type] += scaled[i];
         break;
      default:
         break;
      }

      total += scaled[i];
   }

   if (total == 0) {
      fprintf(stderr, "No shader time collected yet\n");
      return;
   }

   qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);

   fprintf(stderr, "\n");
   fprintf(stderr, "type          ID                  cycles spent                   %% of total\n");
   for (int s = 0; s < brw->shader_time.num_entries; s++) {
      const char *stage;
      /* Work back from the sorted pointers times to a time to print. */
      int i = sorted[s] - scaled;

      if (scaled[i] == 0)
         continue;

      int shader_num = brw->shader_time.ids[i];
      const char *shader_name = brw->shader_time.names[i];

      switch (brw->shader_time.types[i]) {
      case ST_VS:
         stage = "vs";
         break;
      case ST_TCS:
         stage = "tcs";
         break;
      case ST_TES:
         stage = "tes";
         break;
      case ST_GS:
         stage = "gs";
         break;
      case ST_FS8:
         stage = "fs8";
         break;
      case ST_FS16:
         stage = "fs16";
         break;
      case ST_FS32:
         stage = "fs32";
         break;
      case ST_CS:
         stage = "cs";
         break;
      default:
         stage = "other";
         break;
      }

      print_shader_time_line(stage, shader_name, shader_num,
                             scaled[i], total);
   }

   fprintf(stderr, "\n");
   print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
   print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
   print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
   print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
   print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
   print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
   print_shader_time_line("total", "fs32", 0, total_by_type[ST_FS32], total);
   print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
}
645
/* Read back the per-entry counters the shaders have accumulated in the
 * shader-time BO, fold them into the CPU-side cumulative totals, and zero
 * the BO for the next collection period.
 */
static void
brw_collect_shader_time(struct brw_context *brw)
{
   if (!brw->shader_time.bo)
      return;

   /* This probably stalls on the last rendering.  We could fix that by
    * delaying reading the reports, but it doesn't look like it's a big
    * overhead compared to the cost of tracking the time in the first place.
    */
   void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);

   for (int i = 0; i < brw->shader_time.num_entries; i++) {
      /* Each entry owns three BRW_SHADER_TIME_STRIDE-byte slots; a 32-bit
       * counter sits at the start of each slot (hence the /4 when indexing
       * as uint32_t): time, written, reset — in that order.
       */
      uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;

      brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
      brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
      brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
   }

   /* Zero the BO out to clear it out for our next collection.
    */
   memset(bo_map, 0, brw->shader_time.bo->size);
   brw_bo_unmap(brw->shader_time.bo);
}
671
/* Collect the latest GPU timing data, and print a report at most once per
 * second (and on the very first collection, when report_time is 0).
 */
void
brw_collect_and_report_shader_time(struct brw_context *brw)
{
   brw_collect_shader_time(brw);

   if (brw->shader_time.report_time == 0 ||
       get_time() - brw->shader_time.report_time >= 1.0) {
      brw_report_shader_time(brw);
      brw->shader_time.report_time = get_time();
   }
}
683
684/**
685 * Chooses an index in the shader_time buffer and sets up tracking information
686 * for our printouts.
687 *
688 * Note that this holds on to references to the underlying programs, which may
689 * change their lifetimes compared to normal operation.
690 */
691int
692brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
693                          enum shader_time_shader_type type, bool is_glsl_sh)
694{
695   int shader_time_index = brw->shader_time.num_entries++;
696   assert(shader_time_index < brw->shader_time.max_entries);
697   brw->shader_time.types[shader_time_index] = type;
698
699   const char *name;
700   if (prog->Id == 0) {
701      name = "ff";
702   } else if (is_glsl_sh) {
703      name = prog->info.label ?
704         ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
705   } else {
706      name = "prog";
707   }
708
709   brw->shader_time.names[shader_time_index] = name;
710   brw->shader_time.ids[shader_time_index] = prog->Id;
711
712   return shader_time_index;
713}
714
/* Drop our reference on the shader-time BO.  The CPU-side arrays are
 * ralloc children of the brw context and are freed with it.
 */
void
brw_destroy_shader_time(struct brw_context *brw)
{
   brw_bo_unreference(brw->shader_time.bo);
   brw->shader_time.bo = NULL;
}
721
/* Free the ralloc'd parameter arrays hanging off a brw_stage_prog_data.
 * The cast drops const so the members can be freed.
 */
void
brw_stage_prog_data_free(const void *p)
{
   struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;

   ralloc_free(prog_data->param);
   ralloc_free(prog_data->pull_param);
}
730
/* Debug helper: dump an ARB assembly program's Mesa IR to stderr. */
void
brw_dump_arb_asm(const char *stage, struct gl_program *prog)
{
   fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
           stage, prog->Id, stage);
   _mesa_print_program(prog);
}
738
739void
740brw_setup_tex_for_precompile(const struct intel_device_info *devinfo,
741                             struct brw_sampler_prog_key_data *tex,
742                             const struct gl_program *prog)
743{
744   const bool has_shader_channel_select = devinfo->verx10 >= 75;
745   unsigned sampler_count = util_last_bit(prog->SamplersUsed);
746   for (unsigned i = 0; i < sampler_count; i++) {
747      if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
748         /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
749         tex->swizzles[i] =
750            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
751      } else {
752         /* Color sampler: assume no swizzling. */
753         tex->swizzles[i] = SWIZZLE_XYZW;
754      }
755   }
756}
757
758/**
759 * Sets up the starting offsets for the groups of binding table entries
760 * common to all pipeline stages.
761 *
762 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
763 * unused but also make sure that addition of small offsets to them will
764 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
765 */
766uint32_t
767brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo,
768                                        const struct gl_program *prog,
769                                        struct brw_stage_prog_data *stage_prog_data,
770                                        uint32_t next_binding_table_offset)
771{
772   int num_textures = util_last_bit(prog->SamplersUsed);
773
774   stage_prog_data->binding_table.texture_start = next_binding_table_offset;
775   next_binding_table_offset += num_textures;
776
777   if (prog->info.num_ubos) {
778      assert(prog->info.num_ubos <= BRW_MAX_UBO);
779      stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
780      next_binding_table_offset += prog->info.num_ubos;
781   } else {
782      stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
783   }
784
785   if (prog->info.num_ssbos || prog->info.num_abos) {
786      assert(prog->info.num_abos <= BRW_MAX_ABO);
787      assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
788      stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
789      next_binding_table_offset += prog->info.num_abos + prog->info.num_ssbos;
790   } else {
791      stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
792   }
793
794   if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
795      stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
796      next_binding_table_offset++;
797   } else {
798      stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
799   }
800
801   if (prog->info.uses_texture_gather) {
802      if (devinfo->ver >= 8) {
803         stage_prog_data->binding_table.gather_texture_start =
804            stage_prog_data->binding_table.texture_start;
805      } else {
806         stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
807         next_binding_table_offset += num_textures;
808      }
809   } else {
810      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
811   }
812
813   if (prog->info.num_images) {
814      stage_prog_data->binding_table.image_start = next_binding_table_offset;
815      next_binding_table_offset += prog->info.num_images;
816   } else {
817      stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
818   }
819
820   /* This may or may not be used depending on how the compile goes. */
821   stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
822   next_binding_table_offset++;
823
824   /* Plane 0 is just the regular texture section */
825   stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;
826
827   stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
828   next_binding_table_offset += num_textures;
829
830   stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
831   next_binding_table_offset += num_textures;
832
833   /* Set the binding table size.  Some callers may append new entries
834    * and increase this accordingly.
835    */
836   stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4;
837
838   assert(next_binding_table_offset <= BRW_MAX_SURFACES);
839   return next_binding_table_offset;
840}
841
/**
 * Fill in the default (state-independent) compiler key for \p prog,
 * dispatching on shader stage to the per-stage helper.  Only the TCS and
 * TES helpers take the linked shader program.
 */
void
brw_populate_default_key(const struct brw_compiler *compiler,
                         union brw_any_prog_key *prog_key,
                         struct gl_shader_program *sh_prog,
                         struct gl_program *prog)
{
   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      brw_vs_populate_default_key(compiler, &prog_key->vs, prog);
      break;
   case MESA_SHADER_TESS_CTRL:
      brw_tcs_populate_default_key(compiler, &prog_key->tcs, sh_prog, prog);
      break;
   case MESA_SHADER_TESS_EVAL:
      brw_tes_populate_default_key(compiler, &prog_key->tes, sh_prog, prog);
      break;
   case MESA_SHADER_GEOMETRY:
      brw_gs_populate_default_key(compiler, &prog_key->gs, prog);
      break;
   case MESA_SHADER_FRAGMENT:
      brw_wm_populate_default_key(compiler, &prog_key->wm, prog);
      break;
   case MESA_SHADER_COMPUTE:
      brw_cs_populate_default_key(compiler, &prog_key->cs, prog);
      break;
   default:
      unreachable("Unsupported stage!");
   }
}
871
872void
873brw_debug_recompile(struct brw_context *brw,
874                    gl_shader_stage stage,
875                    unsigned api_id,
876                    struct brw_base_prog_key *key)
877{
878   const struct brw_compiler *compiler = brw->screen->compiler;
879   enum brw_cache_id cache_id = brw_stage_cache_id(stage);
880
881   brw_shader_perf_log(compiler, brw, "Recompiling %s shader for program %d\n",
882                       _mesa_shader_stage_to_string(stage), api_id);
883
884   const void *old_key =
885      brw_find_previous_compile(&brw->cache, cache_id, key->program_string_id);
886
887   brw_debug_key_recompile(compiler, brw, stage, old_key, key);
888}
889