/* lower_ubo_reference.cpp — revision b8e80941 */
1/*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file lower_ubo_reference.cpp
26 *
27 * IR lower pass to replace dereferences of variables in a uniform
28 * buffer object with usage of ir_binop_ubo_load expressions, each of
29 * which can read data up to the size of a vec4.
30 *
31 * This relieves drivers of the responsibility to deal with tricky UBO
32 * layout issues like std140 structures and row_major matrices on
33 * their own.
34 */
35
36#include "lower_buffer_access.h"
37#include "ir_builder.h"
38#include "main/macros.h"
39#include "glsl_parser_extras.h"
40#include "main/mtypes.h"
41
42using namespace ir_builder;
43
44namespace {
45class lower_ubo_reference_visitor :
46      public lower_buffer_access::lower_buffer_access {
47public:
48   lower_ubo_reference_visitor(struct gl_linked_shader *shader,
49                               bool clamp_block_indices,
50                               bool use_std430_as_default)
51   : shader(shader), clamp_block_indices(clamp_block_indices),
52     struct_field(NULL), variable(NULL)
53   {
54      this->use_std430_as_default = use_std430_as_default;
55   }
56
57   void handle_rvalue(ir_rvalue **rvalue);
58   ir_visitor_status visit_enter(ir_assignment *ir);
59
60   void setup_for_load_or_store(void *mem_ctx,
61                                ir_variable *var,
62                                ir_rvalue *deref,
63                                ir_rvalue **offset,
64                                unsigned *const_offset,
65                                bool *row_major,
66                                const glsl_type **matrix_type,
67                                enum glsl_interface_packing packing);
68   uint32_t ssbo_access_params();
69   ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
70			   ir_rvalue *offset);
71   ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
72                      ir_rvalue *offset);
73
74   bool check_for_buffer_array_copy(ir_assignment *ir);
75   bool check_for_buffer_struct_copy(ir_assignment *ir);
76   void check_for_ssbo_store(ir_assignment *ir);
77   void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var,
78                        ir_variable *write_var, unsigned write_mask);
79   ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
80                       unsigned write_mask);
81
82   enum {
83      ubo_load_access,
84      ssbo_load_access,
85      ssbo_store_access,
86      ssbo_unsized_array_length_access,
87      ssbo_atomic_access,
88   } buffer_access_type;
89
90   void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
91                             const glsl_type *type, ir_rvalue *offset,
92                             unsigned mask, int channel);
93
94   ir_visitor_status visit_enter(class ir_expression *);
95   ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
96   void check_ssbo_unsized_array_length_expression(class ir_expression *);
97   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);
98
99   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
100                                                    ir_dereference *,
101                                                    ir_variable *);
102   ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);
103
104   unsigned calculate_unsized_array_stride(ir_dereference *deref,
105                                           enum glsl_interface_packing packing);
106
107   ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
108   ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
109   ir_visitor_status visit_enter(ir_call *ir);
110   ir_visitor_status visit_enter(ir_texture *ir);
111
112   struct gl_linked_shader *shader;
113   bool clamp_block_indices;
114   const struct glsl_struct_field *struct_field;
115   ir_variable *variable;
116   ir_rvalue *uniform_block;
117   bool progress;
118};
119
120/**
121 * Determine the name of the interface block field
122 *
123 * This is the name of the specific member as it would appear in the
124 * \c gl_uniform_buffer_variable::Name field in the shader's
125 * \c UniformBlocks array.
126 */
127static const char *
128interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
129                     ir_rvalue **nonconst_block_index)
130{
131   *nonconst_block_index = NULL;
132   char *name_copy = NULL;
133   size_t base_length = 0;
134
135   /* Loop back through the IR until we find the uniform block */
136   ir_rvalue *ir = d;
137   while (ir != NULL) {
138      switch (ir->ir_type) {
139      case ir_type_dereference_variable: {
140         /* Exit loop */
141         ir = NULL;
142         break;
143      }
144
145      case ir_type_dereference_record: {
146         ir_dereference_record *r = (ir_dereference_record *) ir;
147         ir = r->record->as_dereference();
148
149         /* If we got here it means any previous array subscripts belong to
150          * block members and not the block itself so skip over them in the
151          * next pass.
152          */
153         d = ir;
154         break;
155      }
156
157      case ir_type_dereference_array: {
158         ir_dereference_array *a = (ir_dereference_array *) ir;
159         ir = a->array->as_dereference();
160         break;
161      }
162
163      case ir_type_swizzle: {
164         ir_swizzle *s = (ir_swizzle *) ir;
165         ir = s->val->as_dereference();
166         /* Skip swizzle in the next pass */
167         d = ir;
168         break;
169      }
170
171      default:
172         assert(!"Should not get here.");
173         break;
174      }
175   }
176
177   while (d != NULL) {
178      switch (d->ir_type) {
179      case ir_type_dereference_variable: {
180         ir_dereference_variable *v = (ir_dereference_variable *) d;
181         if (name_copy != NULL &&
182             v->var->is_interface_instance() &&
183             v->var->type->is_array()) {
184            return name_copy;
185         } else {
186            *nonconst_block_index = NULL;
187            return base_name;
188         }
189
190         break;
191      }
192
193      case ir_type_dereference_array: {
194         ir_dereference_array *a = (ir_dereference_array *) d;
195         size_t new_length;
196
197         if (name_copy == NULL) {
198            name_copy = ralloc_strdup(mem_ctx, base_name);
199            base_length = strlen(name_copy);
200         }
201
202         /* For arrays of arrays we start at the innermost array and work our
203          * way out so we need to insert the subscript at the base of the
204          * name string rather than just attaching it to the end.
205          */
206         new_length = base_length;
207         ir_constant *const_index = a->array_index->as_constant();
208         char *end = ralloc_strdup(NULL, &name_copy[new_length]);
209         if (!const_index) {
210            ir_rvalue *array_index = a->array_index;
211            if (array_index->type != glsl_type::uint_type)
212               array_index = i2u(array_index);
213
214            if (a->array->type->is_array() &&
215                a->array->type->fields.array->is_array()) {
216               ir_constant *base_size = new(mem_ctx)
217                  ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
218               array_index = mul(array_index, base_size);
219            }
220
221            if (*nonconst_block_index) {
222               *nonconst_block_index = add(*nonconst_block_index, array_index);
223            } else {
224               *nonconst_block_index = array_index;
225            }
226
227            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
228                                         end);
229         } else {
230            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
231                                         const_index->get_uint_component(0),
232                                         end);
233         }
234         ralloc_free(end);
235
236         d = a->array->as_dereference();
237
238         break;
239      }
240
241      default:
242         assert(!"Should not get here.");
243         break;
244      }
245   }
246
247   assert(!"Should not get here.");
248   return NULL;
249}
250
251static ir_rvalue *
252clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)
253{
254   assert(type->is_array());
255
256   const unsigned array_size = type->arrays_of_arrays_size();
257
258   ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
259   max_index->type = index->type;
260
261   ir_constant *zero = new(mem_ctx) ir_constant(0);
262   zero->type = index->type;
263
264   if (index->type->base_type == GLSL_TYPE_INT)
265      index = max2(index, zero);
266   index = min2(index, max_index);
267
268   return index;
269}
270
/**
 * Compute everything needed to address \p deref within its backing buffer.
 *
 * Resolves the block binding into this->uniform_block and the byte offset
 * of the dereference, split into a dynamic part and a constant part.
 *
 * \param var            variable being dereferenced
 * \param deref          the dereference chain to lower
 * \param[out] offset        dynamic part of the byte offset
 * \param[out] const_offset  compile-time-constant part of the byte offset
 * \param[out] row_major     whether the dereferenced thing is row-major
 * \param[out] matrix_type   the matrix type involved, if any
 * \param packing        std140/std430 packing of the interface block
 */
void
lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
                                                     ir_variable *var,
                                                     ir_rvalue *deref,
                                                     ir_rvalue **offset,
                                                     unsigned *const_offset,
                                                     bool *row_major,
                                                     const glsl_type **matrix_type,
                                                     enum glsl_interface_packing packing)
{
   /* Determine the name of the interface block */
   ir_rvalue *nonconst_block_index;
   const char *const field_name =
      interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
                           deref, &nonconst_block_index);

   /* Robust-access option: keep a dynamic block index inside the bounds of
    * the array of blocks.
    */
   if (nonconst_block_index && clamp_block_indices) {
      nonconst_block_index =
         clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);
   }

   /* Locate the block by interface name.  buffer_access_type (set by the
    * caller) selects between the shader's SSBO and UBO block lists.
    */
   unsigned num_blocks;
   struct gl_uniform_block **blocks;
   if (this->buffer_access_type != ubo_load_access) {
      num_blocks = shader->Program->info.num_ssbos;
      blocks = shader->Program->sh.ShaderStorageBlocks;
   } else {
      num_blocks = shader->Program->info.num_ubos;
      blocks = shader->Program->sh.UniformBlocks;
   }
   this->uniform_block = NULL;
   for (unsigned i = 0; i < num_blocks; i++) {
      if (strcmp(field_name, blocks[i]->Name) == 0) {

         ir_constant *index = new(mem_ctx) ir_constant(i);

         /* A dynamic index is relative to the matched (base) block. */
         if (nonconst_block_index) {
            this->uniform_block = add(nonconst_block_index, index);
         } else {
            this->uniform_block = index;
         }

         /* An interface instance starts at the beginning of the block;
          * otherwise the member's recorded offset is the starting point.
          */
         if (var->is_interface_instance()) {
            *const_offset = 0;
         } else {
            *const_offset = blocks[i]->Uniforms[var->data.location].Offset;
         }

         break;
      }
   }

   assert(this->uniform_block);

   this->struct_field = NULL;
   setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major,
                       matrix_type, &this->struct_field, packing);
}
330
/**
 * Rewrite a read of a buffer-backed variable.
 *
 * Replaces *rvalue with a dereference of a fresh temporary; emit_access()
 * generates the loads that fill the temporary from the buffer.
 */
void
lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_dereference *deref = (*rvalue)->as_dereference();
   if (!deref)
      return;

   ir_variable *var = deref->variable_referenced();
   if (!var || !var->is_in_buffer_block())
      return;

   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   const glsl_type *matrix_type;

   enum glsl_interface_packing packing =
      var->get_interface_type()->
         get_internal_ifc_packing(use_std430_as_default);

   /* SSBO and UBO reads are lowered differently downstream. */
   this->buffer_access_type =
      var->is_in_shader_storage_block() ?
      ssbo_load_access : ubo_load_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the write
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_type,
                           packing);
   assert(offset);

   /* Now that we've calculated the offset to the start of the
    * dereference, walk over the type and emit loads into a temporary.
    */
   const glsl_type *type = (*rvalue)->type;
   ir_variable *load_var = new(mem_ctx) ir_variable(type,
						    "ubo_load_temp",
						    ir_var_temporary);
   base_ir->insert_before(load_var);

   /* Snapshot the dynamic offset so subsequent per-slot accesses all use
    * the same value.
    */
   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
						       "ubo_load_temp_offset",
						       ir_var_temporary);
   base_ir->insert_before(load_offset);
   base_ir->insert_before(assign(load_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(load_var);
   emit_access(mem_ctx, false, deref, load_offset, const_offset,
               row_major, matrix_type, packing, 0);
   *rvalue = deref;

   progress = true;
}
392
393ir_expression *
394lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
395                                      const glsl_type *type,
396				      ir_rvalue *offset)
397{
398   ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
399   return new(mem_ctx)
400      ir_expression(ir_binop_ubo_load,
401                    type,
402                    block_ref,
403                    offset);
404
405}
406
/* Availability predicate attached to the intrinsic signatures built below:
 * the SSBO intrinsics exist whenever the parse state provides shader
 * storage buffer objects.
 */
static bool
shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
{
   return state->has_shader_storage_buffer_objects();
}
412
413uint32_t
414lower_ubo_reference_visitor::ssbo_access_params()
415{
416   assert(variable);
417
418   if (variable->is_interface_instance()) {
419      assert(struct_field);
420
421      return ((struct_field->memory_coherent ? ACCESS_COHERENT : 0) |
422              (struct_field->memory_restrict ? ACCESS_RESTRICT : 0) |
423              (struct_field->memory_volatile ? ACCESS_VOLATILE : 0));
424   } else {
425      return ((variable->data.memory_coherent ? ACCESS_COHERENT : 0) |
426              (variable->data.memory_restrict ? ACCESS_RESTRICT : 0) |
427              (variable->data.memory_volatile ? ACCESS_VOLATILE : 0));
428   }
429}
430
431ir_call *
432lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
433                                        ir_rvalue *deref,
434                                        ir_rvalue *offset,
435                                        unsigned write_mask)
436{
437   exec_list sig_params;
438
439   ir_variable *block_ref = new(mem_ctx)
440      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
441   sig_params.push_tail(block_ref);
442
443   ir_variable *offset_ref = new(mem_ctx)
444      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
445   sig_params.push_tail(offset_ref);
446
447   ir_variable *val_ref = new(mem_ctx)
448      ir_variable(deref->type, "value" , ir_var_function_in);
449   sig_params.push_tail(val_ref);
450
451   ir_variable *writemask_ref = new(mem_ctx)
452      ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
453   sig_params.push_tail(writemask_ref);
454
455   ir_variable *access_ref = new(mem_ctx)
456      ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
457   sig_params.push_tail(access_ref);
458
459   ir_function_signature *sig = new(mem_ctx)
460      ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
461   assert(sig);
462   sig->replace_parameters(&sig_params);
463   sig->intrinsic_id = ir_intrinsic_ssbo_store;
464
465   ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
466   f->add_signature(sig);
467
468   exec_list call_params;
469   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
470   call_params.push_tail(offset->clone(mem_ctx, NULL));
471   call_params.push_tail(deref->clone(mem_ctx, NULL));
472   call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
473   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
474   return new(mem_ctx) ir_call(sig, NULL, &call_params);
475}
476
477ir_call *
478lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
479                                       const struct glsl_type *type,
480                                       ir_rvalue *offset)
481{
482   exec_list sig_params;
483
484   ir_variable *block_ref = new(mem_ctx)
485      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
486   sig_params.push_tail(block_ref);
487
488   ir_variable *offset_ref = new(mem_ctx)
489      ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
490   sig_params.push_tail(offset_ref);
491
492   ir_variable *access_ref = new(mem_ctx)
493      ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
494   sig_params.push_tail(access_ref);
495
496   ir_function_signature *sig =
497      new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
498   assert(sig);
499   sig->replace_parameters(&sig_params);
500   sig->intrinsic_id = ir_intrinsic_ssbo_load;
501
502   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
503   f->add_signature(sig);
504
505   ir_variable *result = new(mem_ctx)
506      ir_variable(type, "ssbo_load_result", ir_var_temporary);
507   base_ir->insert_before(result);
508   ir_dereference_variable *deref_result = new(mem_ctx)
509      ir_dereference_variable(result);
510
511   exec_list call_params;
512   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
513   call_params.push_tail(offset->clone(mem_ctx, NULL));
514   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
515
516   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
517}
518
/**
 * Emit the IR performing one vec4-or-smaller buffer access.
 *
 * Called back from emit_access() for each slot of the dereferenced type.
 * The kind of access was recorded in this->buffer_access_type by the
 * caller; loads are inserted before base_ir, stores after it (so the value
 * being stored is computed first).
 */
void
lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
                                                  ir_dereference *deref,
                                                  const glsl_type *type,
                                                  ir_rvalue *offset,
                                                  unsigned mask,
                                                  int channel)
{
   switch (this->buffer_access_type) {
   case ubo_load_access:
      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
                                    ubo_load(mem_ctx, type, offset),
                                    mask));
      break;
   case ssbo_load_access: {
      /* The intrinsic call writes into its own temporary; copy that into
       * the destination with the requested write mask.
       */
      ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
      base_ir->insert_before(load_ssbo);
      ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
      ir_assignment *assignment =
         assign(deref->clone(mem_ctx, NULL), value, mask);
      base_ir->insert_before(assignment);
      break;
   }
   case ssbo_store_access:
      /* channel >= 0 means a single component of a row-major matrix access;
       * store just that component with a single-bit write mask.
       */
      if (channel >= 0) {
         base_ir->insert_after(ssbo_store(mem_ctx,
                                          swizzle(deref, channel, 1),
                                          offset, 1));
      } else {
         base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
      }
      break;
   default:
      unreachable("invalid buffer_access_type in insert_buffer_access");
   }
}
555
/**
 * Emit the stores that copy \p write_var (a temporary holding the assigned
 * value) out to the buffer backing \p var, at the location \p deref denotes.
 *
 * \param deref       original LHS dereference (before it was redirected to
 *                    the temporary by check_for_ssbo_store())
 * \param write_mask  write mask of the original assignment
 */
void
lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
                                             ir_dereference *deref,
                                             ir_variable *var,
                                             ir_variable *write_var,
                                             unsigned write_mask)
{
   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   const glsl_type *matrix_type;

   enum glsl_interface_packing packing =
      var->get_interface_type()->
         get_internal_ifc_packing(use_std430_as_default);

   this->buffer_access_type = ssbo_store_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the write
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_type,
                           packing);
   assert(offset);

   /* Now emit writes from the temporary to memory */
   ir_variable *write_offset =
      new(mem_ctx) ir_variable(glsl_type::uint_type,
                               "ssbo_store_temp_offset",
                               ir_var_temporary);

   base_ir->insert_before(write_offset);
   base_ir->insert_before(assign(write_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(write_var);
   emit_access(mem_ctx, true, deref, write_offset, const_offset,
               row_major, matrix_type, packing, write_mask);
}
597
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
{
   /* Replace any ssbo_unsized_array_length operands first, then let the
    * generic rvalue visitor lower the remaining buffer dereferences.
    */
   check_ssbo_unsized_array_length_expression(ir);
   return rvalue_visit(ir);
}
604
605ir_expression *
606lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
607{
608   if (expr->operation !=
609       ir_expression_operation(ir_unop_ssbo_unsized_array_length))
610      return NULL;
611
612   ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
613   if (!rvalue ||
614       !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
615      return NULL;
616
617   ir_dereference *deref = expr->operands[0]->as_dereference();
618   if (!deref)
619      return NULL;
620
621   ir_variable *var = expr->operands[0]->variable_referenced();
622   if (!var || !var->is_in_shader_storage_block())
623      return NULL;
624   return process_ssbo_unsized_array_length(&rvalue, deref, var);
625}
626
627void
628lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
629{
630   if (ir->operation ==
631       ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
632         /* Don't replace this unop if it is found alone. It is going to be
633          * removed by the optimization passes or replaced if it is part of
634          * an ir_assignment or another ir_expression.
635          */
636         return;
637   }
638
639   for (unsigned i = 0; i < ir->num_operands; i++) {
640      if (ir->operands[i]->ir_type != ir_type_expression)
641         continue;
642      ir_expression *expr = (ir_expression *) ir->operands[i];
643      ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
644      if (!temp)
645         continue;
646
647      delete expr;
648      ir->operands[i] = temp;
649   }
650}
651
652void
653lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
654{
655   if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
656      return;
657
658   ir_expression *expr = (ir_expression *) ir->rhs;
659   ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
660   if (!temp)
661      return;
662
663   delete expr;
664   ir->rhs = temp;
665   return;
666}
667
668ir_expression *
669lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx)
670{
671   ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
672   return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
673                                     glsl_type::int_type,
674                                     block_ref);
675}
676
/**
 * Return the stride, in bytes, of one element of the unsized array that
 * \p deref refers to, honoring the block's std140/std430 packing.
 *
 * \p deref is either the array variable itself or a record member whose
 * last field is the unsized array.
 */
unsigned
lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
                                                            enum glsl_interface_packing packing)
{
   unsigned array_stride = 0;

   switch (deref->ir_type) {
   case ir_type_dereference_variable:
   {
      ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
      const struct glsl_type *unsized_array_type = NULL;
      /* An unsized array can be sized by other lowering passes, so pick
       * the first field of the array which has the data type of the unsized
       * array.
       */
      unsized_array_type = deref_var->var->type->fields.array;

      /* Whether or not the field is row-major (because it might be a
       * bvec2 or something) does not affect the array itself. We need
       * to know whether an array element in its entirety is row-major.
       */
      const bool array_row_major =
         is_dereferenced_thing_row_major(deref_var);

      /* std140 rounds the element stride up to a vec4 boundary; std430
       * uses the tighter array stride.
       */
      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         array_stride = unsized_array_type->std430_array_stride(array_row_major);
      } else {
         array_stride = unsized_array_type->std140_size(array_row_major);
         array_stride = glsl_align(array_stride, 16);
      }
      break;
   }
   case ir_type_dereference_record:
   {
      ir_dereference_record *deref_record = (ir_dereference_record *) deref;
      ir_dereference *interface_deref =
         deref_record->record->as_dereference();
      assert(interface_deref != NULL);
      const struct glsl_type *interface_type = interface_deref->type;
      unsigned record_length = interface_type->length;
      /* Unsized array is always the last element of the interface */
      const struct glsl_type *unsized_array_type =
         interface_type->fields.structure[record_length - 1].type->fields.array;

      const bool array_row_major =
         is_dereferenced_thing_row_major(deref_record);

      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         array_stride = unsized_array_type->std430_array_stride(array_row_major);
      } else {
         array_stride = unsized_array_type->std140_size(array_row_major);
         array_stride = glsl_align(array_stride, 16);
      }
      break;
   }
   default:
      unreachable("Unsupported dereference type");
   }
   return array_stride;
}
737
/**
 * Build the expression that computes an unsized SSBO array's length:
 *
 *   max((buffer_object_size - offset_of_array) / stride_of_array, 0)
 *
 * \param rvalue  used only to obtain the ralloc context
 * \param deref   dereference of the unsized array
 * \param var     SSBO-backed variable the array belongs to
 */
ir_expression *
lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
                                                               ir_dereference *deref,
                                                               ir_variable *var)
{
   void *mem_ctx = ralloc_parent(*rvalue);

   ir_rvalue *base_offset = NULL;
   unsigned const_offset;
   bool row_major;
   const glsl_type *matrix_type;

   enum glsl_interface_packing packing =
      var->get_interface_type()->
         get_internal_ifc_packing(use_std430_as_default);
   int unsized_array_stride =
      calculate_unsized_array_stride(deref, packing);

   this->buffer_access_type = ssbo_unsized_array_length_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to calculate the length.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &base_offset, &const_offset,
                           &row_major, &matrix_type,
                           packing);
   /* array.length() =
    *  max((buffer_object_size - offset_of_array) / stride_of_array, 0)
    */
   ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);

   /* Total byte offset of the array = dynamic part + constant part. */
   ir_expression *offset_of_array = new(mem_ctx)
      ir_expression(ir_binop_add, base_offset,
                    new(mem_ctx) ir_constant(const_offset));
   /* The buffer size query is signed; convert the offset to match. */
   ir_expression *offset_of_array_int = new(mem_ctx)
      ir_expression(ir_unop_u2i, offset_of_array);

   ir_expression *sub = new(mem_ctx)
      ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
   ir_expression *div =  new(mem_ctx)
      ir_expression(ir_binop_div, sub,
                    new(mem_ctx) ir_constant(unsized_array_stride));
   /* Clamp to zero in case the buffer is smaller than the array's offset. */
   ir_expression *max = new(mem_ctx)
      ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));

   return max;
}
787
788void
789lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
790{
791   if (!ir || !ir->lhs)
792      return;
793
794   ir_rvalue *rvalue = ir->lhs->as_rvalue();
795   if (!rvalue)
796      return;
797
798   ir_dereference *deref = ir->lhs->as_dereference();
799   if (!deref)
800      return;
801
802   ir_variable *var = ir->lhs->variable_referenced();
803   if (!var || !var->is_in_shader_storage_block())
804      return;
805
806   /* We have a write to a buffer variable, so declare a temporary and rewrite
807    * the assignment so that the temporary is the LHS.
808    */
809   void *mem_ctx = ralloc_parent(shader->ir);
810
811   const glsl_type *type = rvalue->type;
812   ir_variable *write_var = new(mem_ctx) ir_variable(type,
813                                                     "ssbo_store_temp",
814                                                     ir_var_temporary);
815   base_ir->insert_before(write_var);
816   ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);
817
818   /* Now we have to write the value assigned to the temporary back to memory */
819   write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
820   progress = true;
821}
822
823static bool
824is_buffer_backed_variable(ir_variable *var)
825{
826   return var->is_in_buffer_block() ||
827          var->data.mode == ir_var_shader_shared;
828}
829
/**
 * Split a whole-array copy out of a buffer-backed variable into per-element
 * copies, removing the original assignment.
 *
 * \return true if \p ir was an array copy that got split (and removed)
 */
bool
lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be arrays
    * FIXME: arrays of arrays?
    */
   if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the array copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->length == rhs_deref->type->length);
   void *mem_ctx = ralloc_parent(shader->ir);

   /* Note: repeated insert_after(ir) leaves the element copies after \p ir
    * in reverse index order; the copies are independent so this is fine.
    */
   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      ir_dereference *lhs_i =
         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));

      ir_dereference *rhs_i =
         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));
      ir->insert_after(assign(lhs_i, rhs_i));
   }

   ir->remove();
   progress = true;
   return true;
}
879
880bool
881lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
882{
883   if (!ir || !ir->lhs || !ir->rhs)
884      return false;
885
886   /* LHS and RHS must be records */
887   if (!ir->lhs->type->is_struct() || !ir->rhs->type->is_struct())
888      return false;
889
890   /* RHS must be a buffer-backed variable. This is what can cause the problem
891    * since it would lead to a series of loads that need to live until we
892    * see the writes to the LHS.
893    */
894   ir_variable *rhs_var = ir->rhs->variable_referenced();
895   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
896      return false;
897
898   /* Split the struct copy into individual element copies to reduce
899    * register pressure
900    */
901   ir_dereference *rhs_deref = ir->rhs->as_dereference();
902   if (!rhs_deref)
903      return false;
904
905   ir_dereference *lhs_deref = ir->lhs->as_dereference();
906   if (!lhs_deref)
907      return false;
908
909   assert(lhs_deref->type == rhs_deref->type);
910   void *mem_ctx = ralloc_parent(shader->ir);
911
912   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
913      const char *field_name = lhs_deref->type->fields.structure[i].name;
914      ir_dereference *lhs_field =
915         new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
916                                            field_name);
917      ir_dereference *rhs_field =
918         new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
919                                            field_name);
920      ir->insert_after(assign(lhs_field, rhs_field));
921   }
922
923   ir->remove();
924   progress = true;
925   return true;
926}
927
928ir_visitor_status
929lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
930{
931   /* Array and struct copies could involve large amounts of load/store
932    * operations. To improve register pressure we want to special-case
933    * these and split them into individual element copies.
934    * This way we avoid emitting all the loads for the RHS first and
935    * all the writes for the LHS second and register usage is more
936    * efficient.
937    */
938   if (check_for_buffer_array_copy(ir))
939      return visit_continue_with_parent;
940
941   if (check_for_buffer_struct_copy(ir))
942      return visit_continue_with_parent;
943
944   check_ssbo_unsized_array_length_assignment(ir);
945   check_for_ssbo_store(ir);
946   return rvalue_visit(ir);
947}
948
949/* Lowers the intrinsic call to a new internal intrinsic that swaps the
950 * access to the buffer variable in the first parameter by an offset
951 * and block index. This involves creating the new internal intrinsic
952 * (i.e. the new function signature).
953 */
954ir_call *
955lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
956{
957   /* SSBO atomics usually have 2 parameters, the buffer variable and an
958    * integer argument. The exception is CompSwap, that has an additional
959    * integer parameter.
960    */
961   int param_count = ir->actual_parameters.length();
962   assert(param_count == 2 || param_count == 3);
963
964   /* First argument must be a scalar integer buffer variable */
965   exec_node *param = ir->actual_parameters.get_head();
966   ir_instruction *inst = (ir_instruction *) param;
967   assert(inst->ir_type == ir_type_dereference_variable ||
968          inst->ir_type == ir_type_dereference_array ||
969          inst->ir_type == ir_type_dereference_record ||
970          inst->ir_type == ir_type_swizzle);
971
972   ir_rvalue *deref = (ir_rvalue *) inst;
973   assert(deref->type->is_scalar() &&
974          (deref->type->is_integer() || deref->type->is_float()));
975
976   ir_variable *var = deref->variable_referenced();
977   assert(var);
978
979   /* Compute the offset to the start if the dereference and the
980    * block index
981    */
982   void *mem_ctx = ralloc_parent(shader->ir);
983
984   ir_rvalue *offset = NULL;
985   unsigned const_offset;
986   bool row_major;
987   const glsl_type *matrix_type;
988
989   enum glsl_interface_packing packing =
990      var->get_interface_type()->
991         get_internal_ifc_packing(use_std430_as_default);
992
993   this->buffer_access_type = ssbo_atomic_access;
994   this->variable = var;
995
996   setup_for_load_or_store(mem_ctx, var, deref,
997                           &offset, &const_offset,
998                           &row_major, &matrix_type,
999                           packing);
1000   assert(offset);
1001   assert(!row_major);
1002   assert(matrix_type == NULL);
1003
1004   ir_rvalue *deref_offset =
1005      add(offset, new(mem_ctx) ir_constant(const_offset));
1006   ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
1007
1008   /* Create the new internal function signature that will take a block
1009    * index and offset instead of a buffer variable
1010    */
1011   exec_list sig_params;
1012   ir_variable *sig_param = new(mem_ctx)
1013      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
1014   sig_params.push_tail(sig_param);
1015
1016   sig_param = new(mem_ctx)
1017      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
1018   sig_params.push_tail(sig_param);
1019
1020   const glsl_type *type = deref->type->get_scalar_type();
1021   sig_param = new(mem_ctx)
1022         ir_variable(type, "data1", ir_var_function_in);
1023   sig_params.push_tail(sig_param);
1024
1025   if (param_count == 3) {
1026      sig_param = new(mem_ctx)
1027            ir_variable(type, "data2", ir_var_function_in);
1028      sig_params.push_tail(sig_param);
1029   }
1030
1031   ir_function_signature *sig =
1032      new(mem_ctx) ir_function_signature(deref->type,
1033                                         shader_storage_buffer_object);
1034   assert(sig);
1035   sig->replace_parameters(&sig_params);
1036
1037   assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
1038   assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
1039   sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, ssbo);
1040
1041   char func_name[64];
1042   sprintf(func_name, "%s_ssbo", ir->callee_name());
1043   ir_function *f = new(mem_ctx) ir_function(func_name);
1044   f->add_signature(sig);
1045
1046   /* Now, create the call to the internal intrinsic */
1047   exec_list call_params;
1048   call_params.push_tail(block_index);
1049   call_params.push_tail(deref_offset);
1050   param = ir->actual_parameters.get_head()->get_next();
1051   ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1052   call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1053   if (param_count == 3) {
1054      param = param->get_next();
1055      param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1056      call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1057   }
1058   ir_dereference_variable *return_deref =
1059      ir->return_deref->clone(mem_ctx, NULL);
1060   return new(mem_ctx) ir_call(sig, return_deref, &call_params);
1061}
1062
1063ir_call *
1064lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
1065{
1066   exec_list& params = ir->actual_parameters;
1067
1068   if (params.length() < 2 || params.length() > 3)
1069      return ir;
1070
1071   ir_rvalue *rvalue =
1072      ((ir_instruction *) params.get_head())->as_rvalue();
1073   if (!rvalue)
1074      return ir;
1075
1076   ir_variable *var = rvalue->variable_referenced();
1077   if (!var || !var->is_in_shader_storage_block())
1078      return ir;
1079
1080   const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
1081   if (id == ir_intrinsic_generic_atomic_add ||
1082       id == ir_intrinsic_generic_atomic_min ||
1083       id == ir_intrinsic_generic_atomic_max ||
1084       id == ir_intrinsic_generic_atomic_and ||
1085       id == ir_intrinsic_generic_atomic_or ||
1086       id == ir_intrinsic_generic_atomic_xor ||
1087       id == ir_intrinsic_generic_atomic_exchange ||
1088       id == ir_intrinsic_generic_atomic_comp_swap) {
1089      return lower_ssbo_atomic_intrinsic(ir);
1090   }
1091
1092   return ir;
1093}
1094
1095
1096ir_visitor_status
1097lower_ubo_reference_visitor::visit_enter(ir_call *ir)
1098{
1099   ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
1100   if (new_ir != ir) {
1101      progress = true;
1102      base_ir->replace_with(new_ir);
1103      return visit_continue_with_parent;
1104   }
1105
1106   return rvalue_visit(ir);
1107}
1108
1109
1110ir_visitor_status
1111lower_ubo_reference_visitor::visit_enter(ir_texture *ir)
1112{
1113   ir_dereference *sampler = ir->sampler;
1114
1115   if (sampler->ir_type == ir_type_dereference_record) {
1116      handle_rvalue((ir_rvalue **)&ir->sampler);
1117      return visit_continue_with_parent;
1118   }
1119
1120   return rvalue_visit(ir);
1121}
1122
1123
1124} /* unnamed namespace */
1125
1126void
1127lower_ubo_reference(struct gl_linked_shader *shader,
1128                    bool clamp_block_indices, bool use_std430_as_default)
1129{
1130   lower_ubo_reference_visitor v(shader, clamp_block_indices,
1131                                 use_std430_as_default);
1132
1133   /* Loop over the instructions lowering references, because we take
1134    * a deref of a UBO array using a UBO dereference as the index will
1135    * produce a collection of instructions all of which have cloned
1136    * UBO dereferences for that array index.
1137    */
1138   do {
1139      v.progress = false;
1140      visit_list_elements(&v, shader->ir);
1141   } while (v.progress);
1142}
1143