/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "gl_nir.h"
#include "ir_uniform.h"

#include "main/compiler.h"
#include "main/mtypes.h"

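/* Build the GLSL block name (e.g. "block[2][0]") for an interface block
 * deref chain and look it up in the linked shader's UBO or SSBO block list.
 * Constant array indices are folded into the name; any non-constant index
 * contributes an SSA value that is added on top of the matching block's
 * list index.
 */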
static nir_ssa_def *
get_block_array_index(nir_builder *b, nir_deref_instr *deref,
                      const struct gl_shader_program *shader_program)
{
   unsigned array_elements = 1;

   /* Build a block name such as "block[2][0]" for finding in the list of
    * blocks later on as well as an optional dynamic index which gets added
    * to the block index later.
    */
   const char *block_name = "";
   nir_ssa_def *nonconst_index = NULL;
   while (deref->deref_type == nir_deref_type_array) {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      assert(parent && glsl_type_is_array(parent->type));
      unsigned arr_size = glsl_get_length(parent->type);

      if (nir_src_is_const(deref->arr.index)) {
         unsigned arr_index = nir_src_as_uint(deref->arr.index);
         arr_index = MIN2(arr_index, arr_size - 1);

         /* We're walking the deref from the tail so prepend the array index */
         block_name = ralloc_asprintf(b->shader, "[%u]%s", arr_index,
                                      block_name);
      } else {
         nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
         arr_index = nir_umin(b, arr_index, nir_imm_int(b, arr_size - 1));
         nir_ssa_def *arr_offset = nir_imul_imm(b, arr_index, array_elements);
         if (nonconst_index)
            nonconst_index = nir_iadd(b, nonconst_index, arr_offset);
         else
            nonconst_index = arr_offset;

         /* We're walking the deref from the tail so prepend the array index */
         block_name = ralloc_asprintf(b->shader, "[0]%s", block_name);
      }

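      /* Accumulate the flattened element count of the dimensions handled so
       * far; it becomes the stride for any outer non-constant array index.
       */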
      array_elements *= arr_size;
      deref = parent;
   }

   assert(deref->deref_type == nir_deref_type_var);
   block_name = ralloc_asprintf(b->shader, "%s%s",
                                glsl_get_type_name(deref->var->interface_type),
                                block_name);

   struct gl_linked_shader *linked_shader =
      shader_program->_LinkedShaders[b->shader->info.stage];

   unsigned num_blocks;
   struct gl_uniform_block **blocks;
   if (deref->mode == nir_var_mem_ubo) {
      num_blocks = linked_shader->Program->info.num_ubos;
      blocks = linked_shader->Program->sh.UniformBlocks;
   } else {
      assert(deref->mode == nir_var_mem_ssbo);
      num_blocks = linked_shader->Program->info.num_ssbos;
      blocks = linked_shader->Program->sh.ShaderStorageBlocks;
   }

   for (unsigned i = 0; i < num_blocks; i++) {
      if (strcmp(block_name, blocks[i]->Name) == 0) {
         if (nonconst_index)
            return nir_iadd_imm(b, nonconst_index, i);
         else
            return nir_imm_int(b, i);
      }
   }

   unreachable("Failed to find the block by name");
}

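/* Find the list index of the interface block that contains the given
 * variable, along with the variable's byte offset within that block.  This
 * is used for variables declared inside a named UBO/SSBO block rather than
 * variables of block type.
 */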
static void
get_block_index_offset(nir_variable *var,
                       const struct gl_shader_program *shader_program,
                       gl_shader_stage stage,
                       unsigned *index, unsigned *offset)
{
   struct gl_linked_shader *linked_shader =
      shader_program->_LinkedShaders[stage];

   unsigned num_blocks;
   struct gl_uniform_block **blocks;
   if (var->data.mode == nir_var_mem_ubo) {
      num_blocks = linked_shader->Program->info.num_ubos;
      blocks = linked_shader->Program->sh.UniformBlocks;
   } else {
      assert(var->data.mode == nir_var_mem_ssbo);
      num_blocks = linked_shader->Program->info.num_ssbos;
      blocks = linked_shader->Program->sh.ShaderStorageBlocks;
   }

   const char *block_name = glsl_get_type_name(var->interface_type);
   for (unsigned i = 0; i < num_blocks; i++) {
      if (strcmp(block_name, blocks[i]->Name) == 0) {
         *index = i;
         *offset = blocks[i]->Uniforms[var->data.location].Offset;
         return;
      }
   }

   unreachable("Failed to find the block by name");
}

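/* Rewrite UBO and SSBO derefs in this function into
 * nir_address_format_32bit_index_offset pointers (a vec2 of block index and
 * byte offset) and convert boolean loads and stores to the 32-bit integer
 * representation used by the buffer memory layout.
 */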
static bool
lower_buffer_interface_derefs_impl(nir_function_impl *impl,
                                   const struct gl_shader_program *shader_program)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (!(deref->mode & (nir_var_mem_ubo | nir_var_mem_ssbo)))
               break;

            /* We use nir_address_format_32bit_index_offset */
            assert(deref->dest.is_ssa);
            assert(deref->dest.ssa.bit_size == 32);
            deref->dest.ssa.num_components = 2;

            progress = true;

            b.cursor = nir_before_instr(&deref->instr);

            nir_ssa_def *ptr;
            if (deref->deref_type == nir_deref_type_var &&
                !glsl_type_is_interface(glsl_without_array(deref->var->type))) {
               /* This variable is contained in an interface block rather than
                * containing one.  We need the block index and its offset
                * inside that block
                */
               unsigned index, offset;
               get_block_index_offset(deref->var, shader_program,
                                      b.shader->info.stage,
                                      &index, &offset);
               ptr = nir_imm_ivec2(&b, index, offset);
            } else if (glsl_type_is_interface(deref->type)) {
               /* This is the last deref before the block boundary.
                * Everything after this point is a byte offset and will be
                * handled by nir_lower_explicit_io().
                */
               nir_ssa_def *index = get_block_array_index(&b, deref,
                                                          shader_program);
               ptr = nir_vec2(&b, index, nir_imm_int(&b, 0));
            } else {
               /* This will get handled by nir_lower_explicit_io(). */
               break;
            }

            nir_deref_instr *cast = nir_build_deref_cast(&b, ptr, deref->mode,
                                                         deref->type, 0);
            nir_ssa_def_rewrite_uses(&deref->dest.ssa,
                                     nir_src_for_ssa(&cast->dest.ssa));
            nir_deref_instr_remove_if_unused(deref);
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_deref: {
               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
               if (!(deref->mode & (nir_var_mem_ubo | nir_var_mem_ssbo)))
                  break;

               /* UBO and SSBO Booleans are 32-bit integers where any non-zero
                * value is considered true.  NIR Booleans, on the other hand,
                * are 1-bit values until you get to a very late stage of the
                * compilation process.  We need to turn those 1-bit loads into
                * a 32-bit load wrapped in an i2b to get a proper NIR boolean
                * from the UBO or SSBO.
                */
               if (glsl_type_is_boolean(deref->type)) {
                  assert(intrin->dest.is_ssa);
                  b.cursor = nir_after_instr(&intrin->instr);
                  intrin->dest.ssa.bit_size = 32;
                  nir_ssa_def *bval = nir_i2b(&b, &intrin->dest.ssa);
                  nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
                                                 nir_src_for_ssa(bval),
                                                 bval->parent_instr);
                  progress = true;
               }
               break;
            }

            case nir_intrinsic_store_deref: {
               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
               if (!(deref->mode & (nir_var_mem_ubo | nir_var_mem_ssbo)))
                  break;

               /* SSBO Booleans are 32-bit integers where any non-zero value
                * is considered true.  NIR Booleans, on the other hand, are
                * 1-bit values until you get to a very late stage of the
                * compilation process.  We need to turn those 1-bit stores
                * into a b2i32 followed by a 32-bit store.  Technically the
                * value we write doesn't have to be 0/1, so once Booleans are
                * lowered to 32-bit values we have an unneeded sanitization
                * step, but in practice it doesn't cost much.
                */
               if (glsl_type_is_boolean(deref->type)) {
                  assert(intrin->src[1].is_ssa);
                  b.cursor = nir_before_instr(&intrin->instr);
                  nir_ssa_def *ival = nir_b2i32(&b, intrin->src[1].ssa);
                  nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
                                        nir_src_for_ssa(ival));
                  progress = true;
               }
               break;
            }

            case nir_intrinsic_copy_deref:
               unreachable("copy_deref should be lowered by now");
               break;

            default:
               /* Nothing to do */
               break;
            }
            break;
         }

         default:
            break; /* Nothing to do */
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}

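/* Lower UBO and SSBO access from variable derefs to the
 * nir_address_format_32bit_index_offset addressing model.  The derefs are
 * first rewritten to carry a (block index, byte offset) pair; the remaining
 * lowering to explicit load/store intrinsics is left to
 * nir_lower_explicit_io().
 */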
bool
gl_nir_lower_buffers(nir_shader *shader,
                     const struct gl_shader_program *shader_program)
{
   bool progress = false;

   /* First, we lower the derefs to turn block variable and array derefs into
    * a nir_address_format_32bit_index_offset pointer.  From there forward,
    * we leave the derefs in place and let nir_lower_explicit_io handle them.
    */
   nir_foreach_function(function, shader) {
      if (function->impl &&
          lower_buffer_interface_derefs_impl(function->impl, shader_program))
         progress = true;
   }

   /* If that did something, we validate and then call nir_lower_explicit_io
    * to finish the process.
    */
   if (progress) {
      nir_validate_shader(shader, "Lowering buffer interface derefs");
      nir_lower_explicit_io(shader, nir_var_mem_ubo | nir_var_mem_ssbo,
                            nir_address_format_32bit_index_offset);
   }

   return progress;
}