1/*
2 * Copyright 2018 VMware, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26
/**
 * This utility transforms the shader to support dynamic array indexing
 * for samplers and constant buffers.
 * It calculates the dynamic array index first, then compares it with each
 * index; the operation is performed with the matching index.
 */
33
34#include "util/u_debug.h"
35#include "util/u_math.h"
36#include "tgsi_info.h"
37#include "tgsi_dynamic_indexing.h"
38#include "tgsi_transform.h"
39#include "tgsi_dump.h"
40#include "pipe/p_state.h"
41
42
43struct dIndexing_transform_context
44{
45   struct tgsi_transform_context base;
46   unsigned orig_num_tmp;
47   unsigned orig_num_imm;
48   unsigned num_const_bufs;
49   unsigned num_samplers;
50   unsigned num_iterations;
51   unsigned const_buf_range[PIPE_MAX_CONSTANT_BUFFERS];
52};
53
54
/**
 * Convert the generic transform context pointer received by the callbacks
 * into a pointer to this pass's private context (plain pointer cast, the
 * address is returned unchanged).
 */
static inline struct dIndexing_transform_context *
dIndexing_transform_context(struct tgsi_transform_context *ctx)
{
   struct dIndexing_transform_context *dc =
      (struct dIndexing_transform_context *) ctx;

   return dc;
}
60
61
62/**
63 * TGSI declaration transform callback.
64 */
65static void
66dIndexing_decl(struct tgsi_transform_context *ctx,
67               struct tgsi_full_declaration *decl)
68{
69   struct dIndexing_transform_context *dc = dIndexing_transform_context(ctx);
70
71   if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
72      /**
73       * Emit some extra temporary register to use in keeping track of
74       * dynamic index.
75       */
76      dc->orig_num_tmp = decl->Range.Last;
77      decl->Range.Last = decl->Range.Last + 3;
78   }
79   else if (decl->Declaration.File == TGSI_FILE_CONSTANT) {
80      /* Keep track of number of constants in each buffer */
81      dc->const_buf_range[decl->Dim.Index2D] = decl->Range.Last;
82   }
83   ctx->emit_declaration(ctx, decl);
84}
85
86
/**
 * TGSI transform prolog callback.
 *
 * Declares two integer immediates holding {0, 1, 2, 3} and {4, 5, 6, 7},
 * in that order.
 */
static void
dIndexing_prolog(struct tgsi_transform_context *ctx)
{
   int first;

   for (first = 0; first < 8; first += 4) {
      tgsi_transform_immediate_int_decl(ctx, first, first + 1,
                                        first + 2, first + 3);
   }
}
96
97
98/**
99 * This function emits some extra instruction to remove dynamic array
100 * indexing of constant buffers / samplers from the shader.
101 * It calculates dynamic array index first and compare it with each index for
102 * declared constants/samplers.
103 */
104static void
105remove_dynamic_indexes(struct tgsi_transform_context *ctx,
106                       struct tgsi_full_instruction *orig_inst,
107                       const struct tgsi_full_src_register *reg)
108{
109   struct dIndexing_transform_context *dc = dIndexing_transform_context(ctx);
110   int i, j;
111   int tmp_loopIdx = dc->orig_num_tmp + 1;
112   int tmp_cond = dc->orig_num_tmp + 2;
113   int tmp_arrayIdx = dc->orig_num_tmp + 3;
114   int imm_index = dc->orig_num_imm;
115   struct tgsi_full_instruction inst;
116   unsigned INVALID_INDEX = 99999;
117   unsigned file = TGSI_FILE_NULL, index = INVALID_INDEX;
118   unsigned imm_swz_index = INVALID_INDEX;
119
120   /* calculate dynamic array index store it in tmp_arrayIdx.x */
121   inst = tgsi_default_full_instruction();
122   inst.Instruction.Opcode = TGSI_OPCODE_UADD;
123   inst.Instruction.NumDstRegs = 1;
124   tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
125                          tmp_arrayIdx, TGSI_WRITEMASK_X);
126   inst.Instruction.NumSrcRegs = 2;
127   if (reg->Register.File == TGSI_FILE_CONSTANT) {
128      file = reg->DimIndirect.File;
129      index = reg->DimIndirect.Index;
130      imm_swz_index = reg->Dimension.Index;
131   }
132   else if (reg->Register.File == TGSI_FILE_SAMPLER) {
133      file = reg->Indirect.File;
134      index = reg->Indirect.Index;
135      imm_swz_index = reg->Register.Index;
136   }
137   tgsi_transform_src_reg(&inst.Src[0], file,
138                          index, TGSI_SWIZZLE_X,
139                          TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
140   tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE,
141                          imm_index + (imm_swz_index / 4),
142                          imm_swz_index % 4,
143                          imm_swz_index % 4,
144                          imm_swz_index % 4,
145                          imm_swz_index % 4);
146   ctx->emit_instruction(ctx, &inst);
147
148   /* initialize counter to zero: tmp_loopIdx = 0 */
149   inst = tgsi_default_full_instruction();
150   inst.Instruction.Opcode = TGSI_OPCODE_MOV;
151   inst.Instruction.NumDstRegs = 1;
152   tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
153                          tmp_loopIdx, TGSI_WRITEMASK_X);
154   inst.Instruction.NumSrcRegs = 1;
155   tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_IMMEDIATE,
156                          imm_index, TGSI_SWIZZLE_X,
157                          TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
158                          TGSI_SWIZZLE_X);
159   ctx->emit_instruction(ctx, &inst);
160
161   for (i = 0; i < dc->num_iterations; i++) {
162      boolean out_of_bound_index = FALSE;
163      /**
164       * Make sure we are not exceeding index limit of constant buffer
165       *
166       * For example, In declaration, We have
167       *
168       * DCL CONST[0][0..1]
169       * DCL CONST[1][0..2]
170       * DCL CONST[2][0]
171       *
172       * and our dynamic index instruction is
173       * MOV TEMP[0], CONST[ADDR[0].x][1]
174       *
175       * We have to make sure to skip unrolling for CONST[2] because
176       * it has only one constant in the buffer
177       */
178      if ((reg->Register.File == TGSI_FILE_CONSTANT) &&
179          (!reg->Register.Indirect &&
180           (reg->Register.Index > dc->const_buf_range[i]))) {
181         out_of_bound_index = TRUE;
182      }
183
184      if (!out_of_bound_index) {
185         /**
186          * If we have an instruction of the format:
187          * OPCODE dst, src..., CONST[K][foo], src...
188          * where K is dynamic and tmp_loopIdx = i (loopcount),
189          * replace it with:
190          *
191          * if (K == tmp_loopIdx)
192          *    OPCODE dst, src... where src is CONST[i][foo] and i is constant
193          * }
194          *
195          * Similarly, If instruction uses dynamic array index for samplers
196          * e.g. OPCODE dst, src, SAMPL[k] ..
197          * replace it with:
198          * if (K == tmp_loopIdx)
199          *    OPCODE dst, src, SAMPL[i][foo]... where i is constant.
200          * }
201          */
202         inst = tgsi_default_full_instruction();
203         inst.Instruction.Opcode = TGSI_OPCODE_USEQ;
204         inst.Instruction.NumDstRegs = 1;
205         tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
206                                tmp_cond, TGSI_WRITEMASK_X);
207         inst.Instruction.NumSrcRegs = 2;
208         tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
209                                tmp_arrayIdx, TGSI_SWIZZLE_X,
210                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
211                                TGSI_SWIZZLE_X);
212         tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_TEMPORARY,
213                                tmp_loopIdx, TGSI_SWIZZLE_X,
214                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
215                                TGSI_SWIZZLE_X);
216         ctx->emit_instruction(ctx, &inst);
217
218         inst = tgsi_default_full_instruction();
219         inst.Instruction.Opcode = TGSI_OPCODE_UIF;
220         inst.Instruction.NumDstRegs = 0;
221         inst.Instruction.NumSrcRegs = 1;
222         tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
223                                tmp_cond, TGSI_SWIZZLE_X,
224                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
225                                TGSI_SWIZZLE_X);
226         ctx->emit_instruction(ctx, &inst);
227
228         /* emit instruction with new, non-dynamic source registers */
229         inst = *orig_inst;
230         for (j = 0; j < inst.Instruction.NumSrcRegs; j++) {
231            if (inst.Src[j].Dimension.Indirect &&
232                inst.Src[j].Register.File == TGSI_FILE_CONSTANT) {
233               inst.Src[j].Register.Dimension = 1;
234               inst.Src[j].Dimension.Index = i;
235               inst.Src[j].Dimension.Indirect = 0;
236            }
237            else if (inst.Src[j].Register.Indirect &&
238                     inst.Src[j].Register.File == TGSI_FILE_SAMPLER) {
239               inst.Src[j].Register.Indirect = 0;
240               inst.Src[j].Register.Index = i;
241            }
242         }
243         ctx->emit_instruction(ctx, &inst);
244
245         inst = tgsi_default_full_instruction();
246         inst.Instruction.Opcode = TGSI_OPCODE_ENDIF;
247         inst.Instruction.NumDstRegs = 0;
248         inst.Instruction.NumSrcRegs = 0;
249         ctx->emit_instruction(ctx, &inst);
250      }
251
252      /**
253       * Increment counter
254       * UADD tmp_loopIdx.x tmp_loopIdx.x imm(1)
255       */
256      inst = tgsi_default_full_instruction();
257      inst.Instruction.Opcode = TGSI_OPCODE_UADD;
258      inst.Instruction.NumDstRegs = 1;
259      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
260                             tmp_loopIdx, TGSI_WRITEMASK_X);
261      inst.Instruction.NumSrcRegs = 2;
262      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
263                              tmp_loopIdx, TGSI_SWIZZLE_X,
264                              TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
265      tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE, imm_index,
266                             TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y,
267                             TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y);
268
269      ctx->emit_instruction(ctx, &inst);
270   }
271}
272
273
274/**
275 * TGSI instruction transform callback.
276 */
277static void
278dIndexing_inst(struct tgsi_transform_context *ctx,
279               struct tgsi_full_instruction *inst)
280{
281   int i;
282   boolean indexing = FALSE;
283   struct dIndexing_transform_context *dc = dIndexing_transform_context(ctx);
284
285   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
286      struct tgsi_full_src_register *src;
287      src = &inst->Src[i];
288      /* check if constant buffer/sampler is using dynamic index */
289      if ((src->Dimension.Indirect &&
290           src->Register.File == TGSI_FILE_CONSTANT) ||
291          (src->Register.Indirect &&
292           src->Register.File == TGSI_FILE_SAMPLER)) {
293
294         if (indexing)
295            assert("More than one src has dynamic indexing");
296
297         if (src->Register.File == TGSI_FILE_CONSTANT)
298            dc->num_iterations = dc->num_const_bufs;
299         else
300            dc->num_iterations = dc->num_samplers;
301
302         remove_dynamic_indexes(ctx, inst, src);
303         indexing = TRUE;
304      }
305   }
306
307   if (!indexing) {
308      ctx->emit_instruction(ctx, inst);
309   }
310}
311
312/**
313 * TGSI utility to remove dynamic array indexing for constant buffers and
314 * samplers.
315 *
316 * This utility accepts bitmask of declared constant buffers and samplers,
317 * number of immediates used in shader.
318 *
319 * If dynamic array index is used for constant buffers and samplers, this
320 * utility removes those dynamic indexes from shader. It also makes sure
321 * that it has same output as per original shader.
322 * This is achieved by calculating dynamic array index first and then compare
323 * it with each constant buffer/ sampler index and replace that dynamic index
324 * with static index.
325 */
326struct tgsi_token *
327tgsi_remove_dynamic_indexing(const struct tgsi_token *tokens_in,
328                             unsigned const_buffers_declared_bitmask,
329                             unsigned samplers_declared_bitmask,
330                             unsigned imm_count)
331{
332   struct dIndexing_transform_context transform;
333   const uint num_new_tokens = 1000; /* should be enough */
334   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
335   struct tgsi_token *new_tokens;
336
337   /* setup transformation context */
338   memset(&transform, 0, sizeof(transform));
339   transform.base.transform_declaration = dIndexing_decl;
340   transform.base.transform_instruction = dIndexing_inst;
341   transform.base.prolog = dIndexing_prolog;
342
343   transform.orig_num_tmp = 0;
344   transform.orig_num_imm = imm_count;
345   /* get count of declared const buffers and sampler from their bitmasks*/
346   transform.num_const_bufs = log2(const_buffers_declared_bitmask + 1);
347   transform.num_samplers = log2(samplers_declared_bitmask + 1);
348   transform.num_iterations = 0;
349
350   /* allocate new tokens buffer */
351   new_tokens = tgsi_alloc_tokens(new_len);
352   if (!new_tokens)
353      return NULL;
354
355   /* transform the shader */
356   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
357
358   return new_tokens;
359}
360
361
362