1/*
2 * Copyright (C) 2015-2018 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "ir3_context.h"
28#include "ir3_compiler.h"
29#include "ir3_image.h"
30#include "ir3_nir.h"
31#include "ir3_shader.h"
32
/* Create and initialize the per-variant compile context: clones the NIR,
 * runs the variant-specific lowering/cleanup passes, and sets up the
 * hash-tables used while translating NIR to ir3.
 *
 * The returned context is ralloc'd; release it with ir3_context_free().
 */
struct ir3_context *
ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader_variant *so)
{
   struct ir3_context *ctx = rzalloc(NULL, struct ir3_context);

   /* Which texture workaround state we pull from the shader key depends
    * on the generation (astc+srgb lowering on a4xx+, per-sampler sample
    * state on earlier gens):
    */
   if (compiler->gen >= 4) {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->astc_srgb = so->key.vastc_srgb;
      } else if (so->type == MESA_SHADER_FRAGMENT) {
         ctx->astc_srgb = so->key.fastc_srgb;
      }

   } else {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->samples = so->key.vsamples;
      } else if (so->type == MESA_SHADER_FRAGMENT) {
         ctx->samples = so->key.fsamples;
      }
   }

   /* Generation-specific emit callbacks (note: none for gen < 4): */
   if (compiler->gen >= 6) {
      ctx->funcs = &ir3_a6xx_funcs;
   } else if (compiler->gen >= 4) {
      ctx->funcs = &ir3_a4xx_funcs;
   }

   ctx->compiler = compiler;
   ctx->so = so;
   ctx->def_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->block_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->continue_block_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->sel_cond_conversions =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);

   /* TODO: maybe generate some sort of bitmask of what key
    * lowers vs what shader has (ie. no need to lower
    * texture clamp lowering if no texture sample instrs)..
    * although should be done further up the stack to avoid
    * creating duplicate variants..
    */

   /* Clone so the variant-specific lowering below does not modify the
    * shader's shared copy of the NIR:
    */
   ctx->s = nir_shader_clone(ctx, so->shader->nir);
   ir3_nir_lower_variant(so, ctx->s);

   /* this needs to be the last pass run, so do this here instead of
    * in ir3_optimize_nir():
    */
   bool progress = false;
   NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs);

   /* we could need cleanup after lower_locals_to_regs */
   while (progress) {
      progress = false;
      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
   }

   /* We want to lower nir_op_imul as late as possible, to catch also
    * those generated by earlier passes (e.g, nir_lower_locals_to_regs).
    * However, we want a final swing of a few passes to have a chance
    * at optimizing the result.
    */
   progress = false;
   NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
   while (progress) {
      progress = false;
      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
      NIR_PASS(progress, ctx->s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, ctx->s, nir_opt_dead_write_vars);
      NIR_PASS(progress, ctx->s, nir_opt_dce);
      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
   }

   /* Enable the texture pre-fetch feature only a4xx onwards.  But
    * only enable it on generations that have been tested:
    */
   if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gen >= 6))
      NIR_PASS_V(ctx->s, ir3_nir_lower_tex_prefetch);

   NIR_PASS(progress, ctx->s, nir_lower_phis_to_scalar, true);

   /* Super crude heuristic to limit # of tex prefetch in small
    * shaders.  This completely ignores loops.. but that's really
    * not the worst of it's problems.  (A frag shader that has
    * loops is probably going to be big enough to not trigger a
    * lower threshold.)
    *
    *   1) probably want to do this in terms of ir3 instructions
    *   2) probably really want to decide this after scheduling
    *      (or at least pre-RA sched) so we have a rough idea about
    *      nops, and don't count things that get cp'd away
    *   3) blob seems to use higher thresholds with a mix of more
    *      SFU instructions.  Which partly makes sense, more SFU
    *      instructions probably means you want to get the real
    *      shader started sooner, but that considers where in the
    *      shader the SFU instructions are, which blob doesn't seem
    *      to do.
    *
    * This uses more conservative thresholds assuming a more alu
    * than sfu heavy instruction mix.
    */
   if (so->type == MESA_SHADER_FRAGMENT) {
      nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s);

      /* count NIR instructions as a stand-in for final shader size: */
      unsigned instruction_count = 0;
      nir_foreach_block (block, fxn) {
         instruction_count += exec_list_length(&block->instr_list);
      }

      if (instruction_count < 50) {
         ctx->prefetch_limit = 2;
      } else if (instruction_count < 70) {
         ctx->prefetch_limit = 3;
      } else {
         ctx->prefetch_limit = IR3_MAX_SAMPLER_PREFETCH;
      }
   }

   if (shader_debug_enabled(so->type)) {
      mesa_logi("NIR (final form) for %s shader %s:", ir3_shader_stage(so),
                so->shader->nir->info.name);
      nir_log_shaderi(ctx->s);
   }

   ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);

   return ctx;
}
164
/* Free the compile context and everything ralloc'd off of it (the cloned
 * NIR, the hash-tables, cached addr instructions, etc).
 */
void
ir3_context_free(struct ir3_context *ctx)
{
   ralloc_free(ctx);
}
170
171/*
172 * Misc helpers
173 */
174
175/* allocate a n element value array (to be populated by caller) and
176 * insert in def_ht
177 */
178struct ir3_instruction **
179ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n)
180{
181   struct ir3_instruction **value =
182      ralloc_array(ctx->def_ht, struct ir3_instruction *, n);
183   _mesa_hash_table_insert(ctx->def_ht, dst, value);
184   return value;
185}
186
187struct ir3_instruction **
188ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
189{
190   struct ir3_instruction **value;
191
192   if (dst->is_ssa) {
193      value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
194   } else {
195      value = ralloc_array(ctx, struct ir3_instruction *, n);
196   }
197
198   /* NOTE: in non-ssa case, we don't really need to store last_dst
199    * but this helps us catch cases where put_dst() call is forgotten
200    */
201   compile_assert(ctx, !ctx->last_dst);
202   ctx->last_dst = value;
203   ctx->last_dst_n = n;
204
205   return value;
206}
207
/* Get the value array for a nir src.  For SSA srcs this is a def_ht
 * lookup (populated when the producing instruction's dst was handled).
 * For reg srcs, emit one array load per component, with a0.x setup if
 * the access is indirect.
 */
struct ir3_instruction *const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
{
   if (src->is_ssa) {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
      compile_assert(ctx, entry);
      return entry->data;
   } else {
      nir_register *reg = src->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = arr->r->num_components;
      struct ir3_instruction *addr = NULL;
      struct ir3_instruction **value =
         ralloc_array(ctx, struct ir3_instruction *, num_components);

      /* indirect index is scaled by num_components so it steps over
       * whole "vectors" in the array:
       */
      if (src->reg.indirect)
         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
                              reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = src->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         value[i] = ir3_create_array_load(ctx, arr, n, addr);
      }

      return value;
   }
}
237
/* Finish handling a nir dest previously set up via ir3_get_dst():
 * inserts movs for shared-reg values, fixes up types for 16b dests, and
 * for reg (non-ssa) dests emits the array stores.  Resets last_dst
 * tracking.
 */
void
ir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
{
   unsigned bit_size = nir_dest_bit_size(*dst);

   /* add extra mov if dst value is shared reg.. in some cases not all
    * instructions can read from shared regs, in cases where they can
    * ir3_cp will clean up the extra mov:
    */
   for (unsigned i = 0; i < ctx->last_dst_n; i++) {
      if (!ctx->last_dst[i])
         continue;
      if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) {
         ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
      }
   }

   /* Note: 1-bit bools are stored in 32-bit regs */
   if (bit_size == 16) {
      for (unsigned i = 0; i < ctx->last_dst_n; i++) {
         struct ir3_instruction *dst = ctx->last_dst[i];
         ir3_set_dst_type(dst, true);
         ir3_fixup_src_type(dst);
         if (dst->opc == OPC_META_SPLIT) {
            /* for a split, the value being split must also become
             * half, so propagate one level up through the src:
             */
            ir3_set_dst_type(ssa(dst->srcs[0]), true);
            ir3_fixup_src_type(ssa(dst->srcs[0]));
            dst->srcs[0]->flags |= IR3_REG_HALF;
         }
      }
   }

   if (!dst->is_ssa) {
      nir_register *reg = dst->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = ctx->last_dst_n;
      struct ir3_instruction *addr = NULL;

      if (dst->reg.indirect)
         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
                              reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = dst->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         /* NULL slots (components never written) are simply skipped: */
         if (!ctx->last_dst[i])
            continue;
         ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
      }

      /* the scratch array from ir3_get_dst() is no longer needed: */
      ralloc_free(ctx->last_dst);
   }

   ctx->last_dst = NULL;
   ctx->last_dst_n = 0;
}
293
294static unsigned
295dest_flags(struct ir3_instruction *instr)
296{
297   return instr->dsts[0]->flags & (IR3_REG_HALF | IR3_REG_SHARED);
298}
299
/* Emit a collect (OPC_META_COLLECT) grouping arrsz scalar values into
 * consecutive registers.  Returns NULL for an empty array.  All elements
 * are expected to have the same half/shared dst flags as the first.
 */
struct ir3_instruction *
ir3_create_collect(struct ir3_block *block, struct ir3_instruction *const *arr,
                   unsigned arrsz)
{
   struct ir3_instruction *collect;

   if (arrsz == 0)
      return NULL;

   unsigned flags = dest_flags(arr[0]);

   collect = ir3_instr_create(block, OPC_META_COLLECT, 1, arrsz);
   __ssa_dst(collect)->flags |= flags;
   for (unsigned i = 0; i < arrsz; i++) {
      struct ir3_instruction *elem = arr[i];

      /* Since arrays are pre-colored in RA, we can't assume that
       * things will end up in the right place.  (Ie. if a collect
       * joins elements from two different arrays.)  So insert an
       * extra mov.
       *
       * We could possibly skip this if all the collected elements
       * are contiguous elements in a single array.. not sure how
       * likely that is to happen.
       *
       * Fixes a problem with glamor shaders, that in effect do
       * something like:
       *
       *   if (foo)
       *     texcoord = ..
       *   else
       *     texcoord = ..
       *   color = texture2D(tex, texcoord);
       *
       * In this case, texcoord will end up as nir registers (which
       * translate to ir3 array's of length 1.  And we can't assume
       * the two (or more) arrays will get allocated in consecutive
       * scalar registers.
       *
       */
      if (elem->dsts[0]->flags & IR3_REG_ARRAY) {
         type_t type = (flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
         elem = ir3_MOV(block, elem, type);
      }

      debug_assert(dest_flags(elem) == flags);
      __ssa_src(collect, elem, flags);
   }

   /* all arrsz components of the collect dst are written: */
   collect->dsts[0]->wrmask = MASK(arrsz);

   return collect;
}
353
/* helper for instructions that produce multiple consecutive scalar
 * outputs which need to have a split meta instruction inserted
 *
 * Fills dst[] with the (up to n) scalar components of src starting at
 * component 'base'.  A collect src is simply unwrapped back into its
 * original scalars; a single-component src is passed through directly
 * (except META_INPUT, which still needs a real SPLIT).
 */
void
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
               struct ir3_instruction *src, unsigned base, unsigned n)
{
   if ((n == 1) && (src->dsts[0]->wrmask == 0x1) &&
       /* setup_input needs ir3_split_dest to generate a SPLIT instruction */
       src->opc != OPC_META_INPUT) {
      dst[0] = src;
      return;
   }

   if (src->opc == OPC_META_COLLECT) {
      debug_assert((base + n) <= src->srcs_count);

      for (int i = 0; i < n; i++) {
         dst[i] = ssa(src->srcs[i + base]);
      }

      return;
   }

   unsigned flags = dest_flags(src);

   for (int i = 0, j = 0; i < n; i++) {
      struct ir3_instruction *split =
         ir3_instr_create(block, OPC_META_SPLIT, 1, 1);
      __ssa_dst(split)->flags |= flags;
      __ssa_src(split, src, flags);
      split->split.off = i + base;

      /* only components actually written by src produce an output;
       * unwritten components are skipped (dst[] packs the rest):
       */
      if (src->dsts[0]->wrmask & (1 << (i + base)))
         dst[j++] = split;
   }
}
391
/* Report a fatal compile error.  When the offending instruction is
 * known, the whole shader is logged with the message annotated at that
 * instruction; otherwise just the message is logged.  Never returns.
 */
NORETURN void
ir3_context_error(struct ir3_context *ctx, const char *format, ...)
{
   /* errors stays NULL in the non-annotated case: */
   struct hash_table *errors = NULL;
   va_list ap;
   va_start(ap, format);
   if (ctx->cur_instr) {
      errors = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                       _mesa_key_pointer_equal);
      char *msg = ralloc_vasprintf(errors, format, ap);
      _mesa_hash_table_insert(errors, ctx->cur_instr, msg);
   } else {
      mesa_loge_v(format, ap);
   }
   va_end(ap);
   nir_log_shader_annotated(ctx->s, errors);
   ralloc_free(errors);
   ctx->error = true;
   unreachable("");
}
412
413static struct ir3_instruction *
414create_addr0(struct ir3_block *block, struct ir3_instruction *src, int align)
415{
416   struct ir3_instruction *instr, *immed;
417
418   instr = ir3_COV(block, src, TYPE_U32, TYPE_S16);
419
420   switch (align) {
421   case 1:
422      /* src *= 1: */
423      break;
424   case 2:
425      /* src *= 2	=> src <<= 1: */
426      immed = create_immed_typed(block, 1, TYPE_S16);
427      instr = ir3_SHL_B(block, instr, 0, immed, 0);
428      break;
429   case 3:
430      /* src *= 3: */
431      immed = create_immed_typed(block, 3, TYPE_S16);
432      instr = ir3_MULL_U(block, instr, 0, immed, 0);
433      break;
434   case 4:
435      /* src *= 4 => src <<= 2: */
436      immed = create_immed_typed(block, 2, TYPE_S16);
437      instr = ir3_SHL_B(block, instr, 0, immed, 0);
438      break;
439   default:
440      unreachable("bad align");
441      return NULL;
442   }
443
444   instr->dsts[0]->flags |= IR3_REG_HALF;
445
446   instr = ir3_MOV(block, instr, TYPE_S16);
447   instr->dsts[0]->num = regid(REG_A0, 0);
448
449   return instr;
450}
451
452static struct ir3_instruction *
453create_addr1(struct ir3_block *block, unsigned const_val)
454{
455   struct ir3_instruction *immed =
456      create_immed_typed(block, const_val, TYPE_U16);
457   struct ir3_instruction *instr = ir3_MOV(block, immed, TYPE_U16);
458   instr->dsts[0]->num = regid(REG_A0, 1);
459   return instr;
460}
461
462/* caches addr values to avoid generating multiple cov/shl/mova
463 * sequences for each use of a given NIR level src as address
464 */
465struct ir3_instruction *
466ir3_get_addr0(struct ir3_context *ctx, struct ir3_instruction *src, int align)
467{
468   struct ir3_instruction *addr;
469   unsigned idx = align - 1;
470
471   compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr0_ht));
472
473   if (!ctx->addr0_ht[idx]) {
474      ctx->addr0_ht[idx] = _mesa_hash_table_create(ctx, _mesa_hash_pointer,
475                                                   _mesa_key_pointer_equal);
476   } else {
477      struct hash_entry *entry;
478      entry = _mesa_hash_table_search(ctx->addr0_ht[idx], src);
479      if (entry)
480         return entry->data;
481   }
482
483   addr = create_addr0(ctx->block, src, align);
484   _mesa_hash_table_insert(ctx->addr0_ht[idx], src, addr);
485
486   return addr;
487}
488
489/* Similar to ir3_get_addr0, but for a1.x. */
490struct ir3_instruction *
491ir3_get_addr1(struct ir3_context *ctx, unsigned const_val)
492{
493   struct ir3_instruction *addr;
494
495   if (!ctx->addr1_ht) {
496      ctx->addr1_ht = _mesa_hash_table_u64_create(ctx);
497   } else {
498      addr = _mesa_hash_table_u64_search(ctx->addr1_ht, const_val);
499      if (addr)
500         return addr;
501   }
502
503   addr = create_addr1(ctx->block, const_val);
504   _mesa_hash_table_u64_insert(ctx->addr1_ht, const_val, addr);
505
506   return addr;
507}
508
/* Turn 'src' into a predicate by comparing it against zero and writing
 * the result to the p0.x predicate register.
 */
struct ir3_instruction *
ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *cond;

   /* NOTE: only cmps.*.* can write p0.x: */
   cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0);
   cond->cat2.condition = IR3_COND_NE;

   /* condition always goes in predicate register: */
   cond->dsts[0]->num = regid(REG_P0, 0);
   cond->dsts[0]->flags &= ~IR3_REG_SSA;

   return cond;
}
525
526/*
527 * Array helpers
528 */
529
530void
531ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
532{
533   struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
534   arr->id = ++ctx->num_arrays;
535   /* NOTE: sometimes we get non array regs, for example for arrays of
536    * length 1.  See fs-const-array-of-struct-of-array.shader_test.  So
537    * treat a non-array as if it was an array of length 1.
538    *
539    * It would be nice if there was a nir pass to convert arrays of
540    * length 1 to ssa.
541    */
542   arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
543   compile_assert(ctx, arr->length > 0);
544   arr->r = reg;
545   arr->half = reg->bit_size <= 16;
546   // HACK one-bit bools still end up as 32b:
547   if (reg->bit_size == 1)
548      arr->half = false;
549   list_addtail(&arr->node, &ctx->ir->array_list);
550}
551
/* Look up the ir3_array previously created for 'reg' by
 * ir3_declare_array().  Errors out (fatal) on an unknown register.
 */
struct ir3_array *
ir3_get_array(struct ir3_context *ctx, nir_register *reg)
{
   foreach_array (arr, &ctx->ir->array_list) {
      if (arr->r == reg)
         return arr;
   }
   ir3_context_error(ctx, "bogus reg: r%d\n", reg->index);
   return NULL;
}
562
/* Load element 'n' from an array, relative (indirect) if address!=NULL.
 * Emits an array-mov with the barrier classes needed to keep array
 * reads ordered against array writes.
 */
struct ir3_instruction *
ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
                      struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *src;
   unsigned flags = 0;

   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   if (arr->half) {
      mov->cat1.src_type = TYPE_U16;
      mov->cat1.dst_type = TYPE_U16;
      flags |= IR3_REG_HALF;
   } else {
      mov->cat1.src_type = TYPE_U32;
      mov->cat1.dst_type = TYPE_U32;
   }

   mov->barrier_class = IR3_BARRIER_ARRAY_R;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
   __ssa_dst(mov)->flags |= flags;
   src = ir3_src_create(mov, 0,
                        IR3_REG_ARRAY | COND(address, IR3_REG_RELATIV) | flags);
   /* link to the previous write of this array, but only within the same
    * block (cross-block dependencies are covered by keeping array
    * stores alive, see ir3_create_array_store):
    */
   src->def = (arr->last_write && arr->last_write->instr->block == block)
                 ? arr->last_write
                 : NULL;
   src->size = arr->length;
   src->array.id = arr->id;
   src->array.offset = n;
   src->array.base = INVALID_REG;

   if (address)
      ir3_instr_set_address(mov, address);

   return mov;
}
601
/* Store 'src' to element 'n' of an array, relative (indirect) if
 * address!=NULL.  Direct stores of non-meta values are done in place by
 * retargeting src's dst register at the array; otherwise an array-mov
 * is emitted.
 */
void
ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
                       struct ir3_instruction *src,
                       struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *dst;
   unsigned flags = 0;

   /* if not relative store, don't create an extra mov, since that
    * ends up being difficult for cp to remove.
    *
    * Also, don't skip the mov if the src is meta (like fanout/split),
    * since that creates a situation that RA can't really handle properly.
    */
   if (!address && !is_meta(src)) {
      dst = src->dsts[0];

      src->barrier_class |= IR3_BARRIER_ARRAY_W;
      src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;

      /* retarget src's own dst register at the array element: */
      dst->flags |= IR3_REG_ARRAY;
      dst->size = arr->length;
      dst->array.id = arr->id;
      dst->array.offset = n;
      dst->array.base = INVALID_REG;

      /* chain to the previous write of this array, within-block only: */
      if (arr->last_write && arr->last_write->instr->block == src->block)
         ir3_reg_set_last_array(src, dst, arr->last_write);

      arr->last_write = dst;

      /* keep the store alive, see comment at the bottom of this fxn: */
      array_insert(block, block->keeps, src);

      return;
   }

   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   if (arr->half) {
      mov->cat1.src_type = TYPE_U16;
      mov->cat1.dst_type = TYPE_U16;
      flags |= IR3_REG_HALF;
   } else {
      mov->cat1.src_type = TYPE_U32;
      mov->cat1.dst_type = TYPE_U32;
   }
   mov->barrier_class = IR3_BARRIER_ARRAY_W;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
   dst = ir3_dst_create(
      mov, 0,
      IR3_REG_SSA | IR3_REG_ARRAY | flags | COND(address, IR3_REG_RELATIV));
   dst->instr = mov;
   dst->size = arr->length;
   dst->array.id = arr->id;
   dst->array.offset = n;
   dst->array.base = INVALID_REG;
   ir3_src_create(mov, 0, IR3_REG_SSA | flags)->def = src->dsts[0];

   if (arr->last_write && arr->last_write->instr->block == block)
      ir3_reg_set_last_array(mov, dst, arr->last_write);

   if (address)
      ir3_instr_set_address(mov, address);

   arr->last_write = dst;

   /* the array store may only matter to something in an earlier
    * block (ie. loops), but since arrays are not in SSA, depth
    * pass won't know this.. so keep all array stores:
    */
   array_insert(block, block->keeps, mov);
}
676