1/* 2 * Copyright (C) 2015-2018 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "ir3_context.h"
#include "ir3_compiler.h"
#include "ir3_image.h"
#include "ir3_nir.h"
#include "ir3_shader.h"

/* Create the per-variant compile context: copies key-derived lowering
 * state out of the variant key, clones the shader's NIR (so the variant
 * specific lowering below doesn't clobber the shared copy), runs the
 * final variant-specific NIR passes, and sets up the hash tables used
 * while emitting ir3.  Freed with ir3_context_free().
 */
struct ir3_context *
ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader_variant *so)
{
   struct ir3_context *ctx = rzalloc(NULL, struct ir3_context);

   /* Pull the stage-specific lowering keys into the context:
    * astc_srgb (ASTC sRGB workaround) on gen >= 4, tex-sample-count
    * based lowering (samples) on earlier gens.
    *
    * NOTE(review): astc_srgb is taken for *all* gens >= 4 here --
    * confirm whether later gens still need the workaround or whether
    * this was meant to be gen == 4 only.
    */
   if (compiler->gen >= 4) {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->astc_srgb = so->key.vastc_srgb;
      } else if (so->type == MESA_SHADER_FRAGMENT) {
         ctx->astc_srgb = so->key.fastc_srgb;
      }

   } else {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->samples = so->key.vsamples;
      } else if (so->type == MESA_SHADER_FRAGMENT) {
         ctx->samples = so->key.fsamples;
      }
   }

   /* Generation-specific emit callbacks; gens < 4 get no funcs table: */
   if (compiler->gen >= 6) {
      ctx->funcs = &ir3_a6xx_funcs;
   } else if (compiler->gen >= 4) {
      ctx->funcs = &ir3_a4xx_funcs;
   }

   ctx->compiler = compiler;
   ctx->so = so;
   /* Pointer-keyed maps, all ralloc'd off ctx so ir3_context_free()
    * releases everything at once:
    */
   ctx->def_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->block_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->continue_block_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->sel_cond_conversions =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);

   /* TODO: maybe generate some sort of bitmask of what key
    * lowers vs what shader has (ie. no need to lower
    * texture clamp lowering if no texture sample instrs)..
    * although should be done further up the stack to avoid
    * creating duplicate variants..
    */

   ctx->s = nir_shader_clone(ctx, so->shader->nir);
   ir3_nir_lower_variant(so, ctx->s);

   /* this needs to be the last pass run, so do this here instead of
    * in ir3_optimize_nir():
    */
   bool progress = false;
   NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs);

   /* we could need cleanup after lower_locals_to_regs */
   while (progress) {
      progress = false;
      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
   }

   /* We want to lower nir_op_imul as late as possible, to catch also
    * those generated by earlier passes (e.g, nir_lower_locals_to_regs).
    * However, we want a final swing of a few passes to have a chance
    * at optimizing the result.
    */
   progress = false;
   NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
   while (progress) {
      progress = false;
      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
      NIR_PASS(progress, ctx->s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, ctx->s, nir_opt_dead_write_vars);
      NIR_PASS(progress, ctx->s, nir_opt_dce);
      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
   }

   /* Enable the texture pre-fetch feature only a4xx onwards. But
    * only enable it on generations that have been tested (the gen >= 6
    * check below is the "tested generations" gate):
    */
   if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gen >= 6))
      NIR_PASS_V(ctx->s, ir3_nir_lower_tex_prefetch);

   NIR_PASS(progress, ctx->s, nir_lower_phis_to_scalar, true);

   /* Super crude heuristic to limit # of tex prefetch in small
    * shaders. This completely ignores loops.. but that's really
    * not the worst of it's problems. (A frag shader that has
    * loops is probably going to be big enough to not trigger a
    * lower threshold.)
    *
    * 1) probably want to do this in terms of ir3 instructions
    * 2) probably really want to decide this after scheduling
    *    (or at least pre-RA sched) so we have a rough idea about
    *    nops, and don't count things that get cp'd away
    * 3) blob seems to use higher thresholds with a mix of more
    *    SFU instructions.  Which partly makes sense, more SFU
    *    instructions probably means you want to get the real
    *    shader started sooner, but that considers where in the
    *    shader the SFU instructions are, which blob doesn't seem
    *    to do.
    *
    * This uses more conservative thresholds assuming a more alu
    * than sfu heavy instruction mix.
    */
   if (so->type == MESA_SHADER_FRAGMENT) {
      nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s);

      /* Count NIR instructions across all blocks of the entrypoint: */
      unsigned instruction_count = 0;
      nir_foreach_block (block, fxn) {
         instruction_count += exec_list_length(&block->instr_list);
      }

      if (instruction_count < 50) {
         ctx->prefetch_limit = 2;
      } else if (instruction_count < 70) {
         ctx->prefetch_limit = 3;
      } else {
         ctx->prefetch_limit = IR3_MAX_SAMPLER_PREFETCH;
      }
   }

   if (shader_debug_enabled(so->type)) {
      mesa_logi("NIR (final form) for %s shader %s:", ir3_shader_stage(so),
                so->shader->nir->info.name);
      nir_log_shaderi(ctx->s);
   }

   ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);

   return ctx;
}

/* Release the context and everything ralloc'd off of it (hash tables,
 * cloned NIR, value arrays).
 */
void
ir3_context_free(struct ir3_context *ctx)
{
   ralloc_free(ctx);
}

/*
 * Misc helpers
 */

/* allocate a n element value array (to be populated by caller) and
 * insert in def_ht, keyed by the nir_ssa_def, so later ir3_get_src()
 * lookups of that def find the per-component ir3 values
 */
struct ir3_instruction **
ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n)
{
   struct ir3_instruction **value =
      ralloc_array(ctx->def_ht, struct ir3_instruction *, n);
   _mesa_hash_table_insert(ctx->def_ht, dst, value);
   return value;
}

struct ir3_instruction **
/* Allocate the n-element destination value array for a nir_dest and
 * remember it in ctx->last_dst.  Must be paired with a later
 * ir3_put_dst() call (enforced by the compile_assert below).
 */
ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
{
   struct ir3_instruction **value;

   if (dst->is_ssa) {
      value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
   } else {
      /* non-ssa (nir_register) dest: values get written out to the
       * backing ir3 array in ir3_put_dst(), so no def_ht entry:
       */
      value = ralloc_array(ctx, struct ir3_instruction *, n);
   }

   /* NOTE: in non-ssa case, we don't really need to store last_dst
    * but this helps us catch cases where put_dst() call is forgotten
    */
   compile_assert(ctx, !ctx->last_dst);
   ctx->last_dst = value;
   ctx->last_dst_n = n;

   return value;
}

/* Return the per-component ir3 values for a nir_src: SSA srcs are
 * looked up in def_ht; register srcs turn into per-component array
 * loads (indirect via a0.x if src->reg.indirect).
 */
struct ir3_instruction *const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
{
   if (src->is_ssa) {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
      compile_assert(ctx, entry);
      return entry->data;
   } else {
      nir_register *reg = src->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = arr->r->num_components;
      struct ir3_instruction *addr = NULL;
      struct ir3_instruction **value =
         ralloc_array(ctx, struct ir3_instruction *, num_components);

      if (src->reg.indirect)
         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
                              reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = src->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         value[i] = ir3_create_array_load(ctx, arr, n, addr);
      }

      return value;
   }
}

/* Finish a ir3_get_dst() pairing: insert movs out of shared regs,
 * fix up types for 16-bit dests, store non-ssa dests back to their
 * backing ir3 array, and clear last_dst.
 */
void
ir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
{
   unsigned bit_size = nir_dest_bit_size(*dst);

   /* add extra mov if dst value is shared reg.. in some cases not all
    * instructions can read from shared regs, in cases where they can
    * ir3_cp will clean up the extra mov:
    */
   for (unsigned i = 0; i < ctx->last_dst_n; i++) {
      if (!ctx->last_dst[i])
         continue;
      if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) {
         ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
      }
   }

   /* Note: 1-bit bools are stored in 32-bit regs */
   if (bit_size == 16) {
      /* NOTE(review): unlike the loops above/below, this loop does not
       * null-check last_dst[i] -- presumably 16-bit dests always have
       * all components populated; confirm.
       */
      for (unsigned i = 0; i < ctx->last_dst_n; i++) {
         struct ir3_instruction *dst = ctx->last_dst[i];
         ir3_set_dst_type(dst, true);
         ir3_fixup_src_type(dst);
         if (dst->opc == OPC_META_SPLIT) {
            /* propagate the half-ness through the split's source too: */
            ir3_set_dst_type(ssa(dst->srcs[0]), true);
            ir3_fixup_src_type(ssa(dst->srcs[0]));
            dst->srcs[0]->flags |= IR3_REG_HALF;
         }
      }
   }

   if (!dst->is_ssa) {
      nir_register *reg = dst->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = ctx->last_dst_n;
      struct ir3_instruction *addr = NULL;

      if (dst->reg.indirect)
         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
                              reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = dst->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         if (!ctx->last_dst[i])
            continue;
         ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
      }

      /* non-ssa value array is not kept in def_ht, free it now: */
      ralloc_free(ctx->last_dst);
   }

   ctx->last_dst = NULL;
   ctx->last_dst_n = 0;
}

/* dest flags that must agree between an instruction and the collect/
 * split meta instructions attached to it:
 */
static unsigned
dest_flags(struct ir3_instruction *instr)
{
   return instr->dsts[0]->flags & (IR3_REG_HALF | IR3_REG_SHARED);
}

/* Gather arrsz scalar values into consecutive registers via an
 * OPC_META_COLLECT.  Returns NULL for arrsz == 0.  All elements must
 * share the same dest_flags() as arr[0].
 */
struct ir3_instruction *
ir3_create_collect(struct ir3_block *block, struct ir3_instruction *const *arr,
                   unsigned arrsz)
{
   struct ir3_instruction *collect;

   if (arrsz == 0)
      return NULL;

   unsigned flags = dest_flags(arr[0]);

   collect = ir3_instr_create(block, OPC_META_COLLECT, 1, arrsz);
   __ssa_dst(collect)->flags |= flags;
   for (unsigned i = 0; i < arrsz; i++) {
      struct ir3_instruction *elem = arr[i];

      /* Since arrays are pre-colored in RA, we can't assume that
       * things will end up in the right place. (Ie. if a collect
       * joins elements from two different arrays.)  So insert an
       * extra mov.
       *
       * We could possibly skip this if all the collected elements
       * are contiguous elements in a single array.. not sure how
       * likely that is to happen.
       *
       * Fixes a problem with glamor shaders, that in effect do
       * something like:
       *
       *   if (foo)
       *     texcoord = ..
       *   else
       *     texcoord = ..
       *   color = texture2D(tex, texcoord);
       *
       * In this case, texcoord will end up as nir registers (which
       * translate to ir3 array's of length 1.  And we can't assume
       * the two (or more) arrays will get allocated in consecutive
       * scalar registers.
       *
       */
      if (elem->dsts[0]->flags & IR3_REG_ARRAY) {
         type_t type = (flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
         elem = ir3_MOV(block, elem, type);
      }

      debug_assert(dest_flags(elem) == flags);
      __ssa_src(collect, elem, flags);
   }

   collect->dsts[0]->wrmask = MASK(arrsz);

   return collect;
}

/* helper for instructions that produce multiple consecutive scalar
 * outputs which need to have a split meta instruction inserted
 */
void
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
               struct ir3_instruction *src, unsigned base, unsigned n)
{
   /* single-component src needs no split at all: */
   if ((n == 1) && (src->dsts[0]->wrmask == 0x1) &&
       /* setup_input needs ir3_split_dest to generate a SPLIT instruction */
       src->opc != OPC_META_INPUT) {
      dst[0] = src;
      return;
   }

   /* splitting a collect: just hand back the original sources: */
   if (src->opc == OPC_META_COLLECT) {
      debug_assert((base + n) <= src->srcs_count);

      for (int i = 0; i < n; i++) {
         dst[i] = ssa(src->srcs[i + base]);
      }

      return;
   }

   unsigned flags = dest_flags(src);

   /* components not covered by src's wrmask get no dst entry (j lags i): */
   for (int i = 0, j = 0; i < n; i++) {
      struct ir3_instruction *split =
         ir3_instr_create(block, OPC_META_SPLIT, 1, 1);
      __ssa_dst(split)->flags |= flags;
      __ssa_src(split, src, flags);
      split->split.off = i + base;

      if (src->dsts[0]->wrmask & (1 << (i + base)))
         dst[j++] = split;
   }
}

/* Report a fatal compile error: annotate the NIR dump with the message
 * (attached to the instruction being translated, if known) and abort.
 */
NORETURN void
ir3_context_error(struct ir3_context *ctx, const char *format, ...)
{
   struct hash_table *errors = NULL;
   va_list ap;
   va_start(ap, format);
   if (ctx->cur_instr) {
      /* attach the message to the offending instruction so the
       * annotated shader dump below points at it:
       */
      errors = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                       _mesa_key_pointer_equal);
      char *msg = ralloc_vasprintf(errors, format, ap);
      _mesa_hash_table_insert(errors, ctx->cur_instr, msg);
   } else {
      mesa_loge_v(format, ap);
   }
   va_end(ap);
   /* errors may be NULL here (no cur_instr); dump is then unannotated: */
   nir_log_shader_annotated(ctx->s, errors);
   ralloc_free(errors);
   ctx->error = true;
   unreachable("");
}

/* Emit the instruction sequence that moves 'src' (scaled by 'align',
 * the element stride in scalar components) into the a0.x address
 * register.  Callers go through ir3_get_addr0() which caches results.
 */
static struct ir3_instruction *
create_addr0(struct ir3_block *block, struct ir3_instruction *src, int align)
{
   struct ir3_instruction *instr, *immed;

   /* address register is 16-bit, narrow the index first: */
   instr = ir3_COV(block, src, TYPE_U32, TYPE_S16);

   switch (align) {
   case 1:
      /* src *= 1: */
      break;
   case 2:
      /* src *= 2 => src <<= 1: */
      immed = create_immed_typed(block, 1, TYPE_S16);
      instr = ir3_SHL_B(block, instr, 0, immed, 0);
      break;
   case 3:
      /* src *= 3: */
      immed = create_immed_typed(block, 3, TYPE_S16);
      instr = ir3_MULL_U(block, instr, 0, immed, 0);
      break;
   case 4:
      /* src *= 4 => src <<= 2: */
      immed = create_immed_typed(block, 2, TYPE_S16);
      instr = ir3_SHL_B(block, instr, 0, immed, 0);
      break;
   default:
      unreachable("bad align");
      return NULL;
   }

   instr->dsts[0]->flags |= IR3_REG_HALF;

   /* final mov pinned to a0.x: */
   instr = ir3_MOV(block, instr, TYPE_S16);
   instr->dsts[0]->num = regid(REG_A0, 0);

   return instr;
}

/* Emit a mov of a constant into the a0.y (a1.x) address register.
 * Callers go through ir3_get_addr1() which caches results.
 */
static struct ir3_instruction *
create_addr1(struct ir3_block *block, unsigned const_val)
{
   struct ir3_instruction *immed =
      create_immed_typed(block, const_val, TYPE_U16);
   struct ir3_instruction *instr = ir3_MOV(block, immed, TYPE_U16);
   instr->dsts[0]->num = regid(REG_A0, 1);
   return instr;
}

/* caches addr values to avoid generating multiple cov/shl/mova
 * sequences for each use of a given NIR level src as address
 */
struct ir3_instruction *
ir3_get_addr0(struct ir3_context *ctx, struct ir3_instruction *src, int align)
{
   struct ir3_instruction *addr;
   /* one cache per align value, lazily created: */
   unsigned idx = align - 1;

   compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr0_ht));

   if (!ctx->addr0_ht[idx]) {
      ctx->addr0_ht[idx] = _mesa_hash_table_create(ctx, _mesa_hash_pointer,
                                                   _mesa_key_pointer_equal);
   } else {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->addr0_ht[idx], src);
      if (entry)
         return entry->data;
   }

   addr = create_addr0(ctx->block, src, align);
   _mesa_hash_table_insert(ctx->addr0_ht[idx], src, addr);

   return addr;
}

/* Similar to ir3_get_addr0, but for a1.x. */
struct ir3_instruction *
ir3_get_addr1(struct ir3_context *ctx, unsigned const_val)
{
   struct ir3_instruction *addr;

   if (!ctx->addr1_ht) {
      ctx->addr1_ht = _mesa_hash_table_u64_create(ctx);
   } else {
      addr = _mesa_hash_table_u64_search(ctx->addr1_ht, const_val);
      if (addr)
         return addr;
   }

   addr = create_addr1(ctx->block, const_val);
   _mesa_hash_table_u64_insert(ctx->addr1_ht, const_val, addr);

   return addr;
}

/* Emit a compare of 'src' against zero whose result lands in the
 * predicate register p0.x (src != 0).
 */
struct ir3_instruction *
ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *cond;

   /* NOTE: only cmps.*.* can write p0.x: */
   cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0);
   cond->cat2.condition = IR3_COND_NE;

   /* condition always goes in predicate register: */
   cond->dsts[0]->num = regid(REG_P0, 0);
   cond->dsts[0]->flags &= ~IR3_REG_SSA;

   return cond;
}

/*
 * Array helpers
 */

/* Register a nir_register with the ir as an ir3 array (the backing
 * storage for non-SSA values), assigning it a fresh array id.
 */
void
ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
{
   struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
   arr->id = ++ctx->num_arrays;
   /* NOTE: sometimes we get non array regs, for example for arrays of
    * length 1.  See fs-const-array-of-struct-of-array.shader_test.  So
    * treat a non-array as if it was an array of length 1.
    *
    * It would be nice if there was a nir pass to convert arrays of
    * length 1 to ssa.
    */
   arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
   compile_assert(ctx, arr->length > 0);
   arr->r = reg;
   arr->half = reg->bit_size <= 16;
   // HACK one-bit bools still end up as 32b:
   if (reg->bit_size == 1)
      arr->half = false;
   list_addtail(&arr->node, &ctx->ir->array_list);
}

/* Look up the ir3 array previously declared for 'reg'; fatal compile
 * error (does not return) if none exists.
 */
struct ir3_array *
ir3_get_array(struct ir3_context *ctx, nir_register *reg)
{
   foreach_array (arr, &ctx->ir->array_list) {
      if (arr->r == reg)
         return arr;
   }
   ir3_context_error(ctx, "bogus reg: r%d\n", reg->index);
   return NULL;
}

/* relative (indirect) if address!=NULL */
struct ir3_instruction *
ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
                      struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *src;
   unsigned flags = 0;

   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   if (arr->half) {
      mov->cat1.src_type = TYPE_U16;
      mov->cat1.dst_type = TYPE_U16;
      flags |= IR3_REG_HALF;
   } else {
      mov->cat1.src_type = TYPE_U32;
      mov->cat1.dst_type = TYPE_U32;
   }

   /* barrier classes keep array reads ordered against array writes: */
   mov->barrier_class = IR3_BARRIER_ARRAY_R;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
   __ssa_dst(mov)->flags |= flags;
   src = ir3_src_create(mov, 0,
                        IR3_REG_ARRAY | COND(address, IR3_REG_RELATIV) | flags);
   /* only link to the last write if it happened in this block; cross
    * block dependencies are handled by keeping all array stores (see
    * ir3_create_array_store):
    */
   src->def = (arr->last_write && arr->last_write->instr->block == block)
                 ? arr->last_write
                 : NULL;
   src->size = arr->length;
   src->array.id = arr->id;
   src->array.offset = n;
   src->array.base = INVALID_REG;

   if (address)
      ir3_instr_set_address(mov, address);

   return mov;
}

/* relative (indirect) if address!=NULL */
void
ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
                       struct ir3_instruction *src,
                       struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *dst;
   unsigned flags = 0;

   /* if not relative store, don't create an extra mov, since that
    * ends up being difficult for cp to remove.
    *
    * Also, don't skip the mov if the src is meta (like fanout/split),
    * since that creates a situation that RA can't really handle properly.
    */
   if (!address && !is_meta(src)) {
      /* rewrite src's own dest into an array dest in-place: */
      dst = src->dsts[0];

      src->barrier_class |= IR3_BARRIER_ARRAY_W;
      src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;

      dst->flags |= IR3_REG_ARRAY;
      dst->size = arr->length;
      dst->array.id = arr->id;
      dst->array.offset = n;
      dst->array.base = INVALID_REG;

      if (arr->last_write && arr->last_write->instr->block == src->block)
         ir3_reg_set_last_array(src, dst, arr->last_write);

      arr->last_write = dst;

      array_insert(block, block->keeps, src);

      return;
   }

   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   if (arr->half) {
      mov->cat1.src_type = TYPE_U16;
      mov->cat1.dst_type = TYPE_U16;
      flags |= IR3_REG_HALF;
   } else {
      mov->cat1.src_type = TYPE_U32;
      mov->cat1.dst_type = TYPE_U32;
   }
   mov->barrier_class = IR3_BARRIER_ARRAY_W;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
   dst = ir3_dst_create(
      mov, 0,
      IR3_REG_SSA | IR3_REG_ARRAY | flags | COND(address, IR3_REG_RELATIV));
   dst->instr = mov;
   dst->size = arr->length;
   dst->array.id = arr->id;
   dst->array.offset = n;
   dst->array.base = INVALID_REG;
   ir3_src_create(mov, 0, IR3_REG_SSA | flags)->def = src->dsts[0];

   /* chain to the previous write in this block (must happen before
    * last_write is updated below):
    */
   if (arr->last_write && arr->last_write->instr->block == block)
      ir3_reg_set_last_array(mov, dst, arr->last_write);

   if (address)
      ir3_instr_set_address(mov, address);

   arr->last_write = dst;

   /* the array store may only matter to something in an earlier
    * block (ie. loops), but since arrays are not in SSA, depth
    * pass won't know this.. so keep all array stores:
    */
   array_insert(block, block->keeps, mov);
}