/*
 * Copyright (C) 2015-2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/u_math.h"

#include "ir3_compiler.h"
#include "ir3_context.h"
#include "ir3_image.h"
#include "ir3_shader.h"
#include "ir3_nir.h"

struct ir3_context *
ir3_context_init(struct ir3_compiler *compiler,
      struct ir3_shader_variant *so)
{
   struct ir3_context *ctx = rzalloc(NULL, struct ir3_context);

   if (compiler->gpu_id >= 400) {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->astc_srgb = so->key.vastc_srgb;
      } else if (so->type == MESA_SHADER_FRAGMENT) {
         ctx->astc_srgb = so->key.fastc_srgb;
      }

   } else {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->samples = so->key.vsamples;
      } else if (so->type == MESA_SHADER_FRAGMENT) {
         ctx->samples = so->key.fsamples;
      }
   }

   if (compiler->gpu_id >= 600) {
      ctx->funcs = &ir3_a6xx_funcs;
   } else if (compiler->gpu_id >= 400) {
      ctx->funcs = &ir3_a4xx_funcs;
   }

   ctx->compiler = compiler;
   ctx->so = so;
   ctx->def_ht = _mesa_hash_table_create(ctx,
         _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->block_ht = _mesa_hash_table_create(ctx,
         _mesa_hash_pointer, _mesa_key_pointer_equal);

   /* TODO: maybe generate some sort of bitmask of what key
    * lowers vs what shader has (ie. no need to run texture clamp
    * lowering if there are no texture sample instrs)..  although
    * this should be done further up the stack to avoid creating
    * duplicate variants..
    */

   if (ir3_key_lowers_nir(&so->key)) {
      nir_shader *s = nir_shader_clone(ctx, so->shader->nir);
      ctx->s = ir3_optimize_nir(so->shader, s, &so->key);
   } else {
      /* fast-path for shader key that lowers nothing in NIR: */
      ctx->s = nir_shader_clone(ctx, so->shader->nir);
   }

   /* this needs to be the last pass run, so do this here instead of
    * in ir3_optimize_nir():
    */
   NIR_PASS_V(ctx->s, nir_lower_bool_to_int32);
   NIR_PASS_V(ctx->s, nir_lower_locals_to_regs);
   NIR_PASS_V(ctx->s, nir_convert_from_ssa, true);

   if (ir3_shader_debug & IR3_DBG_DISASM) {
      DBG("dump nir%dv%d: type=%d, k={cts=%u,hp=%u}",
         so->shader->id, so->id, so->type,
         so->key.color_two_side, so->key.half_precision);
      nir_print_shader(ctx->s, stdout);
   }

   if (shader_debug_enabled(so->type)) {
      fprintf(stderr, "NIR (final form) for %s shader:\n",
         _mesa_shader_stage_to_string(so->type));
      nir_print_shader(ctx->s, stderr);
   }

   ir3_nir_scan_driver_consts(ctx->s, &so->const_layout);

   so->num_uniforms = ctx->s->num_uniforms;
   so->num_ubos = ctx->s->info.num_ubos;

   ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);

   /* Layout of constant registers, each section aligned to vec4.  Note
    * that pointer size (ubo, etc) changes depending on generation.
    *
    *    user consts
    *    UBO addresses
    *    SSBO sizes
    *    if (vertex shader) {
    *        driver params (IR3_DP_*)
    *        if (stream_output.num_outputs > 0)
    *           stream-out addresses
    *    }
    *    immediates
    *
    * Immediates go last mostly because they are inserted in the CP pass
    * after the nir -> ir3 frontend.
    *
    * Note UBO size in bytes should be aligned to vec4
    */
   debug_assert((ctx->so->shader->ubo_state.size % 16) == 0);
   unsigned constoff = align(ctx->so->shader->ubo_state.size / 16, 4);
   unsigned ptrsz = ir3_pointer_size(ctx);

   memset(&so->constbase, ~0, sizeof(so->constbase));

   if (so->num_ubos > 0) {
      so->constbase.ubo = constoff;
      constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4;
   }

   if (so->const_layout.ssbo_size.count > 0) {
      unsigned cnt = so->const_layout.ssbo_size.count;
      so->constbase.ssbo_sizes = constoff;
      constoff += align(cnt, 4) / 4;
   }

   if (so->const_layout.image_dims.count > 0) {
      unsigned cnt = so->const_layout.image_dims.count;
      so->constbase.image_dims = constoff;
      constoff += align(cnt, 4) / 4;
   }

   unsigned num_driver_params = 0;
   if (so->type == MESA_SHADER_VERTEX) {
      num_driver_params = IR3_DP_VS_COUNT;
   } else if (so->type == MESA_SHADER_COMPUTE) {
      num_driver_params = IR3_DP_CS_COUNT;
   }

   so->constbase.driver_param = constoff;
   constoff += align(num_driver_params, 4) / 4;

   if ((so->type == MESA_SHADER_VERTEX) &&
         (compiler->gpu_id < 500) &&
         so->shader->stream_output.num_outputs > 0) {
      so->constbase.tfbo = constoff;
      constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
   }

   so->constbase.immediate = constoff;

   return ctx;
}

void
ir3_context_free(struct ir3_context *ctx)
{
   ralloc_free(ctx);
}

/*
 * Misc helpers
 */
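
/* Note: ir3 represents each nir ssa value as an array of scalar
 * ir3_instruction pointers, one per component; def_ht maps the
 * nir_ssa_def to that array so ir3_get_src() can later look up the
 * per-component values.
 */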

/* allocate an n element value array (to be populated by caller) and
 * insert in def_ht
 */
struct ir3_instruction **
ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n)
{
   struct ir3_instruction **value =
      ralloc_array(ctx->def_ht, struct ir3_instruction *, n);
   _mesa_hash_table_insert(ctx->def_ht, dst, value);
   return value;
}

struct ir3_instruction **
ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
{
   struct ir3_instruction **value;

   if (dst->is_ssa) {
      value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
   } else {
      value = ralloc_array(ctx, struct ir3_instruction *, n);
   }

   /* NOTE: in the non-ssa case, we don't really need to store last_dst
    * but this helps us catch cases where a put_dst() call is forgotten
    */
   compile_assert(ctx, !ctx->last_dst);
   ctx->last_dst = value;
   ctx->last_dst_n = n;

   return value;
}

struct ir3_instruction * const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
{
   if (src->is_ssa) {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
      compile_assert(ctx, entry);
      return entry->data;
   } else {
      nir_register *reg = src->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = arr->r->num_components;
      struct ir3_instruction *addr = NULL;
      struct ir3_instruction **value =
         ralloc_array(ctx, struct ir3_instruction *, num_components);

      if (src->reg.indirect)
         addr = ir3_get_addr(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
               reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = src->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         value[i] = ir3_create_array_load(ctx, arr, n, addr);
      }

      return value;
   }
}

void
ir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
{
   unsigned bit_size = nir_dest_bit_size(*dst);

   /* add extra mov if dst value is HIGH reg.. in some cases not all
    * instructions can read from HIGH regs, in cases where they can
    * ir3_cp will clean up the extra mov:
    */
   for (unsigned i = 0; i < ctx->last_dst_n; i++) {
      if (!ctx->last_dst[i])
         continue;
      if (ctx->last_dst[i]->regs[0]->flags & IR3_REG_HIGH) {
         ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
      }
   }

   if (bit_size < 32) {
      for (unsigned i = 0; i < ctx->last_dst_n; i++) {
         struct ir3_instruction *dst = ctx->last_dst[i];
         dst->regs[0]->flags |= IR3_REG_HALF;
         if (ctx->last_dst[i]->opc == OPC_META_FO)
            dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF;
      }
   }

   if (!dst->is_ssa) {
      nir_register *reg = dst->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = ctx->last_dst_n;
      struct ir3_instruction *addr = NULL;

      if (dst->reg.indirect)
         addr = ir3_get_addr(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
               reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = dst->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         if (!ctx->last_dst[i])
            continue;
         ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
      }

      ralloc_free(ctx->last_dst);
   }

   ctx->last_dst = NULL;
   ctx->last_dst_n = 0;
}

struct ir3_instruction *
ir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr,
      unsigned arrsz)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *collect;

   if (arrsz == 0)
      return NULL;

   unsigned flags = arr[0]->regs[0]->flags & IR3_REG_HALF;

   collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz);
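   /* reg[0] is the destination (its wrmask is set below to cover all
    * arrsz components); each collected element is added as an SSA src:
    */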
   ir3_reg_create(collect, 0, flags);  /* dst */
   for (unsigned i = 0; i < arrsz; i++) {
      struct ir3_instruction *elem = arr[i];

      /* Since arrays are pre-colored in RA, we can't assume that
       * things will end up in the right place.  (Ie. if a collect
       * joins elements from two different arrays.)  So insert an
       * extra mov.
       *
       * We could possibly skip this if all the collected elements
       * are contiguous elements in a single array..  not sure how
       * likely that is to happen.
       *
       * Fixes a problem with glamor shaders, that in effect do
       * something like:
       *
       *   if (foo)
       *     texcoord = ..
       *   else
       *     texcoord = ..
       *   color = texture2D(tex, texcoord);
       *
       * In this case, texcoord will end up as nir registers (which
       * translate to ir3 arrays of length 1), and we can't assume
       * the two (or more) arrays will get allocated in consecutive
       * scalar registers.
       */
      if (elem->regs[0]->flags & IR3_REG_ARRAY) {
         type_t type = (flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
         elem = ir3_MOV(block, elem, type);
      }

      compile_assert(ctx, (elem->regs[0]->flags & IR3_REG_HALF) == flags);
      ir3_reg_create(collect, 0, IR3_REG_SSA | flags)->instr = elem;
   }

   collect->regs[0]->wrmask = MASK(arrsz);

   return collect;
}

/* helper for instructions that produce multiple consecutive scalar
 * outputs which need to have a split/fanout meta instruction inserted
 */
void
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
      struct ir3_instruction *src, unsigned base, unsigned n)
{
   struct ir3_instruction *prev = NULL;

   if ((n == 1) && (src->regs[0]->wrmask == 0x1)) {
      dst[0] = src;
      return;
   }

   unsigned flags = src->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH);

   for (int i = 0, j = 0; i < n; i++) {
      struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO);
      ir3_reg_create(split, 0, IR3_REG_SSA | flags);
      ir3_reg_create(split, 0, IR3_REG_SSA | flags)->instr = src;
      split->fo.off = i + base;

      if (prev) {
         split->cp.left = prev;
         split->cp.left_cnt++;
         prev->cp.right = split;
         prev->cp.right_cnt++;
      }
      prev = split;

      if (src->regs[0]->wrmask & (1 << (i + base)))
         dst[j++] = split;
   }
}

NORETURN void
ir3_context_error(struct ir3_context *ctx, const char *format, ...)
{
   struct hash_table *errors = NULL;
   va_list ap;
   va_start(ap, format);
   if (ctx->cur_instr) {
      errors = _mesa_hash_table_create(NULL,
            _mesa_hash_pointer,
            _mesa_key_pointer_equal);
      char *msg = ralloc_vasprintf(errors, format, ap);
      _mesa_hash_table_insert(errors, ctx->cur_instr, msg);
   } else {
      _debug_vprintf(format, ap);
   }
   va_end(ap);
   nir_print_shader_annotated(ctx->s, stdout, errors);
   ralloc_free(errors);
   ctx->error = true;
   unreachable("");
}

static struct ir3_instruction *
create_addr(struct ir3_block *block, struct ir3_instruction *src, int align)
{
   struct ir3_instruction *instr, *immed;

   /* TODO in at least some cases, the backend could probably be
    * made clever enough to propagate IR3_REG_HALF..
    */
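   /* the incoming 32b value is narrowed to s16, multiplied by 'align'
    * (1..4), and then moved into the address register (a0.x) below:
    */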
   instr = ir3_COV(block, src, TYPE_U32, TYPE_S16);
   instr->regs[0]->flags |= IR3_REG_HALF;

   switch (align) {
   case 1:
      /* src *= 1: */
      break;
   case 2:
      /* src *= 2  =>  src <<= 1: */
      immed = create_immed(block, 1);
      immed->regs[0]->flags |= IR3_REG_HALF;

      instr = ir3_SHL_B(block, instr, 0, immed, 0);
      instr->regs[0]->flags |= IR3_REG_HALF;
      instr->regs[1]->flags |= IR3_REG_HALF;
      break;
   case 3:
      /* src *= 3: */
      immed = create_immed(block, 3);
      immed->regs[0]->flags |= IR3_REG_HALF;

      instr = ir3_MULL_U(block, instr, 0, immed, 0);
      instr->regs[0]->flags |= IR3_REG_HALF;
      instr->regs[1]->flags |= IR3_REG_HALF;
      break;
   case 4:
      /* src *= 4  =>  src <<= 2: */
      immed = create_immed(block, 2);
      immed->regs[0]->flags |= IR3_REG_HALF;

      instr = ir3_SHL_B(block, instr, 0, immed, 0);
      instr->regs[0]->flags |= IR3_REG_HALF;
      instr->regs[1]->flags |= IR3_REG_HALF;
      break;
   default:
      unreachable("bad align");
      return NULL;
   }

   instr = ir3_MOV(block, instr, TYPE_S16);
   instr->regs[0]->num = regid(REG_A0, 0);
   instr->regs[0]->flags |= IR3_REG_HALF;
   instr->regs[1]->flags |= IR3_REG_HALF;

   return instr;
}

/* caches addr values to avoid generating multiple cov/shl/mova
 * sequences for each use of a given NIR-level src as an address
 */
struct ir3_instruction *
ir3_get_addr(struct ir3_context *ctx, struct ir3_instruction *src, int align)
{
   struct ir3_instruction *addr;
   unsigned idx = align - 1;

   compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr_ht));

   if (!ctx->addr_ht[idx]) {
      ctx->addr_ht[idx] = _mesa_hash_table_create(ctx,
            _mesa_hash_pointer, _mesa_key_pointer_equal);
   } else {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->addr_ht[idx], src);
      if (entry)
         return entry->data;
   }

   addr = create_addr(ctx->block, src, align);
   _mesa_hash_table_insert(ctx->addr_ht[idx], src, addr);

   return addr;
}

struct ir3_instruction *
ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *cond;

   /* NOTE: only cmps.*.* can write p0.x: */
   cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0);
   cond->cat2.condition = IR3_COND_NE;

   /* condition always goes in predicate register: */
   cond->regs[0]->num = regid(REG_P0, 0);

   return cond;
}

/*
 * Array helpers
 */

void
ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
{
   struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
   arr->id = ++ctx->num_arrays;
   /* NOTE: sometimes we get non-array regs, for example for arrays of
    * length 1.  See fs-const-array-of-struct-of-array.shader_test.  So
    * treat a non-array as if it was an array of length 1.
    *
    * It would be nice if there was a nir pass to convert arrays of
    * length 1 to ssa.
    */
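   /* length is counted in scalar components, so eg. a vec3[4] register
    * becomes an array of length 12:
    */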
   arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
   compile_assert(ctx, arr->length > 0);
   arr->r = reg;
   list_addtail(&arr->node, &ctx->ir->array_list);
}

struct ir3_array *
ir3_get_array(struct ir3_context *ctx, nir_register *reg)
{
   list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
      if (arr->r == reg)
         return arr;
   }
   ir3_context_error(ctx, "bogus reg: %s\n", reg->name);
   return NULL;
}

/* relative (indirect) if address!=NULL */
struct ir3_instruction *
ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
      struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *src;

   mov = ir3_instr_create(block, OPC_MOV);
   mov->cat1.src_type = TYPE_U32;
   mov->cat1.dst_type = TYPE_U32;
   mov->barrier_class = IR3_BARRIER_ARRAY_R;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
   ir3_reg_create(mov, 0, 0);
   src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
         COND(address, IR3_REG_RELATIV));
   src->instr = arr->last_write;
   src->size = arr->length;
   src->array.id = arr->id;
   src->array.offset = n;

   if (address)
      ir3_instr_set_address(mov, address);

   return mov;
}

/* relative (indirect) if address!=NULL */
void
ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
      struct ir3_instruction *src, struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *dst;

   /* if not relative store, don't create an extra mov, since that
    * ends up being difficult for cp to remove.
    */
   if (!address) {
      dst = src->regs[0];

      src->barrier_class |= IR3_BARRIER_ARRAY_W;
      src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;

      dst->flags |= IR3_REG_ARRAY;
      dst->instr = arr->last_write;
      dst->size = arr->length;
      dst->array.id = arr->id;
      dst->array.offset = n;

      arr->last_write = src;

      array_insert(block, block->keeps, src);

      return;
   }

   mov = ir3_instr_create(block, OPC_MOV);
   mov->cat1.src_type = TYPE_U32;
   mov->cat1.dst_type = TYPE_U32;
   mov->barrier_class = IR3_BARRIER_ARRAY_W;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
   dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
         COND(address, IR3_REG_RELATIV));
   dst->instr = arr->last_write;
   dst->size = arr->length;
   dst->array.id = arr->id;
   dst->array.offset = n;
   ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;

   if (address)
      ir3_instr_set_address(mov, address);

   arr->last_write = mov;

   /* the array store may only matter to something in an earlier
    * block (ie. loops), but since arrays are not in SSA, depth
    * pass won't know this..  so keep all array stores:
    */
   array_insert(block, block->keeps, mov);
}