1/* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

// Insert an ALU group containing a single NOP before group 'b4'.
// Used as the rv6xx workaround when an AR (address register) load in the
// previous group would conflict with a relative-addressed source (see
// finalize_alu_group / finalize_alu_src, gated on r6xx_gpr_index_workaround).
void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {

	alu_group_node *g = sh.create_alu_group();
	alu_node *a = sh.create_alu();

	a->bc.set_op(ALU_OP0_NOP);
	a->bc.last = 1;

	g->push_back(a);
	b4->insert_before(g);
}

// Main entry point of the finalizer pass: walks the shader IR, lowers
// region nodes (loops/ifs) to control-flow instructions, applies per-chip
// workarounds, terminates the program, and publishes the final GPR/stack
// usage back to the shader. Always returns 0.
int bc_finalizer::run() {

	run_on(sh.root);

	// Regions are processed in reverse order, i.e. innermost first
	// (finalize_loop relies on this ordering for its last_cf update).
	regions_vec &rv = sh.get_regions();
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
			++I) {
		region_node *r = *I;

		assert(r);

		bool loop = r->is_loop();

		if (loop)
			finalize_loop(r);
		else
			finalize_if(r);

		// Replace the region node with its children in the parent list.
		r->expand();
	}

	cf_peephole();

	// workaround for some problems on r6xx/7xx
	// add ALU NOP to each vertex shader
	if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

		alu_group_node *g = sh.create_alu_group();

		alu_node *a = sh.create_alu();
		a->bc.set_op(ALU_OP0_NOP);
		a->bc.last = 1;

		g->push_back(a);
		c->push_back(g);

		sh.root->push_back(c);

		c = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(c);

		last_cf = c;
	}

	// Pre-cayman chips can't end the program on an ALU clause: append a
	// CF NOP so end_of_program can be set on a plain CF instruction.
	if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
		last_cf = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(last_cf);
	}

	// Cayman uses an explicit CF_END instruction; older chips set the
	// end_of_program bit on the last CF instruction instead.
	if (ctx.is_cayman()) {
		if (!last_cf) {
			cf_node *c = sh.create_cf(CF_OP_CF_END);
			sh.root->push_back(c);
		} else
			last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
	} else
		last_cf->bc.end_of_program = 1;

	// The final export of each export type must use the *_DONE opcode.
	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
		cf_node *le = last_export[t];
		if (le)
			le->bc.set_op(CF_OP_EXPORT_DONE);
	}

	sh.ngpr = ngpr;
	sh.nstack = nstack;
	return 0;
}

// Lower a loop region: wrap it in LOOP_START/LOOP_END, and convert the
// collected depart/repeat nodes into LOOP_BREAK/LOOP_CONTINUE.
void bc_finalizer::finalize_loop(region_node* r) {

	update_nstack(r);

	cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

	// Update last_cf, but don't overwrite it if it's outside the current loop nest since
	// it may point to a cf that is later in program order.
	// The single parent level check is sufficient since finalize_loop() is processed in
	// reverse order from innermost to outermost loop nest level.
	if (!last_cf || last_cf->get_parent_region() == r) {
		last_cf = loop_end;
	}

	// LOOP_START's target is the instruction after LOOP_END and vice versa.
	loop_start->jump_after(loop_end);
	loop_end->jump_after(loop_start);

	// Each depart becomes a LOOP_BREAK targeting the loop end.
	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		depart_node *dep = *I;
		cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
		loop_break->jump(loop_end);
		dep->push_back(loop_break);
		dep->expand();
	}

	// FIXME produces unnecessary LOOP_CONTINUE
	for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
			I != E; ++I) {
		repeat_node *rep = *I;
		// A repeat that is the last thing in the loop body falls through
		// to LOOP_END naturally and needs no explicit LOOP_CONTINUE.
		if (!(rep->parent == r && rep->prev == NULL)) {
			cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			loop_cont->jump(loop_end);
			rep->push_back(loop_cont);
		}
		rep->expand();
	}

	r->push_front(loop_start);
	r->push_back(loop_end);
}

// Lower an if region to JUMP/ELSE/POP control flow instructions.
void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional <else> code> ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {


		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		// Same single-parent-level last_cf rule as in finalize_loop().
		if (!last_cf || last_cf->get_parent_region() == r) {
			last_cf = if_pop;
		}
		if_pop->bc.pop_count = 1;
		// POP targets its own successor (jump_after on itself).
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		/* the depart/repeat 1 is actually part of the "else" code.
		 * if it's a depart for an outer loop region it will want to
		 * insert a LOOP_BREAK or LOOP_CONTINUE in here, so we need
		 * to emit the else clause.
		 */
		bool has_else = n_if->next;

		if (repdep1->is_depart()) {
			depart_node *dep1 = static_cast<depart_node*>(repdep1);
			if (dep1->target != r && dep1->target->is_loop())
				has_else = true;
		}

		if (repdep1->is_repeat()) {
			repeat_node *rep1 = static_cast<repeat_node*>(repdep1);
			if (rep1->target != r && rep1->target->is_loop())
				has_else = true;
		}

		if (has_else) {
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			// No else: JUMP goes straight past the POP and pops itself.
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}

		n_if->expand();
	}

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}

// Recursive walk over the IR tree: finalizes ALU groups, fetch and CF
// instructions, flags ALU_PUSH_BEFORE clauses that need the 8xx/9xx stack
// workarounds, and tracks last_cf.
void bc_finalizer::run_on(container_node* c) {
	node *prev_node = NULL;
	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;

		if (n->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
		} else {
			if (n->is_alu_clause()) {
				cf_node *c = static_cast<cf_node*>(n);

				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
					if (ctx.stack_workaround_8xx) {
						// 8xx: flag the clause when the push would land on
						// (or one below) a stack entry boundary.
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							unsigned elems = get_stack_depth(r, loops, ifs);
							unsigned dmod1 = elems % ctx.stack_entry_size;
							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

							if (elems && (!dmod1 || !dmod2))
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					} else if (ctx.stack_workaround_9xx) {
						// 9xx: flag the clause when nested in 2+ loops.
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							get_stack_depth(r, loops, ifs);
							if (loops >= 2)
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					}
				}
				last_cf = c;
			} else if (n->is_fetch_inst()) {
				finalize_fetch(static_cast<fetch_node*>(n));
			} else if (n->is_cf_inst()) {
				finalize_cf(static_cast<cf_node*>(n));
			}
			if (n->is_container())
				run_on(static_cast<container_node*>(n));
		}
		prev_node = n;
	}
}

// Encode the destination fields and source operands of every instruction
// in ALU group 'g'. 'prev_node' is the preceding sibling node (used to
// detect the rv6xx AR-load hazard against the previous ALU group).
void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {

	alu_node *last = NULL;
	alu_group_node *prev_g = NULL;
	bool add_nop = false;
	if (prev_node && prev_node->is_alu_group()) {
		prev_g = static_cast<alu_group_node*>(prev_node);
	}

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;
		value *d = n->dst.empty() ? NULL : n->dst[0];

		// Special-register destinations (AR loads, geometry emits, LDS
		// queues/accesses, scratch) don't write a GPR.
		if (d && d->is_special_reg()) {
			assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit() || d->is_lds_oq() || d->is_lds_access() || d->is_scratch());
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		if (d) {
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		// On cayman, MOVA leaves dst_gpr untouched.
		if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
			n->bc.dst_gpr = fdst.sel();
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;


		// Relative (indexed) destination: set dst_rel and account for the
		// whole array in the GPR count.
		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size -1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		if (n->bc.op_ptr->flags & AF_PRED) {
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		add_nop |= finalize_alu_src(g, n, prev_g);

		last = n;
	}

	if (add_nop) {
		if (sh.get_ctx().r6xx_gpr_index_workaround) {
			insert_rv6xx_load_ar_workaround(g);
		}
	}
	// Mark the final instruction of the group.
	last->bc.last = 1;
}

// Encode the source operands of ALU instruction 'a' (member of group 'g').
// Returns true if the rv6xx AR-indexing hazard was detected against the
// previous group 'prev' (caller then inserts a NOP group).
bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
	vvec &sv = a->src;
	bool add_nop = false;
	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		sel_chan gpr;

		switch (v->kind) {
		case VLK_REL_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
				// Hazard: previous group wrote the GPR we index from.
				if (prev && !add_nop) {
					for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
						alu_node *pn = static_cast<alu_node*>(*pI);
						if (pn->bc.dst_gpr == src.sel) {
							add_nop = true;
							break;
						}
					}
				}
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			gpr = v->get_final_gpr();
			src.sel = gpr.sel();
			src.chan = gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			// Use an inline constant when the literal matches one of the
			// hardware-provided values, otherwise a literal slot.
			literal lv = v->literal_value;
			src.chan = 0;

			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_SPECIAL_REG:
			if (v->select.sel() == SV_LDS_OQA) {
				src.sel = ALU_SRC_LDS_OQ_A_POP;
				src.chan = 0;
			} else if (v->select.sel() == SV_LDS_OQB) {
				src.sel = ALU_SRC_LDS_OQ_B_POP;
				src.chan = 0;
			} else {
				src.sel = ALU_SRC_0;
				src.chan = 0;
			}
			break;
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
		// Hazard: previous group did a relative write to this source GPR.
		if (prev && !add_nop) {
			for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
				alu_node *pn = static_cast<alu_node*>(*pI);
				if (pn->bc.dst_rel) {
					if (pn->bc.dst_gpr == src.sel) {
						add_nop = true;
						break;
					}
				}
			}
		}
	}

	// Zero out any unused source slots.
	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
	return add_nop;
}

// Encode four source channels of 'src' (starting at operand index
// 'arg_start') into dst's fetch bytecode: validates that all GPR operands
// use a single register, maps constants 0/1.0 to SEL_0/SEL_1, and sets
// src_gpr. dst's dst_sel channels are all masked.
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
	int reg = -1;

	for (unsigned chan = 0; chan < 4; ++chan) {

		dst.bc.dst_sel[chan] = SEL_MASK;

		unsigned sel = SEL_MASK;

		value *v = src.src[arg_start + chan];

		if (!v || v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			// Only 0 and 1.0 are representable as fetch constants.
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			// All GPR operands of a fetch must live in the same register.
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(&src);
			sblog << "\n";
			abort();
		}

		dst.bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	dst.bc.src_gpr = reg >= 0 ? reg : 0;
}

// Emit SET_GRADIENTS_V/H instructions before fetch 'f', taking their
// operands from f's extra source slots (args 4..7 and 8..11).
void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12 || f->src.size() == 13);
	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

	unsigned arg_start = 0;

	for (unsigned op = 0; op < 2; ++op) {
		fetch_node *n = sh.create_fetch();
		n->bc.set_op(ops[op]);

		// First op uses args 4..7, second uses 8..11.
		arg_start += 4;

		copy_fetch_src(*n, *f, arg_start);

		f->insert_before(n);
	}

}

// Emit a SET_TEXTURE_OFFSETS instruction before fetch 'f', using f's
// source slots 4..7 as the offsets.
void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
	assert(f.src.size() == 8);

	fetch_node *n = sh.create_fetch();

	n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

	copy_fetch_src(*n, f, 4);

	f.insert_before(n);
}

// Encode the source and destination registers/swizzles of a fetch
// instruction, emitting helper instructions (gradients, texture offsets)
// where the opcode requires them.
void bc_finalizer::finalize_fetch(fetch_node* f) {

	int reg = -1;

	// src

	unsigned src_count = 4;

	unsigned flags = f->bc.op_ptr->flags;

	if (flags & FF_VTX) {
		src_count = 1;
	} else if (flags & FF_GDS) {
		src_count = 2;
	} else if (flags & FF_USEGRAD) {
		emit_set_grad(f);
	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
		emit_set_texture_offsets(*f);
	}

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		// Channels already using a special selector are left alone.
		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			// All source channels must come from a single GPR.
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.src_gpr = reg >= 0 ? reg : 0;

	// dst

	reg = -1;

	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];
		if (!v)
			continue;

		if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			// All dst channels must also target a single GPR.
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch dst operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			dst_swz[vchan] = sel;

		} else {
			sblog << "invalid fetch dst operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = dst_swz[i];

	// GDS ops may legitimately have no GPR destination at all.
	if ((flags & FF_GDS) && reg == -1) {
		f->bc.dst_sel[0] = SEL_MASK;
		f->bc.dst_gpr = 0;
		return ;
	}
	assert(reg >= 0);

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

// Encode a CF instruction: exports (rw_gpr + channel selectors), memory
// writes (rw_gpr/comp_mask and optional index_gpr), and stack accounting
// for calls. Also tracks last_cf and last_export.
void bc_finalizer::finalize_cf(cf_node* c) {

	unsigned flags = c->bc.op_ptr->flags;

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		// Emit plain EXPORT here; run() promotes the last export of each
		// type to EXPORT_DONE afterwards.
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid export constant operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				// All exported channels must come from one GPR.
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid export source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;

	} else if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;


		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v;
			// R600 scratch reads (types 2/3) take their operands from the
			// dst vector instead of src.
			if (ctx.hw_class == HW_CLASS_R600 && c->bc.op == CF_OP_MEM_SCRATCH &&
			    (c->bc.type == 2 || c->bc.type == 3))
				v = c->dst[chan];
			else
				v = c->src[chan];

			if (!v || v->is_undef())
				continue;

			// Memory ops require un-swizzled operands in a single GPR.
			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}
			unsigned vreg = v->gpr.sel();
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			mask |= (1 << chan);
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;
		c->bc.comp_mask = mask;

		// RAT / non-streamout ops with an odd type carry an index GPR in
		// source slots 4..7.
		if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];
				if (!v || v->is_undef())
					continue;

				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
				unsigned vreg = v->gpr.sel();
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			}

			assert(reg >= 0);

			if (reg >= 0)
				update_ngpr(reg);

			c->bc.index_gpr = reg >= 0 ? reg : 0;
		}
	} else if (flags & CF_CALL) {
		// Calls reserve extra stack; half-wavefront (16) chips need 2.
		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
	}
}

// Translate a kcache constant reference into the final ALU source
// selector, based on which kcache banks/lines the enclosing ALU clause
// has locked (alu->bc.kc). Asserts if the line isn't locked.
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	unsigned sel = v->select.kcache_sel();
	unsigned bank = v->select.kcache_bank();
	unsigned chan = v->select.chan();
	// Base selector for each of the 4 kcache lock slots.
	static const unsigned kc_base[] = {128, 160, 256, 288};

	sel &= 4095;

	// 16 constants per kcache line.
	unsigned line = sel >> 4;

	for (unsigned k = 0; k < 4; ++k) {
		bc_kcache &kc = alu->bc.kc[k];

		if (kc.mode == KC_LOCK_NONE)
			break;

		// KC_LOCK_2 locks two consecutive lines starting at kc.addr.
		if (kc.bank == bank && (kc.addr == line ||
				(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

			sel = kc_base[k] + (sel - (kc.addr << 4));

			return sel_chan(sel, chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}

// Grow the tracked GPR count to cover 'gpr', ignoring the temp GPRs
// reserved at the top of the register file.
void bc_finalizer::update_ngpr(unsigned gpr) {
	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
		ngpr = gpr + 1;
}

// Compute the number of stack elements needed at node 'n', counting
// enclosing loop and if regions (out-params 'loops'/'ifs') plus 'add'
// extra elements, and applying per-chip reservation rules.
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
                                       unsigned &ifs, unsigned add) {
	unsigned stack_elements = add;
	bool has_non_wqm_push = (add != 0);
	region_node *r = n->is_region() ?
			static_cast<region_node*>(n) : n->get_parent_region();

	loops = 0;
	ifs = 0;

	while (r) {
		if (r->is_loop()) {
			++loops;
		} else {
			++ifs;
			has_non_wqm_push = true;
		}
		r = r->get_parent_region();
	}
	stack_elements += (loops * ctx.stack_entry_size) + ifs;

	// reserve additional elements in some cases
	switch (ctx.hw_class) {
	case HW_CLASS_R600:
	case HW_CLASS_R700:
		// If any non-WQM push is invoked, 2 elements should be reserved.
		if (has_non_wqm_push)
			stack_elements += 2;
		break;
	case HW_CLASS_CAYMAN:
		// If any stack operation is invoked, 2 elements should be reserved
		if (stack_elements)
			stack_elements += 2;
		break;
	case HW_CLASS_EVERGREEN:
		// According to the docs we need to reserve 1 element for each of the
		// following cases:
		//   1) non-WQM push is used with WQM/LOOP frames on stack
		//   2) ALU_ELSE_AFTER is used at the point of max stack usage
		// NOTE:
		// It was found that the conditions above are not sufficient, there are
		// other cases where we also need to reserve stack space, that's why
		// we always reserve 1 stack element if we have non-WQM push on stack.
		// Condition 2 is ignored for now because we don't use this instruction.
		if (has_non_wqm_push)
			++stack_elements;
		break;
	case HW_CLASS_UNKNOWN:
		assert(0);
	}
	return stack_elements;
}

// Raise the tracked stack size (in hw stack entries) to cover region 'r'
// plus 'add' extra elements.
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
	unsigned loops = 0;
	unsigned ifs = 0;
	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

	// XXX all chips expect this value to be computed using 4 as entry size,
	// not the real entry size
	unsigned stack_entries = (elems + 3) >> 2;

	if (nstack < stack_entries)
		nstack = stack_entries;
}

// Final cleanup over top-level CF instructions: materialize the 8xx/9xx
// stack workaround (explicit PUSH before flagged ALU clauses), resolve
// jump_after targets, fold POP into the preceding ALU clause
// (ALU_POP_AFTER), and drop JUMPs that target their own successor.
void bc_finalizer::cf_peephole() {
	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
				I = N) {
			N = I; ++N;
			cf_node *c = static_cast<cf_node*>(*I);

			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
					(c->flags & NF_ALU_STACK_WORKAROUND)) {
				cf_node *push = sh.create_cf(CF_OP_PUSH);
				c->insert_before(push);
				push->jump(c);
				c->bc.set_op(CF_OP_ALU);
			}
		}
	}

	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
			I = N) {
		N = I; ++N;

		cf_node *c = static_cast<cf_node*>(*I);

		if (c->jump_after_target) {
			// A jump-after needs a following instruction to land on;
			// append a NOP if the target is the last node.
			if (c->jump_target->next == NULL) {
				c->jump_target->insert_after(sh.create_cf(CF_OP_NOP));
				if (last_cf == c->jump_target)
					last_cf = static_cast<cf_node*>(c->jump_target->next);
			}
			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
			c->jump_after_target = false;
		}

		if (c->is_cf_op(CF_OP_POP)) {
			node *p = c->prev;
			if (p->is_alu_clause()) {
				cf_node *a = static_cast<cf_node*>(p);

				if (a->bc.op == CF_OP_ALU) {
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
					c->remove();
				}
			}
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
			// if JUMP is immediately followed by its jump target,
			// then JUMP is useless and we can eliminate it
			c->remove();
		}
	}
}

} // namespace r600_sb