/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define BCP_DEBUG 0

#if BCP_DEBUG
#define BCP_DUMP(q) do { q } while (0)
#else
#define BCP_DUMP(q)
#endif

#include "r600_pipe.h"
#include "r600_shader.h"
#include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1

#include <stack>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
#include "util/macros.h"

namespace r600_sb {

int bc_parser::decode() {

    dw = bc->bytecode;
    bc_ndw = bc->ndw;
    max_cf = 0;

    dec = new bc_decoder(ctx, dw, bc_ndw);

    shader_target t = TARGET_UNKNOWN;

    if (pshader) {
        switch (bc->type) {
        case PIPE_SHADER_FRAGMENT: t = TARGET_PS; break;
        case PIPE_SHADER_VERTEX:
            t = pshader->vs_as_ls ? TARGET_LS :
                    (pshader->vs_as_es ? TARGET_ES : TARGET_VS);
            break;
        case PIPE_SHADER_GEOMETRY: t = TARGET_GS; break;
        case PIPE_SHADER_COMPUTE: t = TARGET_COMPUTE; break;
        case PIPE_SHADER_TESS_CTRL: t = TARGET_HS; break;
        case PIPE_SHADER_TESS_EVAL:
            t = pshader->tes_as_es ? TARGET_ES : TARGET_VS;
            break;
        default: assert(!"unknown shader target"); return -1; break;
        }
    } else {
        if (bc->type == PIPE_SHADER_COMPUTE)
            t = TARGET_COMPUTE;
        else
            t = TARGET_FETCH;
    }

    sh = new shader(ctx, t, bc->debug_id);
    sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE || bc->precise);

    int r = decode_shader();

    delete dec;

    sh->ngpr = bc->ngpr;
    sh->nstack = bc->nstack;

    return r;
}
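
/* Decode the whole CF program. A single end-of-program marker is not
 * enough to stop: branch targets recorded in max_cf may lie beyond it,
 * so keep decoding until the current CF index (i >> 1, since i counts
 * dwords) covers the highest branch target seen so far. */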
int bc_parser::decode_shader() {
    int r = 0;
    unsigned i = 0;
    bool eop = false;

    sh->init();

    do {
        eop = false;
        if ((r = decode_cf(i, eop)))
            return r;

    } while (!eop || (i >> 1) < max_cf);

    return 0;
}

int bc_parser::prepare() {
    int r = 0;
    if ((r = parse_decls()))
        return r;
    if ((r = prepare_ir()))
        return r;
    return 0;
}

int bc_parser::parse_decls() {

    if (!pshader) {
        if (gpr_reladdr)
            sh->add_gpr_array(0, bc->ngpr, 0x0F);

        // compute shaders have some values preloaded in R0, R1
        sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
        sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
        return 0;
    }

    if (pshader->indirect_files &
            ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER))) {

        assert(pshader->num_arrays);

        if (pshader->num_arrays) {
            for (unsigned i = 0; i < pshader->num_arrays; ++i) {
                r600_shader_array &a = pshader->arrays[i];
                sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
            }
        } else {
            sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
        }
    }

    // GS inputs can add indirect addressing
    if (sh->target == TARGET_GS) {
        if (pshader->num_arrays) {
            for (unsigned i = 0; i < pshader->num_arrays; ++i) {
                r600_shader_array &a = pshader->arrays[i];
                sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
            }
        }
    }

    if (sh->target == TARGET_VS || sh->target == TARGET_ES ||
            sh->target == TARGET_HS || sh->target == TARGET_LS)
        sh->add_input(0, 1, 0x0F);
    else if (sh->target == TARGET_GS) {
        sh->add_input(0, 1, 0x0F);
        sh->add_input(1, 1, 0x0F);
    } else if (sh->target == TARGET_COMPUTE) {
        sh->add_input(0, 1, 0x0F);
        sh->add_input(1, 1, 0x0F);
    }

    bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
            && sh->target == TARGET_PS;

    bool ij_interpolators[6];
    memset(ij_interpolators, 0, sizeof(ij_interpolators));

    for (unsigned i = 0; i < pshader->ninput; ++i) {
        r600_shader_io &in = pshader->input[i];
        bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
        sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
        if (ps_interp && in.spi_sid) {
            int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location);
            if (k >= 0) {
                ij_interpolators[k] |= true;
                if (in.uses_interpolate_at_centroid) {
                    k = eg_get_interpolator_index(in.interpolate, TGSI_INTERPOLATE_LOC_CENTROID);
                    ij_interpolators[k] |= true;
                }
            }
        }
    }

    if (ps_interp) {
        /* add the egcm ij interpolators to live inputs */
        unsigned num_ij = 0;
        for (unsigned i = 0; i < ARRAY_SIZE(ij_interpolators); i++) {
            num_ij += ij_interpolators[i];
        }

        unsigned mask = (1 << (2 * num_ij)) - 1;
        unsigned gpr = 0;

        while (mask) {
            sh->add_input(gpr, true, mask & 0x0F);
            ++gpr;
            mask >>= 4;
        }
    }

    return 0;
}
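
/* Decode a single CF instruction at dword offset i and, for ALU/fetch
 * clauses, the instructions it points to. Every CF node is recorded in
 * cf_map by its CF index so that branch targets can be resolved later
 * in prepare_ir(). */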
int bc_parser::decode_cf(unsigned &i, bool &eop) {

    int r;

    cf_node *cf = sh->create_cf();
    sh->root->push_back(cf);

    unsigned id = i >> 1;

    cf->bc.id = id;

    if (cf_map.size() < id + 1)
        cf_map.resize(id + 1);

    cf_map[id] = cf;

    if ((r = dec->decode_cf(i, cf->bc)))
        return r;

    cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;

    if (flags & CF_ALU) {
        if ((r = decode_alu_clause(cf)))
            return r;
    } else if (flags & CF_FETCH) {
        if ((r = decode_fetch_clause(cf)))
            return r;
    } else if (flags & CF_EXP) {
        if (cf->bc.rw_rel)
            gpr_reladdr = true;
        assert(!cf->bc.rw_rel);
    } else if (flags & CF_MEM) {
        if (cf->bc.rw_rel)
            gpr_reladdr = true;
        assert(!cf->bc.rw_rel);
    } else if (flags & CF_BRANCH) {
        if (cf->bc.addr > max_cf)
            max_cf = cf->bc.addr;
    }

    eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END ||
            cf->bc.op == CF_OP_RET;
    return 0;
}

int bc_parser::decode_alu_clause(cf_node* cf) {
    unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;

    cf->subtype = NST_ALU_CLAUSE;

    cgroup = 0;
    memset(slots[0], 0, 5 * sizeof(slots[0][0]));

    unsigned ng = 0;

    do {
        decode_alu_group(cf, i, gcnt);
        assert(gcnt <= cnt);
        cnt -= gcnt;
        ng++;
    } while (cnt);

    return 0;
}

int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
    int r;
    alu_node *n;
    alu_group_node *g = sh->create_alu_group();

    cgroup = !cgroup;
    memset(slots[cgroup], 0, 5 * sizeof(slots[0][0]));
    gcnt = 0;

    unsigned literal_mask = 0;

    do {
        n = sh->create_alu();
        g->push_back(n);

        if ((r = dec->decode_alu(i, n->bc)))
            return r;

        if (!sh->assign_slot(n, slots[cgroup])) {
            assert(!"alu slot assignment failed");
            return -1;
        }

        gcnt++;

    } while (gcnt <= 5 && !n->bc.last);

    assert(n->bc.last);

    for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
        n = static_cast<alu_node*>(*I);

        if (n->bc.dst_rel)
            gpr_reladdr = true;

        for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
            bc_alu_src &src = n->bc.src[k];
            if (src.rel)
                gpr_reladdr = true;
            if (src.sel == ALU_SRC_LITERAL) {
                literal_mask |= (1 << src.chan);
                src.value.u = dw[i + src.chan];
            }
        }
    }

    unsigned literal_ndw = 0;
    while (literal_mask) {
        g->literals.push_back(dw[i + literal_ndw]);
        literal_ndw += 1;
        literal_mask >>= 1;
    }

    literal_ndw = (literal_ndw + 1) & ~1u;

    i += literal_ndw;
    gcnt += literal_ndw >> 1;

    cf->push_back(g);
    return 0;
}

int bc_parser::prepare_alu_clause(cf_node* cf) {

    // loop over alu groups
    for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
        assert(I->subtype == NST_ALU_GROUP);
        alu_group_node *g = static_cast<alu_group_node*>(*I);
        prepare_alu_group(cf, g);
    }

    return 0;
}

void bc_parser::save_set_cf_index(value *val, unsigned idx)
{
    assert(idx <= 1);
    assert(val);
    cf_index_value[idx] = val;
}

value *bc_parser::get_cf_index_value(unsigned idx)
{
    assert(idx <= 1);
    assert(cf_index_value[idx]);
    return cf_index_value[idx];
}

void bc_parser::save_mova(alu_node *mova)
{
    assert(mova);
    this->mova = mova;
}

alu_node *bc_parser::get_mova()
{
    assert(mova);
    return mova;
}
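
/* Convert one decoded ALU group to IR: redo slot assignment, create
 * dst/src values for every instruction (GPRs, literals, kcache
 * constants, PV/PS results of the previous group, special values for
 * AR, the predicate and the LDS queues), then pack multi-slot
 * instructions into alu_packed_nodes. */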
int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {

    alu_node *n;

    cgroup = !cgroup;
    memset(slots[cgroup], 0, 5 * sizeof(slots[0][0]));

    for (node_iterator I = g->begin(), E = g->end();
            I != E; ++I) {
        n = static_cast<alu_node*>(*I);
        bool ubo_indexing[2] = {};

        if (!sh->assign_slot(n, slots[cgroup])) {
            assert(!"alu slot assignment failed");
            return -1;
        }

        unsigned src_count = n->bc.op_ptr->src_count;

        if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
            n->flags |= NF_ALU_4SLOT;

        if (ctx.alu_slots(n->bc.op) & AF_2SLOT)
            n->flags |= NF_ALU_2SLOT;

        n->src.resize(src_count);

        unsigned flags = n->bc.op_ptr->flags;

        if (flags & AF_LDS) {
            bool need_rw = false, need_oqa = false, need_oqb = false;
            int ndst = 0, ncount = 0;

            /* all non-read operations have side effects */
            if (n->bc.op != LDS_OP2_LDS_READ2_RET &&
                    n->bc.op != LDS_OP1_LDS_READ_REL_RET &&
                    n->bc.op != LDS_OP1_LDS_READ_RET) {
                n->flags |= NF_DONT_KILL;
                ndst++;
                need_rw = true;
            }

            if (n->bc.op >= LDS_OP2_LDS_ADD_RET &&
                    n->bc.op <= LDS_OP1_LDS_USHORT_READ_RET) {
                need_oqa = true;
                ndst++;
            }

            if (n->bc.op == LDS_OP2_LDS_READ2_RET ||
                    n->bc.op == LDS_OP1_LDS_READ_REL_RET) {
                need_oqb = true;
                ndst++;
            }

            n->dst.resize(ndst);
            if (need_oqa)
                n->dst[ncount++] = sh->get_special_value(SV_LDS_OQA);
            if (need_oqb)
                n->dst[ncount++] = sh->get_special_value(SV_LDS_OQB);
            if (need_rw)
                n->dst[ncount++] = sh->get_special_value(SV_LDS_RW);

            n->flags |= NF_DONT_MOVE | NF_DONT_HOIST;

        } else if (flags & AF_PRED) {
            n->dst.resize(3);
            if (n->bc.update_pred)
                n->dst[1] = sh->get_special_value(SV_ALU_PRED);
            if (n->bc.update_exec_mask)
                n->dst[2] = sh->get_special_value(SV_EXEC_MASK);

            n->flags |= NF_DONT_HOIST;

        } else if (flags & AF_KILL) {

            n->dst.resize(2);
            n->dst[1] = sh->get_special_value(SV_VALID_MASK);
            sh->set_uses_kill();

            n->flags |= NF_DONT_HOIST | NF_DONT_MOVE |
                    NF_DONT_KILL | NF_SCHEDULE_EARLY;

        } else {
            n->dst.resize(1);
        }

        if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == ALU_OP0_SET_CF_IDX1) {
            // move the CF_IDX value into tex instruction operands; the
            // scheduler will later re-emit the setting of CF_IDX and
            // DCE will kill this op
            save_set_cf_index(get_mova()->src[0], n->bc.op == ALU_OP0_SET_CF_IDX1);
        } else if (flags & AF_MOVA) {

            n->dst[0] = sh->get_special_value(SV_AR_INDEX);
            save_mova(n);

            n->flags |= NF_DONT_HOIST;

        } else if ((n->bc.op_ptr->src_count == 3 || n->bc.write_mask) &&
                !(flags & AF_LDS)) {
            assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X);

            value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan,
                    n->bc.dst_rel);

            n->dst[0] = v;
        }

        if (n->bc.pred_sel) {
            sh->has_alu_predication = true;
            n->pred = sh->get_special_value(SV_ALU_PRED);
        }
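
        // translate each source operand into an IR value: literals,
        // PV/PS results of the previous group, kcache constants, GPRs,
        // interpolation params, LDS queue pops or inline constants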
        for (unsigned s = 0; s < src_count; ++s) {
            bc_alu_src &src = n->bc.src[s];

            if (src.sel == ALU_SRC_LITERAL) {
                n->src[s] = sh->get_const_value(src.value);
            } else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
                unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
                        ((unsigned)SLOT_TRANS) : src.chan;

                // XXX shouldn't happen, but the llvm backend uses PS on cayman
                if (prev_slot == SLOT_TRANS && ctx.is_cayman())
                    prev_slot = SLOT_X;

                alu_node *prev_alu = slots[pgroup][prev_slot];

                assert(prev_alu);

                if (!prev_alu->dst[0]) {
                    value *t = sh->create_temp_value();
                    prev_alu->dst[0] = t;
                }

                value *d = prev_alu->dst[0];

                if (d->is_rel()) {
                    d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr,
                            prev_alu->bc.dst_chan,
                            prev_alu->bc.dst_rel);
                }

                n->src[s] = d;
            } else if (ctx.is_kcache_sel(src.sel)) {
                unsigned sel = src.sel, kc_addr;
                unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1);

                bc_kcache &kc = cf->bc.kc[kc_set];
                kc_addr = (kc.addr << 4) + (sel & 0x1F);
                n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan,
                        (alu_kcache_index_mode)kc.index_mode);

                if (kc.index_mode != KC_INDEX_NONE) {
                    assert(kc.index_mode != KC_LOCK_LOOP);
                    ubo_indexing[kc.index_mode - KC_INDEX_0] = true;
                }
            } else if (src.sel < MAX_GPR) {
                value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);

                n->src[s] = v;

            } else if (src.sel >= ALU_SRC_PARAM_OFFSET) {
                // the slot is used for the value channel because in fact the
                // slot determines the channel that is loaded by INTERP_LOAD_P0
                // (and maybe some others); otherwise GVN would consider
                // INTERP_LOAD_P0s with the same param index as equal
                // instructions and leave only one of them
                n->src[s] = sh->get_special_ro_value(sel_chan(src.sel,
                        n->bc.slot));
            } else if (ctx.is_lds_oq(src.sel)) {
                switch (src.sel) {
                case ALU_SRC_LDS_OQ_A:
                case ALU_SRC_LDS_OQ_B:
                    assert(!"Unsupported LDS queue access in SB");
                    break;
                case ALU_SRC_LDS_OQ_A_POP:
                    n->src[s] = sh->get_special_value(SV_LDS_OQA);
                    break;
                case ALU_SRC_LDS_OQ_B_POP:
                    n->src[s] = sh->get_special_value(SV_LDS_OQB);
                    break;
                }
                n->flags |= NF_DONT_HOIST | NF_DONT_MOVE;

            } else {
                switch (src.sel) {
                case ALU_SRC_0:
                    n->src[s] = sh->get_const_value(0);
                    break;
                case ALU_SRC_0_5:
                    n->src[s] = sh->get_const_value(0.5f);
                    break;
                case ALU_SRC_1:
                    n->src[s] = sh->get_const_value(1.0f);
                    break;
                case ALU_SRC_1_INT:
                    n->src[s] = sh->get_const_value(1);
                    break;
                case ALU_SRC_M_1_INT:
                    n->src[s] = sh->get_const_value(-1);
                    break;
                default:
                    n->src[s] = sh->get_special_ro_value(src.sel);
                    break;
                }
            }
        }

        // add UBO index values, if any, as dependencies
        if (ubo_indexing[0]) {
            n->src.push_back(get_cf_index_value(0));
        }
        if (ubo_indexing[1]) {
            n->src.push_back(get_cf_index_value(1));
        }

        if ((flags & AF_MOVA) &&
                (n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 ||
                 n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) &&
                ctx.is_cayman())
            // move the CF_IDX value into tex instruction operands; the
            // scheduler will later re-emit the setting of CF_IDX
            save_set_cf_index(n->src[0],
                    n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1);
    }

    // pack multislot instructions into alu_packed_node
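    // (multi-slot ops such as DOT4 were decoded as separate per-slot ALU
    // nodes; on cayman, which has no trans unit, scalar (AF_S) ops occupy
    // vector slots as well and are packed the same way)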

    alu_packed_node *p = NULL;
    for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) {
        N = I + 1;
        alu_node *a = static_cast<alu_node*>(*I);
        unsigned sflags = a->bc.slot_flags;

        if (sflags == AF_4V || sflags == AF_2V ||
                (ctx.is_cayman() && sflags == AF_S)) {
            if (!p)
                p = sh->create_alu_packed();

            a->remove();
            p->push_back(a);
            if (sflags == AF_2V && p->count() == 2) {
                g->push_front(p);
                p = NULL;
            }
        }
    }

    if (p) {
        g->push_front(p);

        if (p->count() == 3 && ctx.is_cayman()) {
            // a cayman scalar instruction that can use 3 or 4 slots

            // FIXME for simplicity we always add the 4th slot here, but
            // probably we might want to always remove the 4th slot instead
            // and make sure that regalloc won't choose the 'w' component
            // for the dst

            alu_node *f = static_cast<alu_node*>(p->first);
            alu_node *a = sh->create_alu();
            a->src = f->src;
            a->dst.resize(f->dst.size());
            a->bc = f->bc;
            a->bc.slot = SLOT_W;
            p->push_back(a);
        }
    }

    return 0;
}

int bc_parser::decode_fetch_clause(cf_node* cf) {
    int r;
    unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;

    if (cf->bc.op_ptr->flags & FF_GDS)
        cf->subtype = NST_GDS_CLAUSE;
    else
        cf->subtype = NST_TEX_CLAUSE;

    while (cnt--) {
        fetch_node *n = sh->create_fetch();
        cf->push_back(n);
        if ((r = dec->decode_fetch(i, n->bc)))
            return r;
        if (n->bc.src_rel || n->bc.dst_rel)
            gpr_reladdr = true;
    }
    return 0;
}
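
/* Build IR for a fetch clause. SET_GRADIENTS_V/H and SET_TEXTURE_OFFSETS
 * only set up hidden state, so their source values are captured into the
 * grad_v/grad_h/texture_offsets vectors and folded into the consuming
 * instructions below; bc_finalizer re-emits the SET_* instructions when
 * the bytecode is rebuilt. */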
int bc_parser::prepare_fetch_clause(cf_node *cf) {

    vvec grad_v, grad_h, texture_offsets;

    for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {

        fetch_node *n = static_cast<fetch_node*>(*I);
        assert(n->is_valid());

        unsigned flags = n->bc.op_ptr->flags;

        unsigned vtx = flags & FF_VTX;
        unsigned gds = flags & FF_GDS;
        unsigned num_src = gds ? 2 : vtx ? ctx.vtx_src_num : 4;

        n->dst.resize(4);

        if (gds) {
            n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | NF_DONT_KILL;
        }
        if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
            sh->uses_gradients = true;
        }

        if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) {

            vvec *grad = NULL;

            switch (n->bc.op) {
            case FETCH_OP_SET_GRADIENTS_V:
                grad = &grad_v;
                break;
            case FETCH_OP_SET_GRADIENTS_H:
                grad = &grad_h;
                break;
            case FETCH_OP_SET_TEXTURE_OFFSETS:
                grad = &texture_offsets;
                break;
            default:
                assert(!"unexpected SET_GRAD instruction");
                return -1;
            }

            if (grad->empty())
                grad->resize(4);

            for (unsigned s = 0; s < 4; ++s) {
                unsigned sw = n->bc.src_sel[s];
                if (sw <= SEL_W)
                    (*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr,
                            sw, false);
                else if (sw == SEL_0)
                    (*grad)[s] = sh->get_const_value(0.0f);
                else if (sw == SEL_1)
                    (*grad)[s] = sh->get_const_value(1.0f);
            }
        } else {
            // Fold source values for instructions with hidden target values
            // into the instructions using them. The set instructions are
            // later re-emitted by bc_finalizer
            if (flags & FF_USEGRAD) {
                n->src.resize(12);
                std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4);
                std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8);
            } else if (flags & FF_USE_TEXTURE_OFFSETS) {
                n->src.resize(8);
                std::copy(texture_offsets.begin(), texture_offsets.end(),
                        n->src.begin() + 4);
            } else {
                n->src.resize(4);
            }

            for (int s = 0; s < 4; ++s) {
                if (n->bc.dst_sel[s] != SEL_MASK)
                    n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false);
                // NOTE: it doesn't matter here which components of the result
                // we are using, but the original n->bc.dst_sel should be taken
                // into account when building the bytecode
            }
            for (unsigned s = 0; s < num_src; ++s) {
                if (n->bc.src_sel[s] <= SEL_W)
                    n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr,
                            n->bc.src_sel[s], false);
            }

            // the scheduler will emit the appropriate instructions to set CF_IDX0/1
            if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
                n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1));
            }
            if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
                n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == V_SQ_CF_INDEX_1));
            }
        }

        if (n->bc.op == FETCH_OP_READ_SCRATCH) {
            n->src.push_back(sh->get_special_value(SV_SCRATCH));
            n->dst.push_back(sh->get_special_value(SV_SCRATCH));
        }
    }

    return 0;
}
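
/* Second pass over the decoded CF nodes: build the structured IR
 * (regions, repeats, departs, ifs) from the branch instructions, unroll
 * burst exports and memory writes into individual CF nodes, and thread
 * the special values (geometry emit, scratch) through as explicit
 * dependencies. */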
int bc_parser::prepare_ir() {

    for (id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) {
        cf_node *c = *I;

        if (!c)
            continue;

        unsigned flags = c->bc.op_ptr->flags;

        if (flags & CF_ALU) {
            prepare_alu_clause(c);
        } else if (flags & CF_FETCH) {
            prepare_fetch_clause(c);
        } else if (c->bc.op == CF_OP_CALL_FS) {
            sh->init_call_fs(c);
            c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
        } else if (flags & CF_LOOP_START) {
            prepare_loop(c);
        } else if (c->bc.op == CF_OP_JUMP) {
            prepare_if(c);
        } else if (c->bc.op == CF_OP_LOOP_END) {
            loop_stack.pop();
        } else if (c->bc.op == CF_OP_LOOP_CONTINUE) {
            assert(!loop_stack.empty());
            repeat_node *rep = sh->create_repeat(loop_stack.top());
            if (c->parent->first != c)
                rep->move(c->parent->first, c);
            c->replace_with(rep);
            sh->simplify_dep_rep(rep);
        } else if (c->bc.op == CF_OP_LOOP_BREAK) {
            assert(!loop_stack.empty());
            depart_node *dep = sh->create_depart(loop_stack.top());
            if (c->parent->first != c)
                dep->move(c->parent->first, c);
            c->replace_with(dep);
            sh->simplify_dep_rep(dep);
        } else if (flags & CF_EXP) {

            // unroll burst exports

            assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE);

            c->bc.set_op(CF_OP_EXPORT);

            unsigned burst_count = c->bc.burst_count;
            unsigned eop = c->bc.end_of_program;

            c->bc.end_of_program = 0;
            c->bc.burst_count = 0;

            do {
                c->src.resize(4);

                for (int s = 0; s < 4; ++s) {
                    switch (c->bc.sel[s]) {
                    case SEL_0:
                        c->src[s] = sh->get_const_value(0.0f);
                        break;
                    case SEL_1:
                        c->src[s] = sh->get_const_value(1.0f);
                        break;
                    case SEL_MASK:
                        break;
                    default:
                        if (c->bc.sel[s] <= SEL_W)
                            c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr,
                                    c->bc.sel[s], false);
                        else
                            assert(!"invalid src_sel for export");
                    }
                }

                if (!burst_count--)
                    break;

                cf_node *cf_next = sh->create_cf();
                cf_next->bc = c->bc;
                ++cf_next->bc.rw_gpr;
                ++cf_next->bc.array_base;

                c->insert_after(cf_next);
                c = cf_next;

            } while (1);

            c->bc.end_of_program = eop;
        } else if (flags & CF_MEM) {

            unsigned burst_count = c->bc.burst_count;
            unsigned eop = c->bc.end_of_program;

            c->bc.end_of_program = 0;
            c->bc.burst_count = 0;

            do {

                if (ctx.hw_class == HW_CLASS_R600 && c->bc.op == CF_OP_MEM_SCRATCH &&
                        (c->bc.type == 2 || c->bc.type == 3)) {
                    c->dst.resize(4);
                    for (int s = 0; s < 4; ++s) {
                        if (c->bc.comp_mask & (1 << s))
                            c->dst[s] =
                                sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
                    }
                } else {
                    c->src.resize(4);

                    for (int s = 0; s < 4; ++s) {
                        if (c->bc.comp_mask & (1 << s))
                            c->src[s] =
                                sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
                    }
                }

                if (((flags & CF_RAT) || (!(flags & CF_STRM))) &&
                        (c->bc.type & 1)) { // indexed write
                    c->src.resize(8);
                    for (int s = 0; s < 3; ++s) {
                        c->src[4 + s] =
                            sh->get_gpr_value(true, c->bc.index_gpr, s, false);
                    }

                    // FIXME probably we can relax it a bit
                    c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
                }

                if (flags & CF_EMIT) {
                    // this instruction implicitly depends on the prior
                    // [EMIT_][CUT]_VERTEX
                    c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
                    c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
                    if (sh->target == TARGET_ES) {
                        // for ES shaders this is an export
                        c->flags |= NF_DONT_KILL;
                    }
                } else if (c->bc.op == CF_OP_MEM_SCRATCH) {
                    c->src.push_back(sh->get_special_value(SV_SCRATCH));
                    c->dst.push_back(sh->get_special_value(SV_SCRATCH));
                }

                if (!burst_count--)
                    break;

                cf_node *cf_next = sh->create_cf();
                cf_next->bc = c->bc;
                ++cf_next->bc.rw_gpr;

                // FIXME is this correct?
                cf_next->bc.array_base += cf_next->bc.elem_size + 1;

                c->insert_after(cf_next);
                c = cf_next;
            } while (1);

            c->bc.end_of_program = eop;

        } else if (flags & CF_EMIT) {
            /* quick peephole: fold a CUT_VERTEX that immediately follows
             * an EMIT_VERTEX with the same count into EMIT_CUT_VERTEX */
            cf_node *prev = static_cast<cf_node *>(c->prev);
            if (c->bc.op == CF_OP_CUT_VERTEX &&
                    prev && prev->is_valid() &&
                    prev->bc.op == CF_OP_EMIT_VERTEX &&
                    c->bc.count == prev->bc.count) {
                prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX);
                prev->bc.end_of_program = c->bc.end_of_program;
                c->remove();
            } else {
                c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;

                c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
                c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
            }
        } else if (c->bc.op == CF_OP_WAIT_ACK) {
            c->src.push_back(sh->get_special_value(SV_SCRATCH));
            c->dst.push_back(sh->get_special_value(SV_SCRATCH));
        }
    }

    assert(loop_stack.empty());
    return 0;
}
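
/* The address of LOOP_START points one past the matching LOOP_END, so
 * cf_map[addr - 1] is the LOOP_END node. The loop body is wrapped in a
 * region/repeat pair; LOOP_BREAK and LOOP_CONTINUE inside the body are
 * turned into depart/repeat nodes targeting this region (see
 * prepare_ir). */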
int bc_parser::prepare_loop(cf_node* c) {
    assert(c->bc.addr - 1 < cf_map.size());

    cf_node *end = cf_map[c->bc.addr - 1];
    assert(end->bc.op == CF_OP_LOOP_END);
    assert(c->parent == end->parent);

    region_node *reg = sh->create_region();
    repeat_node *rep = sh->create_repeat(reg);

    reg->push_back(rep);
    c->insert_before(reg);
    rep->move(c, end->next);

    reg->src_loop = true;

    loop_stack.push(reg);
    return 0;
}

int bc_parser::prepare_if(cf_node* c) {
    assert(c->bc.addr - 1 < cf_map.size());
    cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

    if (!end)
        return 0; // not quite sure how this happens, malformed input?

    BCP_DUMP(
        sblog << "parsing JUMP @" << c->bc.id;
        sblog << "\n";
    );

    if (end->bc.op == CF_OP_ELSE) {
        BCP_DUMP(
            sblog << " found ELSE : ";
            dump::dump_op(end);
            sblog << "\n";
        );

        c_else = end;
        end = cf_map[c_else->bc.addr];
    } else {
        BCP_DUMP(
            sblog << " no else\n";
        );

        c_else = end;
    }

    if (c_else->parent != c->parent)
        c_else = NULL;

    if (end && end->parent != c->parent)
        end = NULL;

    region_node *reg = sh->create_region();

    depart_node *dep2 = sh->create_depart(reg);
    depart_node *dep = sh->create_depart(reg);
    if_node *n_if = sh->create_if();

    c->insert_before(reg);

    if (c_else != end)
        dep->move(c_else, end);
    dep2->move(c, end);

    reg->push_back(dep);
    dep->push_front(n_if);
    n_if->push_back(dep2);

    n_if->cond = sh->get_special_value(SV_EXEC_MASK);

    return 0;
}

} // namespace r600_sb