/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27#include "sb_bc.h" 28#include "sb_shader.h" 29#include "sb_pass.h" 30#include "eg_sq.h" // V_SQ_CF_INDEX_0/1 31 32namespace r600_sb { 33 34static const char* chans = "xyzw01?_"; 35 36static const char* vec_bs[] = { 37 "VEC_012", "VEC_021", "VEC_120", "VEC_102", "VEC_201", "VEC_210" 38}; 39 40static const char* scl_bs[] = { 41 "SCL_210", "SCL_122", "SCL_212", "SCL_221" 42}; 43 44 45bool bc_dump::visit(cf_node& n, bool enter) { 46 if (enter) { 47 48 id = n.bc.id << 1; 49 50 if ((n.bc.op_ptr->flags & CF_ALU) && n.bc.is_alu_extended()) { 51 dump_dw(id, 2); 52 id += 2; 53 sblog << "\n"; 54 } 55 56 dump_dw(id, 2); 57 dump(n); 58 59 if (n.bc.op_ptr->flags & CF_CLAUSE) { 60 id = n.bc.addr << 1; 61 new_group = 1; 62 } 63 } 64 return true; 65} 66 67bool bc_dump::visit(alu_node& n, bool enter) { 68 if (enter) { 69 sblog << " "; 70 dump_dw(id, 2); 71 72 if (new_group) { 73 sblog.print_w(++group_index, 5); 74 sblog << " "; 75 } else 76 sblog << " "; 77 78 dump(n); 79 id += 2; 80 81 new_group = n.bc.last; 82 } else { 83 if (n.bc.last) { 84 alu_group_node *g = 85 static_cast<alu_group_node*>(n.get_alu_group_node()); 86 assert(g); 87 for (unsigned k = 0; k < g->literals.size(); ++k) { 88 sblog << " "; 89 dump_dw(id, 1); 90 id += 1; 91 sblog << "\n"; 92 } 93 94 id = (id + 1) & ~1u; 95 } 96 } 97 98 return false; 99} 100 101bool bc_dump::visit(fetch_node& n, bool enter) { 102 if (enter) { 103 sblog << " "; 104 dump_dw(id, 3); 105 dump(n); 106 id += 4; 107 } 108 return false; 109} 110 111static void fill_to(sb_ostringstream &s, int pos) { 112 int l = s.str().length(); 113 if (l < pos) 114 s << std::string(pos-l, ' '); 115} 116 117void bc_dump::dump(cf_node& n) { 118 sb_ostringstream s; 119 s << n.bc.op_ptr->name; 120 121 if (n.bc.op_ptr->flags & CF_EXP) { 122 static const char *exp_type[] = {"PIXEL", "POS ", "PARAM"}; 123 124 fill_to(s, 18); 125 s << " " << exp_type[n.bc.type] << " "; 126 127 if (n.bc.burst_count) { 128 sb_ostringstream 
s2; 129 s2 << n.bc.array_base << "-" << n.bc.array_base + n.bc.burst_count; 130 s.print_wl(s2.str(), 5); 131 s << " R" << n.bc.rw_gpr << "-" << 132 n.bc.rw_gpr + n.bc.burst_count << "."; 133 } else { 134 s.print_wl(n.bc.array_base, 5); 135 s << " R" << n.bc.rw_gpr << "."; 136 } 137 138 for (int k = 0; k < 4; ++k) 139 s << chans[n.bc.sel[k]]; 140 141 } else if (n.bc.op_ptr->flags & CF_MEM) { 142 static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK", 143 "WRITE_IND_ACK"}; 144 fill_to(s, 18); 145 s << " " << exp_type[n.bc.type] << " "; 146 s.print_wl(n.bc.array_base, 5); 147 s << " R" << n.bc.rw_gpr << "."; 148 for (int k = 0; k < 4; ++k) 149 s << ((n.bc.comp_mask & (1 << k)) ? chans[k] : '_'); 150 151 if ((n.bc.op_ptr->flags & CF_RAT) && (n.bc.type & 1)) { 152 s << ", @R" << n.bc.index_gpr << ".xyz"; 153 } 154 if ((n.bc.op_ptr->flags & CF_MEM) && (n.bc.type & 1)) { 155 s << ", @R" << n.bc.index_gpr << ".x"; 156 } 157 158 s << " ES:" << n.bc.elem_size; 159 160 if (n.bc.mark) 161 s << " MARK"; 162 163 } else { 164 165 if (n.bc.op_ptr->flags & CF_CLAUSE) { 166 s << " " << n.bc.count+1; 167 } 168 169 s << " @" << (n.bc.addr << 1); 170 171 if (n.bc.op_ptr->flags & CF_ALU) { 172 static const char *index_mode[] = {"", " CF_INDEX_0", " CF_INDEX_1"}; 173 174 for (int k = 0; k < 4; ++k) { 175 bc_kcache &kc = n.bc.kc[k]; 176 if (kc.mode) { 177 s << " KC" << k << "[CB" << kc.bank << ":" << 178 (kc.addr << 4) << "-" << 179 (((kc.addr + kc.mode) << 4) - 1) << index_mode[kc.index_mode] << "]"; 180 } 181 } 182 } 183 184 if (n.bc.cond) 185 s << " CND:" << n.bc.cond; 186 187 if (n.bc.pop_count) 188 s << " POP:" << n.bc.pop_count; 189 190 if (n.bc.count && (n.bc.op_ptr->flags & CF_EMIT)) 191 s << " STREAM" << n.bc.count; 192 } 193 194 if (!n.bc.barrier) 195 s << " NO_BARRIER"; 196 197 if (n.bc.valid_pixel_mode) 198 s << " VPM"; 199 200 if (n.bc.whole_quad_mode) 201 s << " WQM"; 202 203 if (n.bc.end_of_program) 204 s << " EOP"; 205 206 sblog << s.str() << "\n"; 207} 208 209 
210static void print_sel(sb_ostream &s, int sel, int rel, int index_mode, 211 int need_brackets) { 212 if (rel && index_mode >= 5 && sel < 128) 213 s << "G"; 214 if (rel || need_brackets) { 215 s << "["; 216 } 217 s << sel; 218 if (rel) { 219 if (index_mode == 0 || index_mode == 6) 220 s << "+AR"; 221 else if (index_mode == 4) 222 s << "+AL"; 223 } 224 if (rel || need_brackets) { 225 s << "]"; 226 } 227} 228 229static void print_dst(sb_ostream &s, bc_alu &alu) 230{ 231 unsigned sel = alu.dst_gpr; 232 char reg_char = 'R'; 233 if (sel >= 128 - 4) { // clause temporary gpr 234 sel -= 128 - 4; 235 reg_char = 'T'; 236 } 237 238 if (alu.write_mask || (alu.op_ptr->src_count == 3 && alu.op < LDS_OP2_LDS_ADD)) { 239 s << reg_char; 240 print_sel(s, sel, alu.dst_rel, alu.index_mode, 0); 241 } else { 242 s << "__"; 243 } 244 s << "."; 245 s << chans[alu.dst_chan]; 246} 247 248static void print_src(sb_ostream &s, bc_alu &alu, unsigned idx) 249{ 250 bc_alu_src *src = &alu.src[idx]; 251 unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0; 252 253 if (src->neg) 254 s <<"-"; 255 if (src->abs) 256 s <<"|"; 257 258 if (sel < 128 - 4) { 259 s << "R"; 260 } else if (sel < 128) { 261 s << "T"; 262 sel -= 128 - 4; 263 } else if (sel < 160) { 264 s << "KC0"; 265 need_brackets = 1; 266 sel -= 128; 267 } else if (sel < 192) { 268 s << "KC1"; 269 need_brackets = 1; 270 sel -= 160; 271 } else if (sel >= 448) { 272 s << "Param"; 273 sel -= 448; 274 } else if (sel >= 288) { 275 s << "KC3"; 276 need_brackets = 1; 277 sel -= 288; 278 } else if (sel >= 256) { 279 s << "KC2"; 280 need_brackets = 1; 281 sel -= 256; 282 } else { 283 need_sel = 0; 284 need_chan = 0; 285 switch (sel) { 286 case ALU_SRC_LDS_OQ_A: 287 s << "LDS_OQ_A"; 288 need_chan = 1; 289 break; 290 case ALU_SRC_LDS_OQ_B: 291 s << "LDS_OQ_B"; 292 need_chan = 1; 293 break; 294 case ALU_SRC_LDS_OQ_A_POP: 295 s << "LDS_OQ_A_POP"; 296 need_chan = 1; 297 break; 298 case ALU_SRC_LDS_OQ_B_POP: 299 s << "LDS_OQ_B_POP"; 300 
need_chan = 1; 301 break; 302 case ALU_SRC_LDS_DIRECT_A: 303 s << "LDS_A["; s.print_zw_hex(src->value.u, 8); s << "]"; 304 break; 305 case ALU_SRC_LDS_DIRECT_B: 306 s << "LDS_B["; s.print_zw_hex(src->value.u, 8); s << "]"; 307 break; 308 case ALU_SRC_PS: 309 s << "PS"; 310 break; 311 case ALU_SRC_PV: 312 s << "PV"; 313 need_chan = 1; 314 break; 315 case ALU_SRC_LITERAL: 316 s << "[0x"; 317 s.print_zw_hex(src->value.u, 8); 318 s << " " << src->value.f << "]"; 319 need_chan = 1; 320 break; 321 case ALU_SRC_0_5: 322 s << "0.5"; 323 break; 324 case ALU_SRC_M_1_INT: 325 s << "-1"; 326 break; 327 case ALU_SRC_1_INT: 328 s << "1"; 329 break; 330 case ALU_SRC_1: 331 s << "1.0"; 332 break; 333 case ALU_SRC_0: 334 s << "0"; 335 break; 336 case ALU_SRC_TIME_LO: 337 s << "TIME_LO"; 338 break; 339 case ALU_SRC_TIME_HI: 340 s << "TIME_HI"; 341 break; 342 case ALU_SRC_MASK_LO: 343 s << "MASK_LO"; 344 break; 345 case ALU_SRC_MASK_HI: 346 s << "MASK_HI"; 347 break; 348 case ALU_SRC_HW_WAVE_ID: 349 s << "HW_WAVE_ID"; 350 break; 351 case ALU_SRC_SIMD_ID: 352 s << "SIMD_ID"; 353 break; 354 case ALU_SRC_SE_ID: 355 s << "SE_ID"; 356 break; 357 default: 358 s << "??IMM_" << sel; 359 break; 360 } 361 } 362 363 if (need_sel) 364 print_sel(s, sel, src->rel, alu.index_mode, need_brackets); 365 366 if (need_chan) { 367 s << "." << chans[src->chan]; 368 } 369 370 if (src->abs) 371 s << "|"; 372} 373void bc_dump::dump(alu_node& n) { 374 sb_ostringstream s; 375 static const char *omod_str[] = {"","*2","*4","/2"}; 376 static const char *slots = "xyzwt"; 377 378 s << (n.bc.update_exec_mask ? "M" : " "); 379 s << (n.bc.update_pred ? "P" : " "); 380 s << " "; 381 s << (n.bc.pred_sel>=2 ? (n.bc.pred_sel == 2 ? "0" : "1") : " "); 382 s << " "; 383 384 s << slots[n.bc.slot] << ": "; 385 386 s << n.bc.op_ptr->name << omod_str[n.bc.omod] << (n.bc.clamp ? "_sat" : ""); 387 fill_to(s, 26); 388 s << " "; 389 390 print_dst(s, n.bc); 391 for (int k = 0; k < n.bc.op_ptr->src_count; ++k) { 392 s << (k ? 
", " : ", "); 393 print_src(s, n.bc, k); 394 } 395 396 if (n.bc.bank_swizzle) { 397 fill_to(s, 55); 398 if (n.bc.slot == SLOT_TRANS) 399 s << " " << scl_bs[n.bc.bank_swizzle]; 400 else 401 s << " " << vec_bs[n.bc.bank_swizzle]; 402 } 403 404 if (ctx.is_cayman()) { 405 if (n.bc.op == ALU_OP1_MOVA_INT) { 406 static const char *mova_str[] = { " AR_X", " PC", " CF_IDX0", " CF_IDX1", 407 " Unknown MOVA_INT dest" }; 408 s << mova_str[std::min(n.bc.dst_gpr, 4u)]; // CM_V_SQ_MOVA_DST_AR_* 409 } 410 } 411 412 if (n.bc.lds_idx_offset) { 413 s << " IDX_OFFSET:" << n.bc.lds_idx_offset; 414 } 415 416 sblog << s.str() << "\n"; 417} 418 419int bc_dump::init() { 420 sb_ostringstream s; 421 s << "===== SHADER #" << sh.id; 422 423 if (sh.optimized) 424 s << " OPT"; 425 426 s << " "; 427 428 std::string target = std::string(" ") + 429 sh.get_full_target_name() + " ====="; 430 431 while (s.str().length() + target.length() < 80) 432 s << "="; 433 434 s << target; 435 436 sblog << "\n" << s.str() << "\n"; 437 438 s.clear(); 439 440 if (bc_data) { 441 s << "===== " << ndw << " dw ===== " << sh.ngpr 442 << " gprs ===== " << sh.nstack << " stack "; 443 } 444 445 while (s.str().length() < 80) 446 s << "="; 447 448 sblog << s.str() << "\n"; 449 450 return 0; 451} 452 453int bc_dump::done() { 454 sb_ostringstream s; 455 s << "===== SHADER_END "; 456 457 while (s.str().length() < 80) 458 s << "="; 459 460 sblog << s.str() << "\n\n"; 461 462 return 0; 463} 464 465bc_dump::bc_dump(shader& s, bytecode* bc) : 466 vpass(s), bc_data(), ndw(), id(), 467 new_group(), group_index() { 468 469 if (bc) { 470 bc_data = bc->data(); 471 ndw = bc->ndw(); 472 } 473} 474 475void bc_dump::dump(fetch_node& n) { 476 sb_ostringstream s; 477 static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""}; 478 unsigned gds = n.bc.op_ptr->flags & FF_GDS; 479 bool gds_has_ret = gds && n.bc.op >= FETCH_OP_GDS_ADD_RET && 480 n.bc.op <= FETCH_OP_GDS_USHORT_READ_RET; 481 bool show_dst = !gds || (gds && gds_has_ret); 482 483 
s << n.bc.op_ptr->name; 484 fill_to(s, 20); 485 486 if (show_dst) { 487 s << "R"; 488 print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0); 489 s << "."; 490 for (int k = 0; k < 4; ++k) 491 s << chans[n.bc.dst_sel[k]]; 492 s << ", "; 493 } 494 495 s << "R"; 496 print_sel(s, n.bc.src_gpr, n.bc.src_rel, INDEX_LOOP, 0); 497 s << "."; 498 499 unsigned vtx = n.bc.op_ptr->flags & FF_VTX; 500 unsigned num_src_comp = gds ? 3 : vtx ? ctx.is_cayman() ? 2 : 1 : 4; 501 502 for (unsigned k = 0; k < num_src_comp; ++k) 503 s << chans[n.bc.src_sel[k]]; 504 505 if (vtx && n.bc.offset[0]) { 506 s << " + " << n.bc.offset[0] << "b "; 507 } 508 509 if (!gds) 510 s << ", RID:" << n.bc.resource_id; 511 512 if (gds) { 513 s << " UAV:" << n.bc.uav_id; 514 if (n.bc.uav_index_mode) 515 s << " UAV:SQ_CF_INDEX_" << (n.bc.uav_index_mode - V_SQ_CF_INDEX_0); 516 if (n.bc.bcast_first_req) 517 s << " BFQ"; 518 if (n.bc.alloc_consume) 519 s << " AC"; 520 } else if (vtx) { 521 s << " " << fetch_type[n.bc.fetch_type]; 522 if (!ctx.is_cayman() && n.bc.mega_fetch_count) 523 s << " MFC:" << n.bc.mega_fetch_count; 524 if (n.bc.fetch_whole_quad) 525 s << " FWQ"; 526 if (ctx.is_egcm() && n.bc.resource_index_mode) 527 s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0); 528 if (ctx.is_egcm() && n.bc.sampler_index_mode) 529 s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0); 530 531 s << " UCF:" << n.bc.use_const_fields 532 << " FMT(DTA:" << n.bc.data_format 533 << " NUM:" << n.bc.num_format_all 534 << " COMP:" << n.bc.format_comp_all 535 << " MODE:" << n.bc.srf_mode_all << ")"; 536 } else { 537 s << ", SID:" << n.bc.sampler_id; 538 if (n.bc.lod_bias) 539 s << " LB:" << n.bc.lod_bias; 540 s << " CT:"; 541 for (unsigned k = 0; k < 4; ++k) 542 s << (n.bc.coord_type[k] ? 
"N" : "U"); 543 for (unsigned k = 0; k < 3; ++k) 544 if (n.bc.offset[k]) 545 s << " O" << chans[k] << ":" << n.bc.offset[k]; 546 if (ctx.is_egcm() && n.bc.resource_index_mode) 547 s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0); 548 if (ctx.is_egcm() && n.bc.sampler_index_mode) 549 s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0); 550 } 551 552 if (n.bc.op_ptr->flags & FF_MEM) { 553 s << ", ELEM_SIZE:" << n.bc.elem_size; 554 if (n.bc.uncached) 555 s << ", UNCACHED"; 556 if (n.bc.indexed) 557 s << ", INDEXED"; 558 if (n.bc.burst_count) 559 s << ", BURST_COUNT:" << n.bc.burst_count; 560 s << ", ARRAY_BASE:" << n.bc.array_base; 561 s << ", ARRAY_SIZE:" << n.bc.array_size; 562 } 563 564 sblog << s.str() << "\n"; 565} 566 567void bc_dump::dump_dw(unsigned dw_id, unsigned count) { 568 if (!bc_data) 569 return; 570 571 assert(dw_id + count <= ndw); 572 573 sblog.print_zw(dw_id, 4); 574 sblog << " "; 575 while (count--) { 576 sblog.print_zw_hex(bc_data[dw_id++], 8); 577 sblog << " "; 578 } 579} 580 581} // namespace r600_sb 582