/* disassemble.c revision 7ec681f3 */
1/* 2 * Copyright (C) 2019 Connor Abbott <cwabbott0@gmail.com> 3 * Copyright (C) 2019 Lyude Paul <thatslyude@gmail.com> 4 * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 24 */ 25 26#include <stdbool.h> 27#include <stdio.h> 28#include <stdint.h> 29#include <assert.h> 30#include <inttypes.h> 31#include <string.h> 32 33#include "bifrost.h" 34#include "disassemble.h" 35#include "bi_print_common.h" 36#include "util/compiler.h" 37#include "util/macros.h" 38 39// return bits (high, lo] 40static uint64_t bits(uint32_t word, unsigned lo, unsigned high) 41{ 42 if (high == 32) 43 return word >> lo; 44 return (word & ((1 << high) - 1)) >> lo; 45} 46 47// each of these structs represents an instruction that's dispatched in one 48// cycle. 
Note that these instructions are packed in funny ways within the 49// clause, hence the need for a separate struct. 50struct bifrost_alu_inst { 51 uint32_t fma_bits; 52 uint32_t add_bits; 53 uint64_t reg_bits; 54}; 55 56static unsigned get_reg0(struct bifrost_regs regs) 57{ 58 if (regs.ctrl == 0) 59 return regs.reg0 | ((regs.reg1 & 0x1) << 5); 60 61 return regs.reg0 <= regs.reg1 ? regs.reg0 : 63 - regs.reg0; 62} 63 64static unsigned get_reg1(struct bifrost_regs regs) 65{ 66 return regs.reg0 <= regs.reg1 ? regs.reg1 : 63 - regs.reg1; 67} 68 69// this represents the decoded version of the ctrl register field. 70struct bifrost_reg_ctrl { 71 bool read_reg0; 72 bool read_reg1; 73 struct bifrost_reg_ctrl_23 slot23; 74}; 75 76static void dump_header(FILE *fp, struct bifrost_header header, bool verbose) 77{ 78 fprintf(fp, "ds(%u) ", header.dependency_slot); 79 80 if (header.staging_barrier) 81 fprintf(fp, "osrb "); 82 83 fprintf(fp, "%s ", bi_flow_control_name(header.flow_control)); 84 85 if (header.suppress_inf) 86 fprintf(fp, "inf_suppress "); 87 if (header.suppress_nan) 88 fprintf(fp, "nan_suppress "); 89 90 if (header.flush_to_zero == BIFROST_FTZ_DX11) 91 fprintf(fp, "ftz_dx11 "); 92 else if (header.flush_to_zero == BIFROST_FTZ_ALWAYS) 93 fprintf(fp, "ftz_hsa "); 94 if (header.flush_to_zero == BIFROST_FTZ_ABRUPT) 95 fprintf(fp, "ftz_au "); 96 97 assert(!header.zero1); 98 assert(!header.zero2); 99 100 if (header.float_exceptions == BIFROST_EXCEPTIONS_DISABLED) 101 fprintf(fp, "fpe_ts "); 102 else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_DIVISION) 103 fprintf(fp, "fpe_pd "); 104 else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_SQRT) 105 fprintf(fp, "fpe_psqr "); 106 107 if (header.message_type) 108 fprintf(fp, "%s ", bi_message_type_name(header.message_type)); 109 110 if (header.terminate_discarded_threads) 111 fprintf(fp, "td "); 112 113 if (header.next_clause_prefetch) 114 fprintf(fp, "ncph "); 115 116 if (header.next_message_type) 117 
fprintf(fp, "next_%s ", bi_message_type_name(header.next_message_type)); 118 if (header.dependency_wait != 0) { 119 fprintf(fp, "dwb("); 120 bool first = true; 121 for (unsigned i = 0; i < 8; i++) { 122 if (header.dependency_wait & (1 << i)) { 123 if (!first) { 124 fprintf(fp, ", "); 125 } 126 fprintf(fp, "%u", i); 127 first = false; 128 } 129 } 130 fprintf(fp, ") "); 131 } 132 133 fprintf(fp, "\n"); 134} 135 136static struct bifrost_reg_ctrl DecodeRegCtrl(FILE *fp, struct bifrost_regs regs, bool first) 137{ 138 struct bifrost_reg_ctrl decoded = {}; 139 unsigned ctrl; 140 if (regs.ctrl == 0) { 141 ctrl = regs.reg1 >> 2; 142 decoded.read_reg0 = !(regs.reg1 & 0x2); 143 decoded.read_reg1 = false; 144 } else { 145 ctrl = regs.ctrl; 146 decoded.read_reg0 = decoded.read_reg1 = true; 147 } 148 149 /* Modify control based on state */ 150 if (first) 151 ctrl = (ctrl & 0x7) | ((ctrl & 0x8) << 1); 152 else if (regs.reg2 == regs.reg3) 153 ctrl += 16; 154 155 decoded.slot23 = bifrost_reg_ctrl_lut[ctrl]; 156 ASSERTED struct bifrost_reg_ctrl_23 reserved = { 0 }; 157 assert(memcmp(&decoded.slot23, &reserved, sizeof(reserved))); 158 159 return decoded; 160} 161 162static void dump_regs(FILE *fp, struct bifrost_regs srcs, bool first) 163{ 164 struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, srcs, first); 165 fprintf(fp, " # "); 166 if (ctrl.read_reg0) 167 fprintf(fp, "slot 0: r%u ", get_reg0(srcs)); 168 if (ctrl.read_reg1) 169 fprintf(fp, "slot 1: r%u ", get_reg1(srcs)); 170 171 const char *slot3_fma = ctrl.slot23.slot3_fma ? 
"FMA" : "ADD"; 172 173 if (ctrl.slot23.slot2 == BIFROST_OP_WRITE) 174 fprintf(fp, "slot 2: r%u (write FMA) ", srcs.reg2); 175 else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_LO) 176 fprintf(fp, "slot 2: r%u (write lo FMA) ", srcs.reg2); 177 else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_HI) 178 fprintf(fp, "slot 2: r%u (write hi FMA) ", srcs.reg2); 179 else if (ctrl.slot23.slot2 == BIFROST_OP_READ) 180 fprintf(fp, "slot 2: r%u (read) ", srcs.reg2); 181 182 if (ctrl.slot23.slot3 == BIFROST_OP_WRITE) 183 fprintf(fp, "slot 3: r%u (write %s) ", srcs.reg3, slot3_fma); 184 else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_LO) 185 fprintf(fp, "slot 3: r%u (write lo %s) ", srcs.reg3, slot3_fma); 186 else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_HI) 187 fprintf(fp, "slot 3: r%u (write hi %s) ", srcs.reg3, slot3_fma); 188 189 if (srcs.fau_idx) 190 fprintf(fp, "fau %X ", srcs.fau_idx); 191 192 fprintf(fp, "\n"); 193} 194 195static void 196bi_disasm_dest_mask(FILE *fp, enum bifrost_reg_op op) 197{ 198 if (op == BIFROST_OP_WRITE_LO) 199 fprintf(fp, ".h0"); 200 else if (op == BIFROST_OP_WRITE_HI) 201 fprintf(fp, ".h1"); 202} 203 204void 205bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool last) 206{ 207 /* If this is the last instruction, next_regs points to the first reg entry. */ 208 struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last); 209 if (ctrl.slot23.slot2 >= BIFROST_OP_WRITE) { 210 fprintf(fp, "r%u:t0", next_regs->reg2); 211 bi_disasm_dest_mask(fp, ctrl.slot23.slot2); 212 } else if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && ctrl.slot23.slot3_fma) { 213 fprintf(fp, "r%u:t0", next_regs->reg3); 214 bi_disasm_dest_mask(fp, ctrl.slot23.slot3); 215 } else 216 fprintf(fp, "t0"); 217} 218 219void 220bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool last) 221{ 222 /* If this is the last instruction, next_regs points to the first reg entry. 
*/ 223 struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last); 224 225 if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && !ctrl.slot23.slot3_fma) { 226 fprintf(fp, "r%u:t1", next_regs->reg3); 227 bi_disasm_dest_mask(fp, ctrl.slot23.slot3); 228 } else 229 fprintf(fp, "t1"); 230} 231 232static void dump_const_imm(FILE *fp, uint32_t imm) 233{ 234 union { 235 float f; 236 uint32_t i; 237 } fi; 238 fi.i = imm; 239 fprintf(fp, "0x%08x /* %f */", imm, fi.f); 240} 241 242static void 243dump_pc_imm(FILE *fp, uint64_t imm, unsigned branch_offset, enum bi_constmod mod, bool high32) 244{ 245 if (mod == BI_CONSTMOD_PC_HI && !high32) { 246 dump_const_imm(fp, imm); 247 return; 248 } 249 250 /* 60-bit sign-extend */ 251 uint64_t zx64 = (imm << 4); 252 int64_t sx64 = zx64; 253 sx64 >>= 4; 254 255 /* 28-bit sign extend x 2 */ 256 uint32_t imm32[2] = { (uint32_t) imm, (uint32_t) (imm >> 32) }; 257 uint32_t zx32[2] = { imm32[0] << 4, imm32[1] << 4 }; 258 int32_t sx32[2] = { zx32[0], zx32[1] }; 259 sx32[0] >>= 4; 260 sx32[1] >>= 4; 261 262 int64_t offs = 0; 263 264 switch (mod) { 265 case BI_CONSTMOD_PC_LO: 266 offs = sx64; 267 break; 268 case BI_CONSTMOD_PC_HI: 269 offs = sx32[1]; 270 break; 271 case BI_CONSTMOD_PC_LO_HI: 272 offs = sx32[high32]; 273 break; 274 default: 275 unreachable("Invalid PC modifier"); 276 } 277 278 assert((offs & 15) == 0); 279 fprintf(fp, "clause_%" PRId64, branch_offset + (offs / 16)); 280 281 if (mod == BI_CONSTMOD_PC_LO && high32) 282 fprintf(fp, " >> 32"); 283 284 /* While technically in spec, referencing the current clause as (pc + 285 * 0) likely indicates an unintended infinite loop */ 286 if (offs == 0) 287 fprintf(fp, " /* XXX: likely an infinite loop */"); 288} 289 290/* Convert an index to an embedded constant in FAU-RAM to the index of the 291 * embedded constant. No, it's not in order. Yes, really. 
*/ 292 293static unsigned 294const_fau_to_idx(unsigned fau_value) 295{ 296 unsigned map[8] = { 297 ~0, ~0, 4, 5, 0, 1, 2, 3 298 }; 299 300 assert(map[fau_value] < 6); 301 return map[fau_value]; 302} 303 304static void dump_fau_src(FILE *fp, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool high32) 305{ 306 if (srcs.fau_idx & 0x80) { 307 unsigned uniform = (srcs.fau_idx & 0x7f); 308 fprintf(fp, "u%u.w%u", uniform, high32); 309 } else if (srcs.fau_idx >= 0x20) { 310 unsigned idx = const_fau_to_idx(srcs.fau_idx >> 4); 311 uint64_t imm = consts->raw[idx]; 312 imm |= (srcs.fau_idx & 0xf); 313 if (consts->mods[idx] != BI_CONSTMOD_NONE) 314 dump_pc_imm(fp, imm, branch_offset, consts->mods[idx], high32); 315 else if (high32) 316 dump_const_imm(fp, imm >> 32); 317 else 318 dump_const_imm(fp, imm); 319 } else { 320 switch (srcs.fau_idx) { 321 case 0: 322 fprintf(fp, "#0"); 323 break; 324 case 1: 325 fprintf(fp, "lane_id"); 326 break; 327 case 2: 328 fprintf(fp, "warp_id"); 329 break; 330 case 3: 331 fprintf(fp, "core_id"); 332 break; 333 case 4: 334 fprintf(fp, "framebuffer_size"); 335 break; 336 case 5: 337 fprintf(fp, "atest_datum"); 338 break; 339 case 6: 340 fprintf(fp, "sample"); 341 break; 342 case 8: 343 case 9: 344 case 10: 345 case 11: 346 case 12: 347 case 13: 348 case 14: 349 case 15: 350 fprintf(fp, "blend_descriptor_%u", (unsigned) srcs.fau_idx - 8); 351 break; 352 default: 353 fprintf(fp, "XXX - reserved%u", (unsigned) srcs.fau_idx); 354 break; 355 } 356 357 if (high32) 358 fprintf(fp, ".y"); 359 else 360 fprintf(fp, ".x"); 361 } 362} 363 364void 365dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool isFMA) 366{ 367 switch (src) { 368 case 0: 369 fprintf(fp, "r%u", get_reg0(srcs)); 370 break; 371 case 1: 372 fprintf(fp, "r%u", get_reg1(srcs)); 373 break; 374 case 2: 375 fprintf(fp, "r%u", srcs.reg2); 376 break; 377 case 3: 378 if (isFMA) 379 fprintf(fp, "#0"); 380 
else 381 fprintf(fp, "t"); // i.e. the output of FMA this cycle 382 break; 383 case 4: 384 dump_fau_src(fp, srcs, branch_offset, consts, false); 385 break; 386 case 5: 387 dump_fau_src(fp, srcs, branch_offset, consts, true); 388 break; 389 case 6: 390 fprintf(fp, "t0"); 391 break; 392 case 7: 393 fprintf(fp, "t1"); 394 break; 395 } 396} 397 398/* Tables for decoding M0, or if M0 == 7, M1 respectively. 399 * 400 * XXX: It's not clear if the third entry of M1_table corresponding to (7, 2) 401 * should have PC_LO_HI in the EC1 slot, or it's a weird hybrid mode? I would 402 * say this needs testing but no code should ever actually use this mode. 403 */ 404 405static const enum bi_constmod M1_table[7][2] = { 406 { BI_CONSTMOD_NONE, BI_CONSTMOD_NONE }, 407 { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE }, 408 { BI_CONSTMOD_PC_LO, BI_CONSTMOD_PC_LO }, 409 { ~0, ~0 }, 410 { BI_CONSTMOD_PC_HI, BI_CONSTMOD_NONE }, 411 { BI_CONSTMOD_PC_HI, BI_CONSTMOD_PC_HI }, 412 { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE }, 413}; 414 415static const enum bi_constmod M2_table[4][2] = { 416 { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_NONE }, 417 { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI }, 418 { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_LO_HI }, 419 { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI }, 420}; 421 422static void 423decode_M(enum bi_constmod *mod, unsigned M1, unsigned M2, bool single) 424{ 425 if (M1 >= 8) { 426 mod[0] = BI_CONSTMOD_NONE; 427 428 if (!single) 429 mod[1] = BI_CONSTMOD_NONE; 430 431 return; 432 } else if (M1 == 7) { 433 assert(M2 < 4); 434 memcpy(mod, M2_table[M2], sizeof(*mod) * (single ? 1 : 2)); 435 } else { 436 assert(M1 != 3); 437 memcpy(mod, M1_table[M1], sizeof(*mod) * (single ? 
1 : 2)); 438 } 439} 440 441static void dump_clause(FILE *fp, uint32_t *words, unsigned *size, unsigned offset, bool verbose) 442{ 443 // State for a decoded clause 444 struct bifrost_alu_inst instrs[8] = {}; 445 struct bi_constants consts = {}; 446 unsigned num_instrs = 0; 447 unsigned num_consts = 0; 448 uint64_t header_bits = 0; 449 450 unsigned i; 451 for (i = 0; ; i++, words += 4) { 452 if (verbose) { 453 fprintf(fp, "# "); 454 for (int j = 0; j < 4; j++) 455 fprintf(fp, "%08x ", words[3 - j]); // low bit on the right 456 fprintf(fp, "\n"); 457 } 458 unsigned tag = bits(words[0], 0, 8); 459 460 // speculatively decode some things that are common between many formats, so we can share some code 461 struct bifrost_alu_inst main_instr = {}; 462 // 20 bits 463 main_instr.add_bits = bits(words[2], 2, 32 - 13); 464 // 23 bits 465 main_instr.fma_bits = bits(words[1], 11, 32) | bits(words[2], 0, 2) << (32 - 11); 466 // 35 bits 467 main_instr.reg_bits = ((uint64_t) bits(words[1], 0, 11)) << 24 | (uint64_t) bits(words[0], 8, 32); 468 469 uint64_t const0 = bits(words[0], 8, 32) << 4 | (uint64_t) words[1] << 28 | bits(words[2], 0, 4) << 60; 470 uint64_t const1 = bits(words[2], 4, 32) << 4 | (uint64_t) words[3] << 32; 471 472 /* Z-bit */ 473 bool stop = tag & 0x40; 474 475 if (verbose) { 476 fprintf(fp, "# tag: 0x%02x\n", tag); 477 } 478 if (tag & 0x80) { 479 /* Format 5 or 10 */ 480 unsigned idx = stop ? 
5 : 2; 481 main_instr.add_bits |= ((tag >> 3) & 0x7) << 17; 482 instrs[idx + 1] = main_instr; 483 instrs[idx].add_bits = bits(words[3], 0, 17) | ((tag & 0x7) << 17); 484 instrs[idx].fma_bits |= bits(words[2], 19, 32) << 10; 485 consts.raw[0] = bits(words[3], 17, 32) << 4; 486 } else { 487 bool done = false; 488 switch ((tag >> 3) & 0x7) { 489 case 0x0: 490 switch (tag & 0x7) { 491 case 0x3: 492 /* Format 1 */ 493 main_instr.add_bits |= bits(words[3], 29, 32) << 17; 494 instrs[1] = main_instr; 495 num_instrs = 2; 496 done = stop; 497 break; 498 case 0x4: 499 /* Format 3 */ 500 instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; 501 instrs[2].fma_bits |= bits(words[2], 19, 32) << 10; 502 consts.raw[0] = const0; 503 decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true); 504 num_instrs = 3; 505 num_consts = 1; 506 done = stop; 507 break; 508 case 0x1: 509 case 0x5: 510 /* Format 4 */ 511 instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; 512 instrs[2].fma_bits |= bits(words[2], 19, 32) << 10; 513 main_instr.add_bits |= bits(words[3], 26, 29) << 17; 514 instrs[3] = main_instr; 515 if ((tag & 0x7) == 0x5) { 516 num_instrs = 4; 517 done = stop; 518 } 519 break; 520 case 0x6: 521 /* Format 8 */ 522 instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; 523 instrs[5].fma_bits |= bits(words[2], 19, 32) << 10; 524 consts.raw[0] = const0; 525 decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true); 526 num_instrs = 6; 527 num_consts = 1; 528 done = stop; 529 break; 530 case 0x7: 531 /* Format 9 */ 532 instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; 533 instrs[5].fma_bits |= bits(words[2], 19, 32) << 10; 534 main_instr.add_bits |= bits(words[3], 26, 29) << 17; 535 instrs[6] = main_instr; 536 num_instrs = 7; 537 done = stop; 538 break; 539 default: 540 unreachable("[INSTR_INVALID_ENC] Invalid tag bits"); 541 } 542 break; 543 case 0x2: 544 case 
0x3: { 545 /* Format 6 or 11 */ 546 unsigned idx = ((tag >> 3) & 0x7) == 2 ? 4 : 7; 547 main_instr.add_bits |= (tag & 0x7) << 17; 548 instrs[idx] = main_instr; 549 consts.raw[0] |= (bits(words[2], 19, 32) | ((uint64_t) words[3] << 13)) << 19; 550 num_consts = 1; 551 num_instrs = idx + 1; 552 done = stop; 553 break; 554 } 555 case 0x4: { 556 /* Format 2 */ 557 unsigned idx = stop ? 4 : 1; 558 main_instr.add_bits |= (tag & 0x7) << 17; 559 instrs[idx] = main_instr; 560 instrs[idx + 1].fma_bits |= bits(words[3], 22, 32); 561 instrs[idx + 1].reg_bits = bits(words[2], 19, 32) | (bits(words[3], 0, 22) << (32 - 19)); 562 break; 563 } 564 case 0x1: 565 /* Format 0 - followed by constants */ 566 num_instrs = 1; 567 done = stop; 568 FALLTHROUGH; 569 case 0x5: 570 /* Format 0 - followed by instructions */ 571 header_bits = bits(words[2], 19, 32) | ((uint64_t) words[3] << (32 - 19)); 572 main_instr.add_bits |= (tag & 0x7) << 17; 573 instrs[0] = main_instr; 574 break; 575 case 0x6: 576 case 0x7: { 577 /* Format 12 */ 578 unsigned pos = tag & 0xf; 579 580 struct { 581 unsigned const_idx; 582 unsigned nr_tuples; 583 } pos_table[0x10] = { 584 { 0, 1 }, 585 { 0, 2 }, 586 { 0, 4 }, 587 { 1, 3 }, 588 { 1, 5 }, 589 { 2, 4 }, 590 { 0, 7 }, 591 { 1, 6 }, 592 { 3, 5 }, 593 { 1, 8 }, 594 { 2, 7 }, 595 { 3, 6 }, 596 { 3, 8 }, 597 { 4, 7 }, 598 { 5, 6 }, 599 { ~0, ~0 } 600 }; 601 602 ASSERTED bool valid_count = pos_table[pos].nr_tuples == num_instrs; 603 assert(valid_count && "INSTR_INVALID_ENC"); 604 605 unsigned const_idx = pos_table[pos].const_idx; 606 607 if (num_consts < const_idx + 2) 608 num_consts = const_idx + 2; 609 610 consts.raw[const_idx] = const0; 611 consts.raw[const_idx + 1] = const1; 612 613 /* Calculate M values from A, B and 4-bit 614 * unsigned arithmetic. 
Mathematically it 615 * should be (A - B) % 16 but we use this 616 * alternate form to avoid sign issues */ 617 618 unsigned A1 = bits(words[2], 0, 4); 619 unsigned B1 = bits(words[3], 28, 32); 620 unsigned A2 = bits(words[1], 0, 4); 621 unsigned B2 = bits(words[2], 28, 32); 622 623 unsigned M1 = (16 + A1 - B1) & 0xF; 624 unsigned M2 = (16 + A2 - B2) & 0xF; 625 626 decode_M(&consts.mods[const_idx], M1, M2, false); 627 628 done = stop; 629 break; 630 } 631 default: 632 break; 633 } 634 635 if (done) 636 break; 637 } 638 } 639 640 *size = i + 1; 641 642 if (verbose) { 643 fprintf(fp, "# header: %012" PRIx64 "\n", header_bits); 644 } 645 646 struct bifrost_header header; 647 memcpy((char *) &header, (char *) &header_bits, sizeof(struct bifrost_header)); 648 dump_header(fp, header, verbose); 649 650 fprintf(fp, "{\n"); 651 for (i = 0; i < num_instrs; i++) { 652 struct bifrost_regs regs, next_regs; 653 if (i + 1 == num_instrs) { 654 memcpy((char *) &next_regs, (char *) &instrs[0].reg_bits, 655 sizeof(next_regs)); 656 } else { 657 memcpy((char *) &next_regs, (char *) &instrs[i + 1].reg_bits, 658 sizeof(next_regs)); 659 } 660 661 memcpy((char *) ®s, (char *) &instrs[i].reg_bits, sizeof(regs)); 662 663 if (verbose) { 664 fprintf(fp, " # regs: %016" PRIx64 "\n", instrs[i].reg_bits); 665 dump_regs(fp, regs, i == 0); 666 } 667 668 bi_disasm_fma(fp, instrs[i].fma_bits, ®s, &next_regs, 669 header.staging_register, offset, &consts, 670 i + 1 == num_instrs); 671 672 bi_disasm_add(fp, instrs[i].add_bits, ®s, &next_regs, 673 header.staging_register, offset, &consts, 674 i + 1 == num_instrs); 675 } 676 fprintf(fp, "}\n"); 677 678 if (verbose) { 679 for (unsigned i = 0; i < num_consts; i++) { 680 fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i, consts.raw[i] & 0xffffffff); 681 fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i + 1, consts.raw[i] >> 32); 682 } 683 } 684 685 fprintf(fp, "\n"); 686 return; 687} 688 689void disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool 
verbose) 690{ 691 uint32_t *words = (uint32_t *) code; 692 uint32_t *words_end = words + (size / 4); 693 // used for displaying branch targets 694 unsigned offset = 0; 695 while (words != words_end) { 696 /* Shaders have zero bytes at the end for padding; stop 697 * disassembling when we hit them. */ 698 if (*words == 0) 699 break; 700 701 fprintf(fp, "clause_%u:\n", offset); 702 703 unsigned size; 704 dump_clause(fp, words, &size, offset, verbose); 705 706 words += size * 4; 707 offset += size; 708 } 709} 710 711