17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2018 Valve Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg *
237ec681f3Smrg */
247ec681f3Smrg
257ec681f3Smrg#include "aco_ir.h"
267ec681f3Smrg
277ec681f3Smrg#ifdef LLVM_AVAILABLE
287ec681f3Smrg#if defined(_MSC_VER) && defined(restrict)
297ec681f3Smrg#undef restrict
307ec681f3Smrg#endif
317ec681f3Smrg#include "llvm/ac_llvm_util.h"
327ec681f3Smrg
337ec681f3Smrg#include "llvm-c/Disassembler.h"
347ec681f3Smrg#include <llvm/ADT/StringRef.h>
357ec681f3Smrg#include <llvm/MC/MCDisassembler/MCDisassembler.h>
367ec681f3Smrg#endif
377ec681f3Smrg
387ec681f3Smrg#include <array>
397ec681f3Smrg#include <iomanip>
407ec681f3Smrg#include <vector>
417ec681f3Smrg
427ec681f3Smrgnamespace aco {
437ec681f3Smrgnamespace {
447ec681f3Smrg
457ec681f3Smrg/**
467ec681f3Smrg * Determines the GPU type to use for CLRXdisasm
477ec681f3Smrg */
487ec681f3Smrgconst char*
497ec681f3Smrgto_clrx_device_name(chip_class cc, radeon_family family)
507ec681f3Smrg{
517ec681f3Smrg   switch (cc) {
527ec681f3Smrg   case GFX6:
537ec681f3Smrg      switch (family) {
547ec681f3Smrg      case CHIP_TAHITI: return "tahiti";
557ec681f3Smrg      case CHIP_PITCAIRN: return "pitcairn";
567ec681f3Smrg      case CHIP_VERDE: return "capeverde";
577ec681f3Smrg      case CHIP_OLAND: return "oland";
587ec681f3Smrg      case CHIP_HAINAN: return "hainan";
597ec681f3Smrg      default: return nullptr;
607ec681f3Smrg      }
617ec681f3Smrg   case GFX7:
627ec681f3Smrg      switch (family) {
637ec681f3Smrg      case CHIP_BONAIRE: return "bonaire";
647ec681f3Smrg      case CHIP_KAVERI: return "gfx700";
657ec681f3Smrg      case CHIP_HAWAII: return "hawaii";
667ec681f3Smrg      default: return nullptr;
677ec681f3Smrg      }
687ec681f3Smrg   case GFX8:
697ec681f3Smrg      switch (family) {
707ec681f3Smrg      case CHIP_TONGA: return "tonga";
717ec681f3Smrg      case CHIP_ICELAND: return "iceland";
727ec681f3Smrg      case CHIP_CARRIZO: return "carrizo";
737ec681f3Smrg      case CHIP_FIJI: return "fiji";
747ec681f3Smrg      case CHIP_STONEY: return "stoney";
757ec681f3Smrg      case CHIP_POLARIS10: return "polaris10";
767ec681f3Smrg      case CHIP_POLARIS11: return "polaris11";
777ec681f3Smrg      case CHIP_POLARIS12: return "polaris12";
787ec681f3Smrg      case CHIP_VEGAM: return "polaris11";
797ec681f3Smrg      default: return nullptr;
807ec681f3Smrg      }
817ec681f3Smrg   case GFX9:
827ec681f3Smrg      switch (family) {
837ec681f3Smrg      case CHIP_VEGA10: return "vega10";
847ec681f3Smrg      case CHIP_VEGA12: return "vega12";
857ec681f3Smrg      case CHIP_VEGA20: return "vega20";
867ec681f3Smrg      case CHIP_RAVEN: return "raven";
877ec681f3Smrg      default: return nullptr;
887ec681f3Smrg      }
897ec681f3Smrg   case GFX10:
907ec681f3Smrg      switch (family) {
917ec681f3Smrg      case CHIP_NAVI10: return "gfx1010";
927ec681f3Smrg      case CHIP_NAVI12: return "gfx1011";
937ec681f3Smrg      default: return nullptr;
947ec681f3Smrg      }
957ec681f3Smrg   case GFX10_3:
967ec681f3Smrg      return nullptr;
977ec681f3Smrg   default: unreachable("Invalid chip class!"); return nullptr;
987ec681f3Smrg   }
997ec681f3Smrg}
1007ec681f3Smrg
1017ec681f3Smrgbool
1027ec681f3Smrgprint_asm_clrx(Program* program, std::vector<uint32_t>& binary, FILE* output)
1037ec681f3Smrg{
1047ec681f3Smrg#ifdef _WIN32
1057ec681f3Smrg   return true;
1067ec681f3Smrg#else
1077ec681f3Smrg   char path[] = "/tmp/fileXXXXXX";
1087ec681f3Smrg   char line[2048], command[128];
1097ec681f3Smrg   FILE* p;
1107ec681f3Smrg   int fd;
1117ec681f3Smrg
1127ec681f3Smrg   const char* gpu_type = to_clrx_device_name(program->chip_class, program->family);
1137ec681f3Smrg
1147ec681f3Smrg   /* Dump the binary into a temporary file. */
1157ec681f3Smrg   fd = mkstemp(path);
1167ec681f3Smrg   if (fd < 0)
1177ec681f3Smrg      return true;
1187ec681f3Smrg
1197ec681f3Smrg   for (uint32_t w : binary) {
1207ec681f3Smrg      if (write(fd, &w, sizeof(w)) == -1)
1217ec681f3Smrg         goto fail;
1227ec681f3Smrg   }
1237ec681f3Smrg
1247ec681f3Smrg   sprintf(command, "clrxdisasm --gpuType=%s -r %s", gpu_type, path);
1257ec681f3Smrg
1267ec681f3Smrg   p = popen(command, "r");
1277ec681f3Smrg   if (p) {
1287ec681f3Smrg      if (!fgets(line, sizeof(line), p)) {
1297ec681f3Smrg         fprintf(output, "clrxdisasm not found\n");
1307ec681f3Smrg         pclose(p);
1317ec681f3Smrg         goto fail;
1327ec681f3Smrg      }
1337ec681f3Smrg
1347ec681f3Smrg      do {
1357ec681f3Smrg         fputs(line, output);
1367ec681f3Smrg      } while (fgets(line, sizeof(line), p));
1377ec681f3Smrg
1387ec681f3Smrg      pclose(p);
1397ec681f3Smrg   }
1407ec681f3Smrg
1417ec681f3Smrg   return false;
1427ec681f3Smrg
1437ec681f3Smrgfail:
1447ec681f3Smrg   close(fd);
1457ec681f3Smrg   unlink(path);
1467ec681f3Smrg   return true;
1477ec681f3Smrg#endif
1487ec681f3Smrg}
1497ec681f3Smrg
1507ec681f3Smrg#ifdef LLVM_AVAILABLE
1517ec681f3Smrgstd::pair<bool, size_t>
1527ec681f3Smrgdisasm_instr(chip_class chip, LLVMDisasmContextRef disasm, uint32_t* binary, unsigned exec_size,
1537ec681f3Smrg             size_t pos, char* outline, unsigned outline_size)
1547ec681f3Smrg{
1557ec681f3Smrg   size_t l =
1567ec681f3Smrg      LLVMDisasmInstruction(disasm, (uint8_t*)&binary[pos], (exec_size - pos) * sizeof(uint32_t),
1577ec681f3Smrg                            pos * 4, outline, outline_size);
1587ec681f3Smrg
1597ec681f3Smrg   if (chip >= GFX10 && l == 8 && ((binary[pos] & 0xffff0000) == 0xd7610000) &&
1607ec681f3Smrg       ((binary[pos + 1] & 0x1ff) == 0xff)) {
1617ec681f3Smrg      /* v_writelane with literal uses 3 dwords but llvm consumes only 2 */
1627ec681f3Smrg      l += 4;
1637ec681f3Smrg   }
1647ec681f3Smrg
1657ec681f3Smrg   bool invalid = false;
1667ec681f3Smrg   size_t size;
1677ec681f3Smrg   if (!l &&
1687ec681f3Smrg       ((chip >= GFX9 && (binary[pos] & 0xffff8000) == 0xd1348000) ||  /* v_add_u32_e64 + clamp */
1697ec681f3Smrg        (chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd7038000) || /* v_add_u16_e64 + clamp */
1707ec681f3Smrg        (chip <= GFX9 && (binary[pos] & 0xffff8000) == 0xd1268000) ||  /* v_add_u16_e64 + clamp */
1717ec681f3Smrg        (chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd76d8000) || /* v_add3_u32 + clamp */
1727ec681f3Smrg        (chip == GFX9 && (binary[pos] & 0xffff8000) == 0xd1ff8000)) /* v_add3_u32 + clamp */) {
1737ec681f3Smrg      strcpy(outline, "\tinteger addition + clamp");
1747ec681f3Smrg      bool has_literal = chip >= GFX10 && (((binary[pos + 1] & 0x1ff) == 0xff) ||
1757ec681f3Smrg                                           (((binary[pos + 1] >> 9) & 0x1ff) == 0xff));
1767ec681f3Smrg      size = 2 + has_literal;
1777ec681f3Smrg   } else if (chip >= GFX10 && l == 4 && ((binary[pos] & 0xfe0001ff) == 0x020000f9)) {
1787ec681f3Smrg      strcpy(outline, "\tv_cndmask_b32 + sdwa");
1797ec681f3Smrg      size = 2;
1807ec681f3Smrg   } else if (!l) {
1817ec681f3Smrg      strcpy(outline, "(invalid instruction)");
1827ec681f3Smrg      size = 1;
1837ec681f3Smrg      invalid = true;
1847ec681f3Smrg   } else {
1857ec681f3Smrg      assert(l % 4 == 0);
1867ec681f3Smrg      size = l / 4;
1877ec681f3Smrg   }
1887ec681f3Smrg
1897ec681f3Smrg   return std::make_pair(invalid, size);
1907ec681f3Smrg}
1917ec681f3Smrg
1927ec681f3Smrgbool
1937ec681f3Smrgprint_asm_llvm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)
1947ec681f3Smrg{
1957ec681f3Smrg   std::vector<bool> referenced_blocks(program->blocks.size());
1967ec681f3Smrg   referenced_blocks[0] = true;
1977ec681f3Smrg   for (Block& block : program->blocks) {
1987ec681f3Smrg      for (unsigned succ : block.linear_succs)
1997ec681f3Smrg         referenced_blocks[succ] = true;
2007ec681f3Smrg   }
2017ec681f3Smrg
2027ec681f3Smrg   std::vector<llvm::SymbolInfoTy> symbols;
2037ec681f3Smrg   std::vector<std::array<char, 16>> block_names;
2047ec681f3Smrg   block_names.reserve(program->blocks.size());
2057ec681f3Smrg   for (Block& block : program->blocks) {
2067ec681f3Smrg      if (!referenced_blocks[block.index])
2077ec681f3Smrg         continue;
2087ec681f3Smrg      std::array<char, 16> name;
2097ec681f3Smrg      sprintf(name.data(), "BB%u", block.index);
2107ec681f3Smrg      block_names.push_back(name);
2117ec681f3Smrg      symbols.emplace_back(block.offset * 4,
2127ec681f3Smrg                           llvm::StringRef(block_names[block_names.size() - 1].data()), 0);
2137ec681f3Smrg   }
2147ec681f3Smrg
2157ec681f3Smrg   const char* features = "";
2167ec681f3Smrg   if (program->chip_class >= GFX10 && program->wave_size == 64) {
2177ec681f3Smrg      features = "+wavefrontsize64";
2187ec681f3Smrg   }
2197ec681f3Smrg
2207ec681f3Smrg   LLVMDisasmContextRef disasm =
2217ec681f3Smrg      LLVMCreateDisasmCPUFeatures("amdgcn-mesa-mesa3d", ac_get_llvm_processor_name(program->family),
2227ec681f3Smrg                                  features, &symbols, 0, NULL, NULL);
2237ec681f3Smrg
2247ec681f3Smrg   size_t pos = 0;
2257ec681f3Smrg   bool invalid = false;
2267ec681f3Smrg   unsigned next_block = 0;
2277ec681f3Smrg
2287ec681f3Smrg   unsigned prev_size = 0;
2297ec681f3Smrg   unsigned prev_pos = 0;
2307ec681f3Smrg   unsigned repeat_count = 0;
2317ec681f3Smrg   while (pos < exec_size) {
2327ec681f3Smrg      bool new_block =
2337ec681f3Smrg         next_block < program->blocks.size() && pos == program->blocks[next_block].offset;
2347ec681f3Smrg      if (pos + prev_size <= exec_size && prev_pos != pos && !new_block &&
2357ec681f3Smrg          memcmp(&binary[prev_pos], &binary[pos], prev_size * 4) == 0) {
2367ec681f3Smrg         repeat_count++;
2377ec681f3Smrg         pos += prev_size;
2387ec681f3Smrg         continue;
2397ec681f3Smrg      } else {
2407ec681f3Smrg         if (repeat_count)
2417ec681f3Smrg            fprintf(output, "\t(then repeated %u times)\n", repeat_count);
2427ec681f3Smrg         repeat_count = 0;
2437ec681f3Smrg      }
2447ec681f3Smrg
2457ec681f3Smrg      while (next_block < program->blocks.size() && pos == program->blocks[next_block].offset) {
2467ec681f3Smrg         if (referenced_blocks[next_block])
2477ec681f3Smrg            fprintf(output, "BB%u:\n", next_block);
2487ec681f3Smrg         next_block++;
2497ec681f3Smrg      }
2507ec681f3Smrg
2517ec681f3Smrg      char outline[1024];
2527ec681f3Smrg      std::pair<bool, size_t> res = disasm_instr(program->chip_class, disasm, binary.data(),
2537ec681f3Smrg                                                 exec_size, pos, outline, sizeof(outline));
2547ec681f3Smrg      invalid |= res.first;
2557ec681f3Smrg
2567ec681f3Smrg      fprintf(output, "%-60s ;", outline);
2577ec681f3Smrg
2587ec681f3Smrg      for (unsigned i = 0; i < res.second; i++)
2597ec681f3Smrg         fprintf(output, " %.8x", binary[pos + i]);
2607ec681f3Smrg      fputc('\n', output);
2617ec681f3Smrg
2627ec681f3Smrg      prev_size = res.second;
2637ec681f3Smrg      prev_pos = pos;
2647ec681f3Smrg      pos += res.second;
2657ec681f3Smrg   }
2667ec681f3Smrg   assert(next_block == program->blocks.size());
2677ec681f3Smrg
2687ec681f3Smrg   LLVMDisasmDispose(disasm);
2697ec681f3Smrg
2707ec681f3Smrg   if (program->constant_data.size()) {
2717ec681f3Smrg      fputs("\n/* constant data */\n", output);
2727ec681f3Smrg      for (unsigned i = 0; i < program->constant_data.size(); i += 32) {
2737ec681f3Smrg         fprintf(output, "[%.6u]", i);
2747ec681f3Smrg         unsigned line_size = std::min<size_t>(program->constant_data.size() - i, 32);
2757ec681f3Smrg         for (unsigned j = 0; j < line_size; j += 4) {
2767ec681f3Smrg            unsigned size = std::min<size_t>(program->constant_data.size() - (i + j), 4);
2777ec681f3Smrg            uint32_t v = 0;
2787ec681f3Smrg            memcpy(&v, &program->constant_data[i + j], size);
2797ec681f3Smrg            fprintf(output, " %.8x", v);
2807ec681f3Smrg         }
2817ec681f3Smrg         fputc('\n', output);
2827ec681f3Smrg      }
2837ec681f3Smrg   }
2847ec681f3Smrg
2857ec681f3Smrg   return invalid;
2867ec681f3Smrg}
2877ec681f3Smrg#endif /* LLVM_AVAILABLE */
2887ec681f3Smrg
2897ec681f3Smrg} /* end namespace */
2907ec681f3Smrg
2917ec681f3Smrgbool
2927ec681f3Smrgcheck_print_asm_support(Program* program)
2937ec681f3Smrg{
2947ec681f3Smrg#ifdef LLVM_AVAILABLE
2957ec681f3Smrg   if (program->chip_class >= GFX8) {
2967ec681f3Smrg      /* LLVM disassembler only supports GFX8+ */
2977ec681f3Smrg      return true;
2987ec681f3Smrg   }
2997ec681f3Smrg#endif
3007ec681f3Smrg
3017ec681f3Smrg#ifndef _WIN32
3027ec681f3Smrg   /* Check if CLRX disassembler binary is available and can disassemble the program */
3037ec681f3Smrg   return to_clrx_device_name(program->chip_class, program->family) &&
3047ec681f3Smrg          system("clrxdisasm --version") == 0;
3057ec681f3Smrg#else
3067ec681f3Smrg   return false;
3077ec681f3Smrg#endif
3087ec681f3Smrg}
3097ec681f3Smrg
3107ec681f3Smrg/* Returns true on failure */
3117ec681f3Smrgbool
3127ec681f3Smrgprint_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)
3137ec681f3Smrg{
3147ec681f3Smrg#ifdef LLVM_AVAILABLE
3157ec681f3Smrg   if (program->chip_class >= GFX8) {
3167ec681f3Smrg      return print_asm_llvm(program, binary, exec_size, output);
3177ec681f3Smrg   }
3187ec681f3Smrg#endif
3197ec681f3Smrg
3207ec681f3Smrg   return print_asm_clrx(program, binary, output);
3217ec681f3Smrg}
3227ec681f3Smrg
3237ec681f3Smrg} // namespace aco
324