1/* 2 * Copyright © 2018 Valve Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25#include "aco_ir.h" 26 27#ifdef LLVM_AVAILABLE 28#if defined(_MSC_VER) && defined(restrict) 29#undef restrict 30#endif 31#include "llvm/ac_llvm_util.h" 32 33#include "llvm-c/Disassembler.h" 34#include <llvm/ADT/StringRef.h> 35#include <llvm/MC/MCDisassembler/MCDisassembler.h> 36#endif 37 38#include <array> 39#include <iomanip> 40#include <vector> 41 42namespace aco { 43namespace { 44 45/** 46 * Determines the GPU type to use for CLRXdisasm 47 */ 48const char* 49to_clrx_device_name(chip_class cc, radeon_family family) 50{ 51 switch (cc) { 52 case GFX6: 53 switch (family) { 54 case CHIP_TAHITI: return "tahiti"; 55 case CHIP_PITCAIRN: return "pitcairn"; 56 case CHIP_VERDE: return "capeverde"; 57 case CHIP_OLAND: return "oland"; 58 case CHIP_HAINAN: return "hainan"; 59 default: return nullptr; 60 } 61 case GFX7: 62 switch (family) { 63 case CHIP_BONAIRE: return "bonaire"; 64 case CHIP_KAVERI: return "gfx700"; 65 case CHIP_HAWAII: return "hawaii"; 66 default: return nullptr; 67 } 68 case GFX8: 69 switch (family) { 70 case CHIP_TONGA: return "tonga"; 71 case CHIP_ICELAND: return "iceland"; 72 case CHIP_CARRIZO: return "carrizo"; 73 case CHIP_FIJI: return "fiji"; 74 case CHIP_STONEY: return "stoney"; 75 case CHIP_POLARIS10: return "polaris10"; 76 case CHIP_POLARIS11: return "polaris11"; 77 case CHIP_POLARIS12: return "polaris12"; 78 case CHIP_VEGAM: return "polaris11"; 79 default: return nullptr; 80 } 81 case GFX9: 82 switch (family) { 83 case CHIP_VEGA10: return "vega10"; 84 case CHIP_VEGA12: return "vega12"; 85 case CHIP_VEGA20: return "vega20"; 86 case CHIP_RAVEN: return "raven"; 87 default: return nullptr; 88 } 89 case GFX10: 90 switch (family) { 91 case CHIP_NAVI10: return "gfx1010"; 92 case CHIP_NAVI12: return "gfx1011"; 93 default: return nullptr; 94 } 95 case GFX10_3: 96 return nullptr; 97 default: unreachable("Invalid chip class!"); return nullptr; 98 } 99} 100 101bool 102print_asm_clrx(Program* program, std::vector<uint32_t>& binary, FILE* output) 103{ 104#ifdef _WIN32 105 return true; 106#else 107 char path[] = "/tmp/fileXXXXXX"; 108 char line[2048], command[128]; 109 FILE* p; 110 int fd; 111 112 const char* gpu_type = to_clrx_device_name(program->chip_class, program->family); 113 114 /* Dump the binary into a temporary file. */ 115 fd = mkstemp(path); 116 if (fd < 0) 117 return true; 118 119 for (uint32_t w : binary) { 120 if (write(fd, &w, sizeof(w)) == -1) 121 goto fail; 122 } 123 124 sprintf(command, "clrxdisasm --gpuType=%s -r %s", gpu_type, path); 125 126 p = popen(command, "r"); 127 if (p) { 128 if (!fgets(line, sizeof(line), p)) { 129 fprintf(output, "clrxdisasm not found\n"); 130 pclose(p); 131 goto fail; 132 } 133 134 do { 135 fputs(line, output); 136 } while (fgets(line, sizeof(line), p)); 137 138 pclose(p); 139 } 140 141 return false; 142 143fail: 144 close(fd); 145 unlink(path); 146 return true; 147#endif 148} 149 150#ifdef LLVM_AVAILABLE 151std::pair<bool, size_t> 152disasm_instr(chip_class chip, LLVMDisasmContextRef disasm, uint32_t* binary, unsigned exec_size, 153 size_t pos, char* outline, unsigned outline_size) 154{ 155 size_t l = 156 LLVMDisasmInstruction(disasm, (uint8_t*)&binary[pos], (exec_size - pos) * sizeof(uint32_t), 157 pos * 4, outline, outline_size); 158 159 if (chip >= GFX10 && l == 8 && ((binary[pos] & 0xffff0000) == 0xd7610000) && 160 ((binary[pos + 1] & 0x1ff) == 0xff)) { 161 /* v_writelane with literal uses 3 dwords but llvm consumes only 2 */ 162 l += 4; 163 } 164 165 bool invalid = false; 166 size_t size; 167 if (!l && 168 ((chip >= GFX9 && (binary[pos] & 0xffff8000) == 0xd1348000) || /* v_add_u32_e64 + clamp */ 169 (chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd7038000) || /* v_add_u16_e64 + clamp */ 170 (chip <= GFX9 && (binary[pos] & 0xffff8000) == 0xd1268000) || /* v_add_u16_e64 + clamp */ 171 (chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd76d8000) || /* v_add3_u32 + clamp */ 172 (chip == GFX9 && (binary[pos] & 0xffff8000) == 0xd1ff8000)) /* v_add3_u32 + clamp */) { 173 strcpy(outline, "\tinteger addition + clamp"); 174 bool has_literal = chip >= GFX10 && (((binary[pos + 1] & 0x1ff) == 0xff) || 175 (((binary[pos + 1] >> 9) & 0x1ff) == 0xff)); 176 size = 2 + has_literal; 177 } else if (chip >= GFX10 && l == 4 && ((binary[pos] & 0xfe0001ff) == 0x020000f9)) { 178 strcpy(outline, "\tv_cndmask_b32 + sdwa"); 179 size = 2; 180 } else if (!l) { 181 strcpy(outline, "(invalid instruction)"); 182 size = 1; 183 invalid = true; 184 } else { 185 assert(l % 4 == 0); 186 size = l / 4; 187 } 188 189 return std::make_pair(invalid, size); 190} 191 192bool 193print_asm_llvm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output) 194{ 195 std::vector<bool> referenced_blocks(program->blocks.size()); 196 referenced_blocks[0] = true; 197 for (Block& block : program->blocks) { 198 for (unsigned succ : block.linear_succs) 199 referenced_blocks[succ] = true; 200 } 201 202 std::vector<llvm::SymbolInfoTy> symbols; 203 std::vector<std::array<char, 16>> block_names; 204 block_names.reserve(program->blocks.size()); 205 for (Block& block : program->blocks) { 206 if (!referenced_blocks[block.index]) 207 continue; 208 std::array<char, 16> name; 209 sprintf(name.data(), "BB%u", block.index); 210 block_names.push_back(name); 211 symbols.emplace_back(block.offset * 4, 212 llvm::StringRef(block_names[block_names.size() - 1].data()), 0); 213 } 214 215 const char* features = ""; 216 if (program->chip_class >= GFX10 && program->wave_size == 64) { 217 features = "+wavefrontsize64"; 218 } 219 220 LLVMDisasmContextRef disasm = 221 LLVMCreateDisasmCPUFeatures("amdgcn-mesa-mesa3d", ac_get_llvm_processor_name(program->family), 222 features, &symbols, 0, NULL, NULL); 223 224 size_t pos = 0; 225 bool invalid = false; 226 unsigned next_block = 0; 227 228 unsigned prev_size = 0; 229 unsigned prev_pos = 0; 230 unsigned repeat_count = 0; 231 while (pos < exec_size) { 232 bool new_block = 233 next_block < program->blocks.size() && pos == program->blocks[next_block].offset; 234 if (pos + prev_size <= exec_size && prev_pos != pos && !new_block && 235 memcmp(&binary[prev_pos], &binary[pos], prev_size * 4) == 0) { 236 repeat_count++; 237 pos += prev_size; 238 continue; 239 } else { 240 if (repeat_count) 241 fprintf(output, "\t(then repeated %u times)\n", repeat_count); 242 repeat_count = 0; 243 } 244 245 while (next_block < program->blocks.size() && pos == program->blocks[next_block].offset) { 246 if (referenced_blocks[next_block]) 247 fprintf(output, "BB%u:\n", next_block); 248 next_block++; 249 } 250 251 char outline[1024]; 252 std::pair<bool, size_t> res = disasm_instr(program->chip_class, disasm, binary.data(), 253 exec_size, pos, outline, sizeof(outline)); 254 invalid |= res.first; 255 256 fprintf(output, "%-60s ;", outline); 257 258 for (unsigned i = 0; i < res.second; i++) 259 fprintf(output, " %.8x", binary[pos + i]); 260 fputc('\n', output); 261 262 prev_size = res.second; 263 prev_pos = pos; 264 pos += res.second; 265 } 266 assert(next_block == program->blocks.size()); 267 268 LLVMDisasmDispose(disasm); 269 270 if (program->constant_data.size()) { 271 fputs("\n/* constant data */\n", output); 272 for (unsigned i = 0; i < program->constant_data.size(); i += 32) { 273 fprintf(output, "[%.6u]", i); 274 unsigned line_size = std::min<size_t>(program->constant_data.size() - i, 32); 275 for (unsigned j = 0; j < line_size; j += 4) { 276 unsigned size = std::min<size_t>(program->constant_data.size() - (i + j), 4); 277 uint32_t v = 0; 278 memcpy(&v, &program->constant_data[i + j], size); 279 fprintf(output, " %.8x", v); 280 } 281 fputc('\n', output); 282 } 283 } 284 285 return invalid; 286} 287#endif /* LLVM_AVAILABLE */ 288 289} /* end namespace */ 290 291bool 292check_print_asm_support(Program* program) 293{ 294#ifdef LLVM_AVAILABLE 295 if (program->chip_class >= GFX8) { 296 /* LLVM disassembler only supports GFX8+ */ 297 return true; 298 } 299#endif 300 301#ifndef _WIN32 302 /* Check if CLRX disassembler binary is available and can disassemble the program */ 303 return to_clrx_device_name(program->chip_class, program->family) && 304 system("clrxdisasm --version") == 0; 305#else 306 return false; 307#endif 308} 309 310/* Returns true on failure */ 311bool 312print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output) 313{ 314#ifdef LLVM_AVAILABLE 315 if (program->chip_class >= GFX8) { 316 return print_asm_llvm(program, binary, exec_size, output); 317 } 318#endif 319 320 return print_asm_clrx(program, binary, output); 321} 322 323} // namespace aco 324