1/* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 * 24 */ 25 26/** 27 * \file ac_rgp_elf_object_pack.c 28 * 29 * This file provides functions to create elf object for rgp profiling. 30 * The functions in this file create 64bit elf code object irrespective 31 * of if the driver is compiled as 32 or 64 bit. 32 */ 33 34#include <stdint.h> 35#include <stdio.h> 36#include <string.h> 37#include <libelf.h> 38#include "ac_msgpack.h" 39#include "ac_rgp.h" 40#include "main/macros.h" 41 42#ifndef EM_AMDGPU 43// Old distributions may not have this enum constant 44#define EM_AMDGPU 224 45#endif 46 47char shader_stage_api_string[6][10] = { 48 ".vertex", /* vertex */ 49 ".hull", /* tessellation control */ 50 ".domain", /* tessellation evaluation */ 51 ".geometry", /* geometry */ 52 ".pixel", /* fragment */ 53 ".compute" /* compute */ 54}; 55 56char hw_stage_string[RGP_HW_STAGE_MAX][4] = { 57 ".vs", 58 ".ls", 59 ".hs", 60 ".es", 61 ".gs", 62 ".ps", 63 ".cs" 64}; 65 66char hw_stage_symbol_string[RGP_HW_STAGE_MAX][16] = { 67 "_amdgpu_vs_main", 68 "_amdgpu_ls_main", 69 "_amdgpu_hs_main", 70 "_amdgpu_es_main", 71 "_amdgpu_gs_main", 72 "_amdgpu_ps_main", 73 "_amdgpu_cs_main" 74}; 75 76/** 77 * rgp profiler requires data for few variables stored in msgpack format 78 * in notes section. This function writes the data from 79 * struct rgp_code_object_record to elf object in msgpack format. 80 * for msgpack specification refer to 81 * github.com/msgpack/msgpack/blob/master/spec.md 82 */ 83static void 84ac_rgp_write_msgpack(FILE *output, 85 struct rgp_code_object_record *record, 86 uint32_t *written_size) 87{ 88 struct ac_msgpack msgpack; 89 uint32_t num_shaders; 90 uint32_t i; 91 uint32_t mask; 92 93 num_shaders = util_bitcount(record->shader_stages_mask); 94 95 ac_msgpack_init(&msgpack); 96 97 ac_msgpack_add_fixmap_op(&msgpack, 2); 98 ac_msgpack_add_fixstr(&msgpack, "amdpal.version"); 99 ac_msgpack_add_fixarray_op(&msgpack, 2); 100 ac_msgpack_add_uint(&msgpack, 2); 101 ac_msgpack_add_uint(&msgpack, 1); 102 103 ac_msgpack_add_fixstr(&msgpack, "amdpal.pipelines"); 104 ac_msgpack_add_fixarray_op(&msgpack, 1); 105 ac_msgpack_add_fixmap_op(&msgpack, 6); 106 107 /* 1 108 * This not used in RGP but data needs to be present 109 */ 110 ac_msgpack_add_fixstr(&msgpack, ".spill_threshold"); 111 ac_msgpack_add_uint(&msgpack, 0xffff); 112 113 /* 2 114 * This not used in RGP but data needs to be present 115 */ 116 ac_msgpack_add_fixstr(&msgpack, ".user_data_limit"); 117 ac_msgpack_add_uint(&msgpack, 32); 118 119 /* 3 */ 120 ac_msgpack_add_fixstr(&msgpack, ".shaders"); 121 ac_msgpack_add_fixmap_op(&msgpack, num_shaders); 122 mask = record->shader_stages_mask; 123 while(mask) { 124 i = u_bit_scan(&mask); 125 ac_msgpack_add_fixstr(&msgpack, 126 shader_stage_api_string[i]); 127 ac_msgpack_add_fixmap_op(&msgpack, 2); 128 ac_msgpack_add_fixstr(&msgpack, ".api_shader_hash"); 129 ac_msgpack_add_fixarray_op(&msgpack, 2); 130 ac_msgpack_add_uint(&msgpack, 131 record->shader_data[i].hash[0]); 132 ac_msgpack_add_uint(&msgpack, 0); 133 ac_msgpack_add_fixstr(&msgpack, ".hardware_mapping"); 134 ac_msgpack_add_fixarray_op(&msgpack, 1); 135 ac_msgpack_add_fixstr(&msgpack, hw_stage_string[ 136 record->shader_data[i].hw_stage]); 137 } 138 139 /* 4 */ 140 ac_msgpack_add_fixstr(&msgpack, ".hardware_stages"); 141 ac_msgpack_add_fixmap_op(&msgpack, 142 record->num_shaders_combined); 143 mask = record->shader_stages_mask; 144 while(mask) { 145 i = u_bit_scan(&mask); 146 147 if (record->shader_data[i].is_combined) 148 continue; 149 150 ac_msgpack_add_fixstr(&msgpack, hw_stage_string[ 151 record->shader_data[i].hw_stage]); 152 ac_msgpack_add_fixmap_op(&msgpack, 5); 153 ac_msgpack_add_fixstr(&msgpack, ".entry_point"); 154 ac_msgpack_add_fixstr(&msgpack, hw_stage_symbol_string[ 155 record->shader_data[i].hw_stage]); 156 157 ac_msgpack_add_fixstr(&msgpack, ".sgpr_count"); 158 ac_msgpack_add_uint(&msgpack, 159 record->shader_data[i].sgpr_count); 160 161 ac_msgpack_add_fixstr(&msgpack, ".vgpr_count"); 162 ac_msgpack_add_uint(&msgpack, 163 record->shader_data[i].vgpr_count); 164 165 ac_msgpack_add_fixstr(&msgpack, ".scratch_memory_size"); 166 ac_msgpack_add_uint(&msgpack, 167 record->shader_data[i].scratch_memory_size); 168 169 ac_msgpack_add_fixstr(&msgpack, ".wavefront_size"); 170 ac_msgpack_add_uint(&msgpack, 171 record->shader_data[i].wavefront_size); 172 } 173 174 /* 5 */ 175 ac_msgpack_add_fixstr(&msgpack, ".internal_pipeline_hash"); 176 ac_msgpack_add_fixarray_op(&msgpack, 2); 177 ac_msgpack_add_uint(&msgpack, record->pipeline_hash[0]); 178 ac_msgpack_add_uint(&msgpack, record->pipeline_hash[1]); 179 180 /* 6 */ 181 ac_msgpack_add_fixstr(&msgpack, ".api"); 182 ac_msgpack_add_fixstr(&msgpack, "Vulkan"); 183 184 ac_msgpack_resize_if_required(&msgpack, 4 - (msgpack.offset % 4)); 185 msgpack.offset = ALIGN(msgpack.offset, 4); 186 fwrite(msgpack.mem, 1, msgpack.offset, output); 187 *written_size = msgpack.offset; 188 ac_msgpack_destroy(&msgpack); 189} 190 191 192static uint32_t 193get_lowest_shader(uint32_t *shader_stages_mask, 194 struct rgp_code_object_record *record, 195 struct rgp_shader_data **rgp_shader_data) 196{ 197 uint32_t i, lowest = 0; 198 uint32_t mask; 199 uint64_t base_address = -1; 200 201 if (*shader_stages_mask == 0) 202 return false; 203 204 mask = *shader_stages_mask; 205 while(mask) { 206 i = u_bit_scan(&mask); 207 if (record->shader_data[i].is_combined) { 208 *shader_stages_mask = *shader_stages_mask & ~((uint32_t)1 << i); 209 continue; 210 } 211 if (base_address > record->shader_data[i].base_address) { 212 lowest = i; 213 base_address = record->shader_data[i].base_address; 214 } 215 } 216 217 *shader_stages_mask = *shader_stages_mask & ~((uint32_t)1 << lowest); 218 *rgp_shader_data = &record->shader_data[lowest]; 219 return true; 220} 221 222/** 223 * write the shader code into elf object in text section 224 */ 225static void 226ac_rgp_file_write_elf_text(FILE *output, uint32_t *elf_size_calc, 227 struct rgp_code_object_record *record, 228 uint32_t *text_size) 229{ 230 struct rgp_shader_data *rgp_shader_data = NULL; 231 struct rgp_shader_data *prev_rgp_shader_data = NULL; 232 uint32_t symbol_offset = 0; 233 uint32_t mask = record->shader_stages_mask; 234 static bool warn_once = true; 235 236 while(get_lowest_shader(&mask, record, &rgp_shader_data)) { 237 if (prev_rgp_shader_data) { 238 uint32_t code_offset = rgp_shader_data->base_address - 239 prev_rgp_shader_data->base_address; 240 uint32_t gap_between_code = code_offset - 241 prev_rgp_shader_data->code_size; 242 symbol_offset += code_offset; 243 if (gap_between_code > 0x10000 && warn_once) { 244 fprintf(stderr, "Warning: shader code far from previous " 245 "(%d bytes apart). The rgp capture file " 246 "might be very large.\n", gap_between_code); 247 warn_once = false; 248 } 249 250 fseek(output, gap_between_code, SEEK_CUR); 251 *elf_size_calc += gap_between_code; 252 } 253 254 rgp_shader_data->elf_symbol_offset = symbol_offset; 255 fwrite(rgp_shader_data->code, 1, rgp_shader_data->code_size, output); 256 *elf_size_calc += rgp_shader_data->code_size; 257 prev_rgp_shader_data = rgp_shader_data; 258 } 259 260 symbol_offset += rgp_shader_data->code_size; 261 uint32_t align = ALIGN(symbol_offset, 256) - symbol_offset; 262 fseek(output, align, SEEK_CUR); 263 *elf_size_calc += align; 264 *text_size = symbol_offset + align; 265} 266 267/* 268 * hardcoded index for string table and text section in elf object. 269 * While populating section header table, the index order should 270 * be strictly followed. 271 */ 272#define RGP_ELF_STRING_TBL_SEC_HEADER_INDEX 1 273#define RGP_ELF_TEXT_SEC_HEADER_INDEX 2 274 275/* 276 * hardcode the string table so that is a single write to output. 277 * the strings are in a structure so that it is easy to get the offset 278 * of given string in string table. 279 */ 280struct ac_rgp_elf_string_table { 281 char null[sizeof("")]; 282 char strtab[sizeof(".strtab")]; 283 char text[sizeof(".text")]; 284 char symtab[sizeof(".symtab")]; 285 char note[sizeof(".note")]; 286 char vs_main[sizeof("_amdgpu_vs_main")]; 287 char ls_main[sizeof("_amdgpu_ls_main")]; 288 char hs_main[sizeof("_amdgpu_hs_main")]; 289 char es_main[sizeof("_amdgpu_es_main")]; 290 char gs_main[sizeof("_amdgpu_gs_main")]; 291 char ps_main[sizeof("_amdgpu_ps_main")]; 292 char cs_main[sizeof("_amdgpu_cs_main")]; 293}; 294 295struct ac_rgp_elf_string_table rgp_elf_strtab = { 296 .null = "", 297 .strtab = ".strtab", 298 .text = ".text", 299 .symtab = ".symtab", 300 .note = ".note", 301 .vs_main = "_amdgpu_vs_main", 302 .ls_main = "_amdgpu_ls_main", 303 .hs_main = "_amdgpu_hs_main", 304 .es_main = "_amdgpu_es_main", 305 .gs_main = "_amdgpu_gs_main", 306 .ps_main = "_amdgpu_ps_main", 307 .cs_main = "_amdgpu_cs_main", 308}; 309 310uint32_t rgp_elf_hw_stage_string_offset[RGP_HW_STAGE_MAX] = { 311 (uintptr_t)((struct ac_rgp_elf_string_table*)0)->vs_main, 312 (uintptr_t)((struct ac_rgp_elf_string_table*)0)->ls_main, 313 (uintptr_t)((struct ac_rgp_elf_string_table*)0)->hs_main, 314 (uintptr_t)((struct ac_rgp_elf_string_table*)0)->es_main, 315 (uintptr_t)((struct ac_rgp_elf_string_table*)0)->gs_main, 316 (uintptr_t)((struct ac_rgp_elf_string_table*)0)->ps_main, 317 (uintptr_t)((struct ac_rgp_elf_string_table*)0)->cs_main, 318}; 319 320 321static void 322ac_rgp_file_write_elf_symbol_table(FILE *output, uint32_t *elf_size_calc, 323 struct rgp_code_object_record *record, 324 uint32_t *symbol_table_size) 325{ 326 Elf64_Sym elf_sym; 327 uint32_t i; 328 uint32_t mask = record->shader_stages_mask; 329 330 memset(&elf_sym, 0x00, sizeof(elf_sym)); 331 fwrite(&elf_sym, 1, sizeof(elf_sym), output); 332 333 while(mask) { 334 i = u_bit_scan(&mask); 335 if (record->shader_data[i].is_combined) 336 continue; 337 338 elf_sym.st_name = rgp_elf_hw_stage_string_offset 339 [record->shader_data[i].hw_stage]; 340 elf_sym.st_info = STT_FUNC; 341 elf_sym.st_other = 0x0; 342 elf_sym.st_shndx = RGP_ELF_TEXT_SEC_HEADER_INDEX; 343 elf_sym.st_value = record->shader_data[i].elf_symbol_offset; 344 elf_sym.st_size = record->shader_data[i].code_size; 345 fwrite(&elf_sym, 1, sizeof(elf_sym), output); 346 } 347 348 *symbol_table_size = (record->num_shaders_combined + 1) 349 * sizeof(elf_sym); 350 *elf_size_calc += *symbol_table_size; 351} 352 353 354/* Below defines from from llvm project 355 * llvm/includel/llvm/BinaryFormat/ELF.h 356 */ 357#define ELFOSABI_AMDGPU_PAL 65 358#define NT_AMDGPU_METADATA 32 359 360uint8_t elf_ident[EI_NIDENT] = { ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 361 ELFCLASS64, ELFDATA2LSB, EV_CURRENT, 362 ELFOSABI_AMDGPU_PAL, 363 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 364 0x00, 0x00 }; 365 366#define NOTE_MSGPACK_NAME "AMDGPU" 367struct ac_rgp_elf_note_msgpack_hdr { 368 Elf64_Nhdr hdr; 369 char name[sizeof(NOTE_MSGPACK_NAME)]; 370}; 371 372void 373ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start, 374 struct rgp_code_object_record *record, 375 uint32_t *written_size, uint32_t flags) 376{ 377 Elf64_Ehdr elf_hdr; 378 Elf64_Shdr sec_hdr[5]; 379 uint32_t elf_size_calc; 380 struct ac_rgp_elf_note_msgpack_hdr note_hdr; 381 uint32_t text_size = 0; 382 uint32_t symbol_table_size = 0; 383 uint32_t msgpack_size = 0; 384 size_t note_sec_start; 385 uint32_t sh_offset; 386 387 /* Give space for header in file. It will be written to file at the end */ 388 fseek(output, sizeof(Elf64_Ehdr), SEEK_CUR); 389 390 elf_size_calc = sizeof(Elf64_Ehdr); 391 392 /* Initialize elf header */ 393 memcpy(&elf_hdr.e_ident, &elf_ident, EI_NIDENT); 394 elf_hdr.e_type = ET_REL; 395 elf_hdr.e_machine = EM_AMDGPU; 396 elf_hdr.e_version = EV_CURRENT; 397 elf_hdr.e_entry = 0; 398 elf_hdr.e_flags = flags; 399 elf_hdr.e_shstrndx = 1; /* string table entry is hardcoded to 1*/ 400 elf_hdr.e_phoff = 0; 401 elf_hdr.e_shentsize = sizeof(Elf64_Shdr); 402 elf_hdr.e_ehsize = sizeof(Elf64_Ehdr); 403 elf_hdr.e_phentsize = 0; 404 elf_hdr.e_phnum = 0; 405 406 /* write hardcoded string table */ 407 fwrite(&rgp_elf_strtab, 1, sizeof(rgp_elf_strtab), output); 408 elf_size_calc += sizeof(rgp_elf_strtab); 409 410 /* write shader code as .text code */ 411 ac_rgp_file_write_elf_text(output, &elf_size_calc, record, &text_size); 412 413 /* write symbol table */ 414 ac_rgp_file_write_elf_symbol_table(output, &elf_size_calc, record, 415 &symbol_table_size); 416 417 /* write .note */ 418 /* the .note section contains msgpack which stores variables */ 419 note_sec_start = file_elf_start + elf_size_calc; 420 fseek(output, sizeof(struct ac_rgp_elf_note_msgpack_hdr), SEEK_CUR); 421 ac_rgp_write_msgpack(output, record, &msgpack_size); 422 note_hdr.hdr.n_namesz = sizeof(NOTE_MSGPACK_NAME); 423 note_hdr.hdr.n_descsz = msgpack_size; 424 note_hdr.hdr.n_type = NT_AMDGPU_METADATA; 425 memcpy(note_hdr.name, NOTE_MSGPACK_NAME "\0", 426 sizeof(NOTE_MSGPACK_NAME) + 1); 427 fseek(output, note_sec_start, SEEK_SET); 428 fwrite(¬e_hdr, 1, sizeof(struct ac_rgp_elf_note_msgpack_hdr), output); 429 fseek(output, 0, SEEK_END); 430 elf_size_calc += (msgpack_size + 431 sizeof(struct ac_rgp_elf_note_msgpack_hdr)); 432 433 /* write section headers */ 434 sh_offset = elf_size_calc; 435 memset(&sec_hdr[0], 0x00, sizeof(Elf64_Shdr) * 5); 436 437 /* string table must be at index 1 as used in other places*/ 438 sec_hdr[1].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->strtab; 439 sec_hdr[1].sh_type = SHT_STRTAB; 440 sec_hdr[1].sh_offset = sizeof(Elf64_Ehdr); 441 sec_hdr[1].sh_size = sizeof(rgp_elf_strtab); 442 443 /* text must be at index 2 as used in other places*/ 444 sec_hdr[2].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->text; 445 sec_hdr[2].sh_type = SHT_PROGBITS; 446 sec_hdr[2].sh_flags = SHF_ALLOC | SHF_EXECINSTR; 447 sec_hdr[2].sh_offset = sec_hdr[1].sh_offset + sec_hdr[1].sh_size; 448 sec_hdr[2].sh_size = text_size; 449 sec_hdr[2].sh_addralign = 256; 450 451 sec_hdr[3].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->symtab; 452 sec_hdr[3].sh_type = SHT_SYMTAB; 453 sec_hdr[3].sh_offset = sec_hdr[2].sh_offset + 454 ALIGN(sec_hdr[2].sh_size, 256); 455 sec_hdr[3].sh_size = symbol_table_size; 456 sec_hdr[3].sh_link = RGP_ELF_STRING_TBL_SEC_HEADER_INDEX; 457 sec_hdr[3].sh_addralign = 8; 458 sec_hdr[3].sh_entsize = sizeof(Elf64_Sym); 459 460 sec_hdr[4].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->note; 461 sec_hdr[4].sh_type = SHT_NOTE; 462 sec_hdr[4].sh_offset = sec_hdr[3].sh_offset + sec_hdr[3].sh_size; 463 sec_hdr[4].sh_size = msgpack_size + 464 sizeof(struct ac_rgp_elf_note_msgpack_hdr); 465 sec_hdr[4].sh_addralign = 4; 466 fwrite(&sec_hdr, 1, sizeof(Elf64_Shdr) * 5, output); 467 elf_size_calc += (sizeof(Elf64_Shdr) * 5); 468 469 /* update and write elf header */ 470 elf_hdr.e_shnum = 5; 471 elf_hdr.e_shoff = sh_offset; 472 473 fseek(output, file_elf_start, SEEK_SET); 474 fwrite(&elf_hdr, 1, sizeof(Elf64_Ehdr), output); 475 fseek(output, 0, SEEK_END); 476 477 *written_size = elf_size_calc; 478} 479